diff -Nru x264-0.152.2854+gite9a5903/autocomplete.c x264-0.158.2988+git-20191101.7817004/autocomplete.c --- x264-0.152.2854+gite9a5903/autocomplete.c 1970-01-01 00:00:00.000000000 +0000 +++ x264-0.158.2988+git-20191101.7817004/autocomplete.c 2019-11-09 05:16:29.000000000 +0000 @@ -0,0 +1,405 @@ +/***************************************************************************** + * autocomplete: x264cli shell autocomplete + ***************************************************************************** + * Copyright (C) 2018-2019 x264 project + * + * Authors: Henrik Gramner + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02111, USA. + * + * This program is also available under a commercial proprietary license. + * For more information, contact us at licensing@x264.com. + *****************************************************************************/ + +#include "x264cli.h" +#include "input/input.h" + +#if HAVE_LAVF +#undef DECLARE_ALIGNED +#include <libavformat/avformat.h> +#include <libavutil/pixdesc.h> +#endif + +static const char * const level_names[] = +{ + "1", "1.1", "1.2", "1.3", "1b", + "2", "2.1", "2.2", + "3", "3.1", "3.2", + "4", "4.1", "4.2", + "5", "5.1", "5.2", + "6", "6.1", "6.2", + NULL +}; + +/* Options requiring a value for which we provide suggestions. 
*/ +static const char * const opts_suggest[] = +{ + "--alternative-transfer", + "--aq-mode", + "--asm", + "--avcintra-class", + "--avcintra-flavor", + "--b-adapt", + "--b-pyramid", + "--colormatrix", + "--colorprim", + "--cqm", + "--demuxer", + "--direct", + "--frame-packing", + "--input-csp", + "--input-fmt", + "--input-range", + "--level", + "--log-level", + "--me", + "--muxer", + "--nal-hrd", + "--output-csp", + "--overscan", + "--pass", "-p", + "--preset", + "--profile", + "--pulldown", + "--range", + "--subme", "-m", + "--transfer", + "--trellis", "-t", + "--tune", + "--videoformat", + "--weightp", + NULL +}; + +/* Options requiring a value for which we don't provide suggestions. */ +static const char * const opts_nosuggest[] = +{ + "--b-bias", + "--bframes", "-b", + "--deblock", "-f", + "--bitrate", "-B", + "--chroma-qp-offset", + "--chromaloc", + "--cplxblur", + "--cqm4", + "--cqm4i", + "--cqm4ic", + "--cqm4iy", + "--cqm4p", + "--cqm4pc", + "--cqm4py", + "--cqm8", + "--cqm8i", + "--cqm8p", + "--crf", + "--crf-max", + "--crop-rect", + "--deadzone-inter", + "--deadzone-intra", + "--fps", + "--frames", + "--input-depth", + "--input-res", + "--ipratio", + "--keyint", "-I", + "--lookahead-threads", + "--merange", + "--min-keyint", "-i", + "--mvrange", + "--mvrange-thread", + "--nr", + "--opencl-device", + "--output-depth", + "--partitions", "-A", + "--pbratio", + "--psy-rd", + "--qblur", + "--qcomp", + "--qp", "-q", + "--qpmax", + "--qpmin", + "--qpstep", + "--ratetol", + "--ref", "-r", + "--rc-lookahead", + "--sar", + "--scenecut", + "--seek", + "--slices", + "--slices-max", + "--slice-max-size", + "--slice-max-mbs", + "--slice-min-mbs", + "--sps-id", + "--sync-lookahead", + "--threads", + "--timebase", + "--vbv-bufsize", + "--vbv-init", + "--vbv-maxrate", + "--video-filter", "--vf", + "--zones", + NULL +}; + +/* Options requiring a filename. 
*/ +static const char * const opts_filename[] = +{ + "--cqmfile", + "--dump-yuv", + "--index", + "--opencl-clbin", + "--output", "-o", + "--qpfile", + "--stats", + "--tcfile-in", + "--tcfile-out", + NULL +}; + +/* Options without an associated value. */ +static const char * const opts_standalone[] = +{ + "--8x8dct", + "--aud", + "--bff", + "--bluray-compat", + "--cabac", + "--constrained-intra", + "--cpu-independent", + "--dts-compress", + "--fake-interlaced", + "--fast-pskip", + "--filler", + "--force-cfr", + "--mbtree", + "--mixed-refs", + "--no-8x8dct", + "--no-asm", + "--no-cabac", + "--no-chroma-me", + "--no-dct-decimate", + "--no-deblock", + "--no-fast-pskip", + "--no-mbtree", + "--no-mixed-refs", + "--no-progress", + "--no-psy", + "--no-scenecut", + "--no-weightb", + "--non-deterministic", + "--open-gop", + "--opencl", + "--pic-struct", + "--psnr", + "--quiet", + "--sliced-threads", + "--slow-firstpass", + "--ssim", + "--stitchable", + "--tff", + "--thread-input", + "--verbose", "-v", + "--weightb", + NULL +}; + +/* Options which shouldn't be suggested in combination with other options. */ +static const char * const opts_special[] = +{ + "--fullhelp", + "--help", "-h", + "--longhelp", + "--version", + NULL +}; + +static int list_contains( const char * const *list, const char *s ) +{ + if( *s ) + for( ; *list; list++ ) + if( !strcmp( *list, s ) ) + return 1; + return 0; +} + +static void suggest( const char *s, const char *cur, int cur_len ) +{ + if( s && *s && !strncmp( s, cur, cur_len ) ) + printf( "%s\n", s ); +} + +static void suggest_lower( const char *s, const char *cur, int cur_len ) +{ + if( s && *s && !strncasecmp( s, cur, cur_len ) ) + { + for( ; *s; s++ ) + putchar( *s < 'A' || *s > 'Z' ? 
*s : *s | 0x20 ); + putchar( '\n' ); + } +} + +static void suggest_num_range( int start, int end, const char *cur, int cur_len ) +{ + char buf[16]; + for( int i = start; i <= end; i++ ) + { + snprintf( buf, sizeof( buf ), "%d", i ); + suggest( buf, cur, cur_len ); + } +} + +#if HAVE_LAVF +/* Suggest each token in a string separated by delimiters. */ +static void suggest_token( const char *s, int delim, const char *cur, int cur_len ) +{ + if( s && *s ) + { + for( const char *tok_end; (tok_end = strchr( s, delim )); s = tok_end + 1 ) + { + int tok_len = tok_end - s; + if( tok_len && tok_len >= cur_len && !strncmp( s, cur, cur_len ) ) + printf( "%.*s\n", tok_len, s ); + } + suggest( s, cur, cur_len ); + } +} +#endif + +#define OPT( opt ) else if( !strcmp( prev, opt ) ) +#define OPT2( opt1, opt2 ) else if( !strcmp( prev, opt1 ) || !strcmp( prev, opt2 ) ) +#define OPT_TYPE( type ) list_contains( opts_##type, prev ) + +#define suggest( s ) suggest( s, cur, cur_len ) +#define suggest_lower( s ) suggest_lower( s, cur, cur_len ) +#define suggest_list( list ) for( const char * const *s = list; *s; s++ ) suggest( *s ) +#define suggest_num_range( start, end ) suggest_num_range( start, end, cur, cur_len ) +#define suggest_token( s, delim ) suggest_token( s, delim, cur, cur_len ) + +int x264_cli_autocomplete( const char *prev, const char *cur ) +{ + int cur_len = strlen( cur ); + if( 0 ); + OPT( "--alternative-transfer" ) + suggest_list( x264_transfer_names ); + OPT( "--aq-mode" ) + suggest_num_range( 0, 3 ); + OPT( "--asm" ) + for( const x264_cpu_name_t *cpu = x264_cpu_names; cpu->flags; cpu++ ) + suggest_lower( cpu->name ); + OPT( "--avcintra-class" ) + suggest_list( x264_avcintra_class_names ); + OPT( "--avcintra-flavor" ) + suggest_list( x264_avcintra_flavor_names ); + OPT( "--b-adapt" ) + suggest_num_range( 0, 2 ); + OPT( "--b-pyramid" ) + suggest_list( x264_b_pyramid_names ); + OPT( "--colormatrix" ) + suggest_list( x264_colmatrix_names ); + OPT( "--colorprim" ) + 
suggest_list( x264_colorprim_names ); + OPT( "--cqm" ) + suggest_list( x264_cqm_names ); + OPT( "--demuxer" ) + suggest_list( x264_demuxer_names ); + OPT( "--direct" ) + suggest_list( x264_direct_pred_names ); + OPT( "--frame-packing" ) + suggest_num_range( 0, 7 ); + OPT( "--input-csp" ) + { + for( int i = X264_CSP_NONE+1; i < X264_CSP_CLI_MAX; i++ ) + suggest( x264_cli_csps[i].name ); +#if HAVE_LAVF + for( const AVPixFmtDescriptor *d = NULL; (d = av_pix_fmt_desc_next( d )); ) + suggest( d->name ); +#endif + } + OPT( "--input-fmt" ) + { +#if HAVE_LAVF + av_register_all(); + for( const AVInputFormat *f = NULL; (f = av_iformat_next( f )); ) + suggest_token( f->name, ',' ); +#endif + } + OPT( "--input-range" ) + suggest_list( x264_range_names ); + OPT( "--level" ) + suggest_list( level_names ); + OPT( "--log-level" ) + suggest_list( x264_log_level_names ); + OPT( "--me" ) + suggest_list( x264_motion_est_names ); + OPT( "--muxer" ) + suggest_list( x264_muxer_names ); + OPT( "--nal-hrd" ) + suggest_list( x264_nal_hrd_names ); + OPT( "--output-csp" ) + suggest_list( x264_output_csp_names ); + OPT( "--output-depth" ) + { +#if HAVE_BITDEPTH8 + suggest( "8" ); +#endif +#if HAVE_BITDEPTH10 + suggest( "10" ); +#endif + } + OPT( "--overscan" ) + suggest_list( x264_overscan_names ); + OPT2( "--partitions", "-A" ) + suggest_list( x264_partition_names ); + OPT2( "--pass", "-p" ) + suggest_num_range( 1, 3 ); + OPT( "--preset" ) + suggest_list( x264_preset_names ); + OPT( "--profile" ) + suggest_list( x264_valid_profile_names ); + OPT( "--pulldown" ) + suggest_list( x264_pulldown_names ); + OPT( "--range" ) + suggest_list( x264_range_names ); + OPT2( "--subme", "-m" ) + suggest_num_range( 0, 11 ); + OPT( "--transfer" ) + suggest_list( x264_transfer_names ); + OPT2( "--trellis", "-t" ) + suggest_num_range( 0, 2 ); + OPT( "--tune" ) + suggest_list( x264_tune_names ); + OPT( "--videoformat" ) + suggest_list( x264_vidformat_names ); + OPT( "--weightp" ) + suggest_num_range( 0, 2 ); + 
else if( !OPT_TYPE( nosuggest ) && !OPT_TYPE( special ) ) + { + if( OPT_TYPE( filename ) || strncmp( cur, "--", 2 ) ) + return 1; /* Fall back to default shell filename autocomplete. */ + + /* Suggest options. */ + suggest_list( opts_suggest ); + suggest_list( opts_nosuggest ); + suggest_list( opts_filename ); + suggest_list( opts_standalone ); + + /* Only suggest special options if no other options have been specified. */ + if( !*prev ) + suggest_list( opts_special ); + } + + return 0; +} diff -Nru x264-0.152.2854+gite9a5903/common/aarch64/asm-offsets.c x264-0.158.2988+git-20191101.7817004/common/aarch64/asm-offsets.c --- x264-0.152.2854+gite9a5903/common/aarch64/asm-offsets.c 2017-12-31 12:50:50.000000000 +0000 +++ x264-0.158.2988+git-20191101.7817004/common/aarch64/asm-offsets.c 2019-11-09 05:16:29.000000000 +0000 @@ -1,7 +1,7 @@ /***************************************************************************** * asm-offsets.c: check asm offsets for aarch64 ***************************************************************************** - * Copyright (C) 2014-2017 x264 project + * Copyright (C) 2014-2019 x264 project * * Authors: Janne Grunau * diff -Nru x264-0.152.2854+gite9a5903/common/aarch64/asm-offsets.h x264-0.158.2988+git-20191101.7817004/common/aarch64/asm-offsets.h --- x264-0.152.2854+gite9a5903/common/aarch64/asm-offsets.h 2017-12-31 12:50:50.000000000 +0000 +++ x264-0.158.2988+git-20191101.7817004/common/aarch64/asm-offsets.h 2019-11-09 05:16:29.000000000 +0000 @@ -1,7 +1,7 @@ /***************************************************************************** * asm-offsets.h: asm offsets for aarch64 ***************************************************************************** - * Copyright (C) 2014-2017 x264 project + * Copyright (C) 2014-2019 x264 project * * Authors: Janne Grunau * diff -Nru x264-0.152.2854+gite9a5903/common/aarch64/asm.S x264-0.158.2988+git-20191101.7817004/common/aarch64/asm.S --- x264-0.152.2854+gite9a5903/common/aarch64/asm.S 2017-12-31 
12:50:51.000000000 +0000 +++ x264-0.158.2988+git-20191101.7817004/common/aarch64/asm.S 2019-11-09 05:16:29.000000000 +0000 @@ -1,7 +1,7 @@ /***************************************************************************** * asm.S: AArch64 utility macros ***************************************************************************** - * Copyright (C) 2008-2017 x264 project + * Copyright (C) 2008-2019 x264 project * * Authors: Mans Rullgard * David Conrad @@ -27,12 +27,27 @@ #include "config.h" +#define GLUE(a, b) a ## b +#define JOIN(a, b) GLUE(a, b) + #ifdef PREFIX -# define EXTERN_ASM _ +# define BASE _x264_ +# define SYM_PREFIX _ +#else +# define BASE x264_ +# define SYM_PREFIX +#endif + +#ifdef BIT_DEPTH +# define EXTERN_ASM JOIN(JOIN(BASE, BIT_DEPTH), _) #else -# define EXTERN_ASM +# define EXTERN_ASM BASE #endif +#define X(s) JOIN(EXTERN_ASM, s) +#define X264(s) JOIN(BASE, s) +#define EXT(s) JOIN(SYM_PREFIX, s) + #ifdef __ELF__ # define ELF #else @@ -53,7 +68,11 @@ .macro function name, export=0, align=2 .macro endfunc +.if \export +ELF .size EXTERN_ASM\name, . - EXTERN_ASM\name +.else ELF .size \name, . 
- \name +.endif FUNC .endfunc .purgem endfunc .endm @@ -94,10 +113,6 @@ #endif .endm -#define GLUE(a, b) a ## b -#define JOIN(a, b) GLUE(a, b) -#define X(s) JOIN(EXTERN_ASM, s) - #define FDEC_STRIDE 32 #define FENC_STRIDE 16 diff -Nru x264-0.152.2854+gite9a5903/common/aarch64/bitstream-a.S x264-0.158.2988+git-20191101.7817004/common/aarch64/bitstream-a.S --- x264-0.152.2854+gite9a5903/common/aarch64/bitstream-a.S 2017-12-31 12:50:51.000000000 +0000 +++ x264-0.158.2988+git-20191101.7817004/common/aarch64/bitstream-a.S 2019-11-09 05:16:29.000000000 +0000 @@ -1,7 +1,7 @@ /***************************************************************************** * bitstream-a.S: aarch64 bitstream functions ***************************************************************************** - * Copyright (C) 2014-2017 x264 project + * Copyright (C) 2014-2019 x264 project * * Authors: Janne Grunau * @@ -25,7 +25,7 @@ #include "asm.S" -function x264_nal_escape_neon, export=1 +function nal_escape_neon, export=1 movi v0.16b, #0xff movi v4.16b, #4 mov w3, #3 diff -Nru x264-0.152.2854+gite9a5903/common/aarch64/bitstream.h x264-0.158.2988+git-20191101.7817004/common/aarch64/bitstream.h --- x264-0.152.2854+gite9a5903/common/aarch64/bitstream.h 1970-01-01 00:00:00.000000000 +0000 +++ x264-0.158.2988+git-20191101.7817004/common/aarch64/bitstream.h 2019-11-09 05:16:29.000000000 +0000 @@ -0,0 +1,32 @@ +/***************************************************************************** + * bitstream.h: aarch64 bitstream functions + ***************************************************************************** + * Copyright (C) 2017-2019 x264 project + * + * Authors: Anton Mitrofanov + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. 
+ * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02111, USA. + * + * This program is also available under a commercial proprietary license. + * For more information, contact us at licensing@x264.com. + *****************************************************************************/ + +#ifndef X264_AARCH64_BITSTREAM_H +#define X264_AARCH64_BITSTREAM_H + +#define x264_nal_escape_neon x264_template(nal_escape_neon) +uint8_t *x264_nal_escape_neon( uint8_t *dst, uint8_t *src, uint8_t *end ); + +#endif diff -Nru x264-0.152.2854+gite9a5903/common/aarch64/cabac-a.S x264-0.158.2988+git-20191101.7817004/common/aarch64/cabac-a.S --- x264-0.152.2854+gite9a5903/common/aarch64/cabac-a.S 2017-12-31 12:50:51.000000000 +0000 +++ x264-0.158.2988+git-20191101.7817004/common/aarch64/cabac-a.S 2019-11-09 05:16:29.000000000 +0000 @@ -1,7 +1,7 @@ /***************************************************************************** * cabac-a.S: aarch64 cabac ***************************************************************************** - * Copyright (C) 2014-2017 x264 project + * Copyright (C) 2014-2019 x264 project * * Authors: Janne Grunau * @@ -29,9 +29,9 @@ // w11 holds x264_cabac_t.i_low // w12 holds x264_cabac_t.i_range -function x264_cabac_encode_decision_asm, export=1 - movrel x8, X(x264_cabac_range_lps) - movrel x9, X(x264_cabac_transition) +function cabac_encode_decision_asm, export=1 + movrel x8, X264(cabac_range_lps) + movrel x9, X264(cabac_transition) add w10, w1, #CABAC_STATE ldrb w3, [x0, x10] // i_state ldr w12, [x0, #CABAC_I_RANGE] @@ -82,10 +82,10 @@ 1: ldr x7, [x0, #CABAC_P] asr 
w5, w4, #8 // carry - ldrb w8, [x7, #-1] + ldurb w8, [x7, #-1] add w8, w8, w5 sub w5, w5, #1 - strb w8, [x7, #-1] + sturb w8, [x7, #-1] cbz w6, 3f 2: subs w6, w6, #1 @@ -101,7 +101,7 @@ ret endfunc -function x264_cabac_encode_bypass_asm, export=1 +function cabac_encode_bypass_asm, export=1 ldr w12, [x0, #CABAC_I_RANGE] ldr w11, [x0, #CABAC_I_LOW] ldr w2, [x0, #CABAC_I_QUEUE] @@ -114,7 +114,7 @@ ret endfunc -function x264_cabac_encode_terminal_asm, export=1 +function cabac_encode_terminal_asm, export=1 ldr w12, [x0, #CABAC_I_RANGE] ldr w11, [x0, #CABAC_I_LOW] sub w12, w12, #2 diff -Nru x264-0.152.2854+gite9a5903/common/aarch64/dct-a.S x264-0.158.2988+git-20191101.7817004/common/aarch64/dct-a.S --- x264-0.152.2854+gite9a5903/common/aarch64/dct-a.S 2017-12-31 12:50:51.000000000 +0000 +++ x264-0.158.2988+git-20191101.7817004/common/aarch64/dct-a.S 2019-11-09 05:16:29.000000000 +0000 @@ -1,7 +1,7 @@ /**************************************************************************** * dct-a.S: aarch64 transform and zigzag ***************************************************************************** - * Copyright (C) 2009-2017 x264 project + * Copyright (C) 2009-2019 x264 project * * Authors: David Conrad * Janne Grunau @@ -79,7 +79,7 @@ .endm -function x264_dct4x4dc_neon, export=1 +function dct4x4dc_neon, export=1 ld1 {v0.4h,v1.4h,v2.4h,v3.4h}, [x0] movi v31.4h, #1 SUMSUB_AB v4.4h, v5.4h, v0.4h, v1.4h @@ -102,7 +102,7 @@ ret endfunc -function x264_idct4x4dc_neon, export=1 +function idct4x4dc_neon, export=1 ld1 {v0.4h,v1.4h,v2.4h,v3.4h}, [x0] SUMSUB_AB v4.4h, v5.4h, v0.4h, v1.4h SUMSUB_AB v6.4h, v7.4h, v2.4h, v3.4h @@ -131,7 +131,7 @@ sub \v3, \v7, \v5 .endm -function x264_sub4x4_dct_neon, export=1 +function sub4x4_dct_neon, export=1 mov x3, #FENC_STRIDE mov x4, #FDEC_STRIDE ld1 {v0.s}[0], [x1], x3 @@ -154,7 +154,7 @@ ret endfunc -function x264_sub8x4_dct_neon +function sub8x4_dct_neon ld1 {v0.8b}, [x1], x3 ld1 {v1.8b}, [x2], x4 usubl v16.8h, v0.8b, v1.8b @@ -193,34 +193,34 @@ 
ret endfunc -function x264_sub8x8_dct_neon, export=1 +function sub8x8_dct_neon, export=1 mov x5, x30 mov x3, #FENC_STRIDE mov x4, #FDEC_STRIDE - bl x264_sub8x4_dct_neon + bl sub8x4_dct_neon mov x30, x5 - b x264_sub8x4_dct_neon + b sub8x4_dct_neon endfunc -function x264_sub16x16_dct_neon, export=1 +function sub16x16_dct_neon, export=1 mov x5, x30 mov x3, #FENC_STRIDE mov x4, #FDEC_STRIDE - bl x264_sub8x4_dct_neon - bl x264_sub8x4_dct_neon + bl sub8x4_dct_neon + bl sub8x4_dct_neon sub x1, x1, #8*FENC_STRIDE-8 sub x2, x2, #8*FDEC_STRIDE-8 - bl x264_sub8x4_dct_neon - bl x264_sub8x4_dct_neon + bl sub8x4_dct_neon + bl sub8x4_dct_neon sub x1, x1, #8 sub x2, x2, #8 - bl x264_sub8x4_dct_neon - bl x264_sub8x4_dct_neon + bl sub8x4_dct_neon + bl sub8x4_dct_neon sub x1, x1, #8*FENC_STRIDE-8 sub x2, x2, #8*FDEC_STRIDE-8 - bl x264_sub8x4_dct_neon + bl sub8x4_dct_neon mov x30, x5 - b x264_sub8x4_dct_neon + b sub8x4_dct_neon endfunc @@ -255,7 +255,7 @@ SUMSUB_SHR2 2, v3.8h, v5.8h, v30.8h, v29.8h, v20.8h, v21.8h .endm -function x264_sub8x8_dct8_neon, export=1 +function sub8x8_dct8_neon, export=1 mov x3, #FENC_STRIDE mov x4, #FDEC_STRIDE ld1 {v16.8b}, [x1], x3 @@ -292,19 +292,19 @@ ret endfunc -function x264_sub16x16_dct8_neon, export=1 +function sub16x16_dct8_neon, export=1 mov x7, x30 - bl X(x264_sub8x8_dct8_neon) + bl X(sub8x8_dct8_neon) sub x1, x1, #FENC_STRIDE*8 - 8 sub x2, x2, #FDEC_STRIDE*8 - 8 - bl X(x264_sub8x8_dct8_neon) + bl X(sub8x8_dct8_neon) sub x1, x1, #8 sub x2, x2, #8 - bl X(x264_sub8x8_dct8_neon) + bl X(sub8x8_dct8_neon) mov x30, x7 sub x1, x1, #FENC_STRIDE*8 - 8 sub x2, x2, #FDEC_STRIDE*8 - 8 - b X(x264_sub8x8_dct8_neon) + b X(sub8x8_dct8_neon) endfunc @@ -317,7 +317,7 @@ add \d6, \d6, \d1 .endm -function x264_add4x4_idct_neon, export=1 +function add4x4_idct_neon, export=1 mov x2, #FDEC_STRIDE ld1 {v0.4h,v1.4h,v2.4h,v3.4h}, [x1] @@ -357,7 +357,7 @@ ret endfunc -function x264_add8x4_idct_neon, export=1 +function add8x4_idct_neon, export=1 ld1 {v0.8h,v1.8h}, [x1], 
#32 ld1 {v2.8h,v3.8h}, [x1], #32 transpose v20.2d, v21.2d, v0.2d, v2.2d @@ -398,29 +398,29 @@ ret endfunc -function x264_add8x8_idct_neon, export=1 +function add8x8_idct_neon, export=1 mov x2, #FDEC_STRIDE mov x5, x30 - bl X(x264_add8x4_idct_neon) + bl X(add8x4_idct_neon) mov x30, x5 - b X(x264_add8x4_idct_neon) + b X(add8x4_idct_neon) endfunc -function x264_add16x16_idct_neon, export=1 +function add16x16_idct_neon, export=1 mov x2, #FDEC_STRIDE mov x5, x30 - bl X(x264_add8x4_idct_neon) - bl X(x264_add8x4_idct_neon) + bl X(add8x4_idct_neon) + bl X(add8x4_idct_neon) sub x0, x0, #8*FDEC_STRIDE-8 - bl X(x264_add8x4_idct_neon) - bl X(x264_add8x4_idct_neon) + bl X(add8x4_idct_neon) + bl X(add8x4_idct_neon) sub x0, x0, #8 - bl X(x264_add8x4_idct_neon) - bl X(x264_add8x4_idct_neon) + bl X(add8x4_idct_neon) + bl X(add8x4_idct_neon) sub x0, x0, #8*FDEC_STRIDE-8 - bl X(x264_add8x4_idct_neon) + bl X(add8x4_idct_neon) mov x30, x5 - b X(x264_add8x4_idct_neon) + b X(add8x4_idct_neon) endfunc .macro IDCT8_1D type @@ -446,7 +446,7 @@ SUMSUB_AB v19.8h, v20.8h, v2.8h, v20.8h .endm -function x264_add8x8_idct8_neon, export=1 +function add8x8_idct8_neon, export=1 mov x2, #FDEC_STRIDE ld1 {v16.8h,v17.8h}, [x1], #32 ld1 {v18.8h,v19.8h}, [x1], #32 @@ -503,19 +503,19 @@ ret endfunc -function x264_add16x16_idct8_neon, export=1 +function add16x16_idct8_neon, export=1 mov x7, x30 - bl X(x264_add8x8_idct8_neon) + bl X(add8x8_idct8_neon) sub x0, x0, #8*FDEC_STRIDE-8 - bl X(x264_add8x8_idct8_neon) + bl X(add8x8_idct8_neon) sub x0, x0, #8 - bl X(x264_add8x8_idct8_neon) + bl X(add8x8_idct8_neon) sub x0, x0, #8*FDEC_STRIDE-8 mov x30, x7 - b X(x264_add8x8_idct8_neon) + b X(add8x8_idct8_neon) endfunc -function x264_add8x8_idct_dc_neon, export=1 +function add8x8_idct_dc_neon, export=1 mov x2, #FDEC_STRIDE ld1 {v16.4h}, [x1] ld1 {v0.8b}, [x0], x2 @@ -605,7 +605,7 @@ st1 {v7.16b}, [x2], x3 .endm -function x264_add16x16_idct_dc_neon, export=1 +function add16x16_idct_dc_neon, export=1 mov x2, x0 mov x3, 
#FDEC_STRIDE @@ -640,7 +640,7 @@ add \dst\().8h, \dst\().8h, \t3\().8h .endm -function x264_sub8x8_dct_dc_neon, export=1 +function sub8x8_dct_dc_neon, export=1 mov x3, #FENC_STRIDE mov x4, #FDEC_STRIDE @@ -660,7 +660,7 @@ ret endfunc -function x264_sub8x16_dct_dc_neon, export=1 +function sub8x16_dct_dc_neon, export=1 mov x3, #FENC_STRIDE mov x4, #FDEC_STRIDE sub4x4x2_dct_dc v0, v16, v17, v18, v19, v20, v21, v22, v23 @@ -689,7 +689,7 @@ ret endfunc -function x264_zigzag_interleave_8x8_cavlc_neon, export=1 +function zigzag_interleave_8x8_cavlc_neon, export=1 mov x3, #7 movi v31.4s, #1 ld4 {v0.8h,v1.8h,v2.8h,v3.8h}, [x1], #64 @@ -718,7 +718,7 @@ ret endfunc -function x264_zigzag_scan_4x4_frame_neon, export=1 +function zigzag_scan_4x4_frame_neon, export=1 movrel x2, scan4x4_frame ld1 {v0.16b,v1.16b}, [x1] ld1 {v16.16b,v17.16b}, [x2] @@ -729,7 +729,7 @@ endfunc .macro zigzag_sub_4x4 f ac -function x264_zigzag_sub_4x4\ac\()_\f\()_neon, export=1 +function zigzag_sub_4x4\ac\()_\f\()_neon, export=1 mov x9, #FENC_STRIDE mov x4, #FDEC_STRIDE movrel x5, sub4x4_\f @@ -772,7 +772,7 @@ zigzag_sub_4x4 frame zigzag_sub_4x4 frame, ac -function x264_zigzag_scan_4x4_field_neon, export=1 +function zigzag_scan_4x4_field_neon, export=1 movrel x2, scan4x4_field ld1 {v0.8h,v1.8h}, [x1] ld1 {v16.16b}, [x2] @@ -781,7 +781,7 @@ ret endfunc -function x264_zigzag_scan_8x8_frame_neon, export=1 +function zigzag_scan_8x8_frame_neon, export=1 movrel x2, scan8x8_frame ld1 {v0.8h,v1.8h}, [x1], #32 ld1 {v2.8h,v3.8h}, [x1], #32 @@ -841,7 +841,7 @@ .byte T(7,5), T(7,6), T(6,7), T(7,7) endconst -function x264_zigzag_scan_8x8_field_neon, export=1 +function zigzag_scan_8x8_field_neon, export=1 movrel x2, scan8x8_field ld1 {v0.8h,v1.8h}, [x1], #32 ld1 {v2.8h,v3.8h}, [x1], #32 @@ -868,7 +868,7 @@ endfunc .macro zigzag_sub8x8 f -function x264_zigzag_sub_8x8_\f\()_neon, export=1 +function zigzag_sub_8x8_\f\()_neon, export=1 movrel x4, sub8x8_\f mov x5, #FENC_STRIDE mov x6, #FDEC_STRIDE diff -Nru 
x264-0.152.2854+gite9a5903/common/aarch64/dct.h x264-0.158.2988+git-20191101.7817004/common/aarch64/dct.h --- x264-0.152.2854+gite9a5903/common/aarch64/dct.h 2017-12-31 12:50:51.000000000 +0000 +++ x264-0.158.2988+git-20191101.7817004/common/aarch64/dct.h 2019-11-09 05:16:29.000000000 +0000 @@ -1,7 +1,7 @@ /***************************************************************************** * dct.h: aarch64 transform and zigzag ***************************************************************************** - * Copyright (C) 2009-2017 x264 project + * Copyright (C) 2009-2019 x264 project * * Authors: David Conrad * Janne Grunau @@ -27,41 +27,68 @@ #ifndef X264_AARCH64_DCT_H #define X264_AARCH64_DCT_H +#define x264_dct4x4dc_neon x264_template(dct4x4dc_neon) void x264_dct4x4dc_neon( int16_t d[16] ); +#define x264_idct4x4dc_neon x264_template(idct4x4dc_neon) void x264_idct4x4dc_neon( int16_t d[16] ); +#define x264_sub4x4_dct_neon x264_template(sub4x4_dct_neon) void x264_sub4x4_dct_neon( int16_t dct[16], uint8_t *pix1, uint8_t *pix2 ); +#define x264_sub8x8_dct_neon x264_template(sub8x8_dct_neon) void x264_sub8x8_dct_neon( int16_t dct[4][16], uint8_t *pix1, uint8_t *pix2 ); +#define x264_sub16x16_dct_neon x264_template(sub16x16_dct_neon) void x264_sub16x16_dct_neon( int16_t dct[16][16], uint8_t *pix1, uint8_t *pix2 ); +#define x264_add4x4_idct_neon x264_template(add4x4_idct_neon) void x264_add4x4_idct_neon( uint8_t *p_dst, int16_t dct[16] ); +#define x264_add8x8_idct_neon x264_template(add8x8_idct_neon) void x264_add8x8_idct_neon( uint8_t *p_dst, int16_t dct[4][16] ); +#define x264_add16x16_idct_neon x264_template(add16x16_idct_neon) void x264_add16x16_idct_neon( uint8_t *p_dst, int16_t dct[16][16] ); +#define x264_add8x8_idct_dc_neon x264_template(add8x8_idct_dc_neon) void x264_add8x8_idct_dc_neon( uint8_t *p_dst, int16_t dct[4] ); +#define x264_add16x16_idct_dc_neon x264_template(add16x16_idct_dc_neon) void x264_add16x16_idct_dc_neon( uint8_t *p_dst, int16_t dct[16] ); +#define 
x264_sub8x8_dct_dc_neon x264_template(sub8x8_dct_dc_neon) void x264_sub8x8_dct_dc_neon( int16_t dct[4], uint8_t *pix1, uint8_t *pix2 ); +#define x264_sub8x16_dct_dc_neon x264_template(sub8x16_dct_dc_neon) void x264_sub8x16_dct_dc_neon( int16_t dct[8], uint8_t *pix1, uint8_t *pix2 ); +#define x264_sub8x8_dct8_neon x264_template(sub8x8_dct8_neon) void x264_sub8x8_dct8_neon( int16_t dct[64], uint8_t *pix1, uint8_t *pix2 ); +#define x264_sub16x16_dct8_neon x264_template(sub16x16_dct8_neon) void x264_sub16x16_dct8_neon( int16_t dct[4][64], uint8_t *pix1, uint8_t *pix2 ); +#define x264_add8x8_idct8_neon x264_template(add8x8_idct8_neon) void x264_add8x8_idct8_neon( uint8_t *p_dst, int16_t dct[64] ); +#define x264_add16x16_idct8_neon x264_template(add16x16_idct8_neon) void x264_add16x16_idct8_neon( uint8_t *p_dst, int16_t dct[4][64] ); +#define x264_zigzag_scan_4x4_frame_neon x264_template(zigzag_scan_4x4_frame_neon) void x264_zigzag_scan_4x4_frame_neon( int16_t level[16], int16_t dct[16] ); +#define x264_zigzag_scan_4x4_field_neon x264_template(zigzag_scan_4x4_field_neon) void x264_zigzag_scan_4x4_field_neon( int16_t level[16], int16_t dct[16] ); +#define x264_zigzag_scan_8x8_frame_neon x264_template(zigzag_scan_8x8_frame_neon) void x264_zigzag_scan_8x8_frame_neon( int16_t level[64], int16_t dct[64] ); +#define x264_zigzag_scan_8x8_field_neon x264_template(zigzag_scan_8x8_field_neon) void x264_zigzag_scan_8x8_field_neon( int16_t level[64], int16_t dct[64] ); +#define x264_zigzag_sub_4x4_field_neon x264_template(zigzag_sub_4x4_field_neon) int x264_zigzag_sub_4x4_field_neon( dctcoef level[16], const pixel *p_src, pixel *p_dst ); +#define x264_zigzag_sub_4x4ac_field_neon x264_template(zigzag_sub_4x4ac_field_neon) int x264_zigzag_sub_4x4ac_field_neon( dctcoef level[16], const pixel *p_src, pixel *p_dst, dctcoef *dc ); +#define x264_zigzag_sub_4x4_frame_neon x264_template(zigzag_sub_4x4_frame_neon) int x264_zigzag_sub_4x4_frame_neon( dctcoef level[16], const pixel *p_src, 
pixel *p_dst ); +#define x264_zigzag_sub_4x4ac_frame_neon x264_template(zigzag_sub_4x4ac_frame_neon) int x264_zigzag_sub_4x4ac_frame_neon( dctcoef level[16], const pixel *p_src, pixel *p_dst, dctcoef *dc ); +#define x264_zigzag_sub_8x8_field_neon x264_template(zigzag_sub_8x8_field_neon) int x264_zigzag_sub_8x8_field_neon( dctcoef level[16], const pixel *p_src, pixel *p_dst ); +#define x264_zigzag_sub_8x8_frame_neon x264_template(zigzag_sub_8x8_frame_neon) int x264_zigzag_sub_8x8_frame_neon( dctcoef level[16], const pixel *p_src, pixel *p_dst ); +#define x264_zigzag_interleave_8x8_cavlc_neon x264_template(zigzag_interleave_8x8_cavlc_neon) void x264_zigzag_interleave_8x8_cavlc_neon( dctcoef *dst, dctcoef *src, uint8_t *nnz ); #endif diff -Nru x264-0.152.2854+gite9a5903/common/aarch64/deblock-a.S x264-0.158.2988+git-20191101.7817004/common/aarch64/deblock-a.S --- x264-0.152.2854+gite9a5903/common/aarch64/deblock-a.S 2017-12-31 12:50:51.000000000 +0000 +++ x264-0.158.2988+git-20191101.7817004/common/aarch64/deblock-a.S 2019-11-09 05:16:29.000000000 +0000 @@ -1,7 +1,7 @@ /***************************************************************************** * deblock.S: aarch64 deblocking ***************************************************************************** - * Copyright (C) 2009-2017 x264 project + * Copyright (C) 2009-2019 x264 project * * Authors: Mans Rullgard * Janne Grunau @@ -108,7 +108,7 @@ sqxtun2 v0.16b, v24.8h .endm -function x264_deblock_v_luma_neon, export=1 +function deblock_v_luma_neon, export=1 h264_loop_filter_start ld1 {v0.16b}, [x0], x1 @@ -131,7 +131,7 @@ ret endfunc -function x264_deblock_h_luma_neon, export=1 +function deblock_h_luma_neon, export=1 h264_loop_filter_start sub x0, x0, #4 @@ -302,7 +302,7 @@ bit v2.16b, v26.16b, v18.16b // q2'_2 .endm -function x264_deblock_v_luma_intra_neon, export=1 +function deblock_v_luma_intra_neon, export=1 h264_loop_filter_start_intra ld1 {v0.16b}, [x0], x1 // q0 @@ -328,7 +328,7 @@ ret endfunc -function 
x264_deblock_h_luma_intra_neon, export=1 +function deblock_h_luma_intra_neon, export=1 h264_loop_filter_start_intra sub x0, x0, #4 @@ -421,7 +421,7 @@ sqxtun2 v0.16b, v23.8h .endm -function x264_deblock_v_chroma_neon, export=1 +function deblock_v_chroma_neon, export=1 h264_loop_filter_start sub x0, x0, x1, lsl #1 @@ -439,7 +439,7 @@ ret endfunc -function x264_deblock_h_chroma_neon, export=1 +function deblock_h_chroma_neon, export=1 h264_loop_filter_start sub x0, x0, #4 @@ -472,7 +472,7 @@ ret endfunc -function x264_deblock_h_chroma_422_neon, export=1 +function deblock_h_chroma_422_neon, export=1 add x5, x0, x1 sub x0, x0, #4 add x1, x1, x1 @@ -516,7 +516,7 @@ sqxtun v17.8b, v22.8h .endm -function x264_deblock_h_chroma_mbaff_neon, export=1 +function deblock_h_chroma_mbaff_neon, export=1 h264_loop_filter_start sub x4, x0, #4 @@ -575,7 +575,7 @@ bit v17.16b, v25.16b, v26.16b .endm -function x264_deblock_v_chroma_intra_neon, export=1 +function deblock_v_chroma_intra_neon, export=1 h264_loop_filter_start_intra sub x0, x0, x1, lsl #1 @@ -593,7 +593,7 @@ ret endfunc -function x264_deblock_h_chroma_intra_mbaff_neon, export=1 +function deblock_h_chroma_intra_mbaff_neon, export=1 h264_loop_filter_start_intra sub x4, x0, #4 @@ -615,7 +615,7 @@ ret endfunc -function x264_deblock_h_chroma_intra_neon, export=1 +function deblock_h_chroma_intra_neon, export=1 h264_loop_filter_start_intra sub x4, x0, #4 @@ -645,7 +645,7 @@ ret endfunc -function x264_deblock_h_chroma_422_intra_neon, export=1 +function deblock_h_chroma_422_intra_neon, export=1 h264_loop_filter_start_intra sub x4, x0, #4 @@ -697,12 +697,12 @@ ret endfunc -//static void deblock_strength_c( uint8_t nnz[X264_SCAN8_SIZE], -// int8_t ref[2][X264_SCAN8_LUMA_SIZE], -// int16_t mv[2][X264_SCAN8_LUMA_SIZE][2], -// uint8_t bs[2][8][4], int mvy_limit, -// int bframe ) -function x264_deblock_strength_neon, export=1 +// void deblock_strength( uint8_t nnz[X264_SCAN8_SIZE], +// int8_t ref[2][X264_SCAN8_LUMA_SIZE], +// int16_t 
mv[2][X264_SCAN8_LUMA_SIZE][2], +// uint8_t bs[2][8][4], int mvy_limit, +// int bframe ) +function deblock_strength_neon, export=1 movi v4.16b, #0 lsl w4, w4, #8 add x3, x3, #32 diff -Nru x264-0.152.2854+gite9a5903/common/aarch64/deblock.h x264-0.158.2988+git-20191101.7817004/common/aarch64/deblock.h --- x264-0.152.2854+gite9a5903/common/aarch64/deblock.h 1970-01-01 00:00:00.000000000 +0000 +++ x264-0.158.2988+git-20191101.7817004/common/aarch64/deblock.h 2019-11-09 05:16:29.000000000 +0000 @@ -0,0 +1,58 @@ +/***************************************************************************** + * deblock.h: aarch64 deblocking + ***************************************************************************** + * Copyright (C) 2017-2019 x264 project + * + * Authors: Anton Mitrofanov + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02111, USA. + * + * This program is also available under a commercial proprietary license. + * For more information, contact us at licensing@x264.com. 
+ *****************************************************************************/ + +#ifndef X264_AARCH64_DEBLOCK_H +#define X264_AARCH64_DEBLOCK_H + +#define x264_deblock_v_luma_neon x264_template(deblock_v_luma_neon) +void x264_deblock_v_luma_neon ( uint8_t *pix, intptr_t stride, int alpha, int beta, int8_t *tc0 ); +#define x264_deblock_h_luma_neon x264_template(deblock_h_luma_neon) +void x264_deblock_h_luma_neon ( uint8_t *pix, intptr_t stride, int alpha, int beta, int8_t *tc0 ); +#define x264_deblock_v_chroma_neon x264_template(deblock_v_chroma_neon) +void x264_deblock_v_chroma_neon( uint8_t *pix, intptr_t stride, int alpha, int beta, int8_t *tc0 ); +#define x264_deblock_h_chroma_neon x264_template(deblock_h_chroma_neon) +void x264_deblock_h_chroma_neon( uint8_t *pix, intptr_t stride, int alpha, int beta, int8_t *tc0 ); +#define x264_deblock_strength_neon x264_template(deblock_strength_neon) +void x264_deblock_strength_neon( uint8_t nnz[X264_SCAN8_SIZE], int8_t ref[2][X264_SCAN8_LUMA_SIZE], + int16_t mv[2][X264_SCAN8_LUMA_SIZE][2], uint8_t bs[2][8][4], + int mvy_limit, int bframe ); +#define x264_deblock_h_chroma_422_neon x264_template(deblock_h_chroma_422_neon) +void x264_deblock_h_chroma_422_neon( uint8_t *pix, intptr_t stride, int alpha, int beta, int8_t *tc0 ); +#define x264_deblock_h_chroma_mbaff_neon x264_template(deblock_h_chroma_mbaff_neon) +void x264_deblock_h_chroma_mbaff_neon( uint8_t *pix, intptr_t stride, int alpha, int beta, int8_t *tc0 ); +#define x264_deblock_h_chroma_intra_mbaff_neon x264_template(deblock_h_chroma_intra_mbaff_neon) +void x264_deblock_h_chroma_intra_mbaff_neon( uint8_t *pix, intptr_t stride, int alpha, int beta ); +#define x264_deblock_h_chroma_intra_neon x264_template(deblock_h_chroma_intra_neon) +void x264_deblock_h_chroma_intra_neon( uint8_t *pix, intptr_t stride, int alpha, int beta ); +#define x264_deblock_h_chroma_422_intra_neon x264_template(deblock_h_chroma_422_intra_neon) +void x264_deblock_h_chroma_422_intra_neon( 
uint8_t *pix, intptr_t stride, int alpha, int beta ); +#define x264_deblock_v_chroma_intra_neon x264_template(deblock_v_chroma_intra_neon) +void x264_deblock_v_chroma_intra_neon( uint8_t *pix, intptr_t stride, int alpha, int beta ); +#define x264_deblock_h_luma_intra_neon x264_template(deblock_h_luma_intra_neon) +void x264_deblock_h_luma_intra_neon( uint8_t *pix, intptr_t stride, int alpha, int beta ); +#define x264_deblock_v_luma_intra_neon x264_template(deblock_v_luma_intra_neon) +void x264_deblock_v_luma_intra_neon( uint8_t *pix, intptr_t stride, int alpha, int beta ); + +#endif diff -Nru x264-0.152.2854+gite9a5903/common/aarch64/mc-a.S x264-0.158.2988+git-20191101.7817004/common/aarch64/mc-a.S --- x264-0.152.2854+gite9a5903/common/aarch64/mc-a.S 2017-12-31 12:50:51.000000000 +0000 +++ x264-0.158.2988+git-20191101.7817004/common/aarch64/mc-a.S 2019-11-09 05:16:29.000000000 +0000 @@ -1,7 +1,7 @@ /***************************************************************************** * mc.S: aarch64 motion compensation ***************************************************************************** - * Copyright (C) 2009-2017 x264 project + * Copyright (C) 2009-2019 x264 project * * Authors: David Conrad * Janne Grunau @@ -31,7 +31,7 @@ // note: prefetch stuff assumes 64-byte cacheline // void prefetch_ref( uint8_t *pix, intptr_t stride, int parity ) -function x264_prefetch_ref_aarch64, export=1 +function prefetch_ref_aarch64, export=1 cmp w2, #1 csel x2, xzr, x1, eq add x0, x0, #64 @@ -54,8 +54,8 @@ // void prefetch_fenc( uint8_t *pix_y, intptr_t stride_y, // uint8_t *pix_uv, intptr_t stride_uv, int mb_x ) -.macro x264_prefetch_fenc sub -function x264_prefetch_fenc_\sub\()_aarch64, export=1 +.macro prefetch_fenc sub +function prefetch_fenc_\sub\()_aarch64, export=1 and w6, w5, #3 and w7, w5, #3 mul x6, x6, x1 @@ -82,14 +82,14 @@ endfunc .endm -x264_prefetch_fenc 420 -x264_prefetch_fenc 422 +prefetch_fenc 420 +prefetch_fenc 422 // void pixel_avg( uint8_t *dst, intptr_t 
dst_stride, // uint8_t *src1, intptr_t src1_stride, // uint8_t *src2, intptr_t src2_stride, int weight ); .macro AVGH w h -function x264_pixel_avg_\w\()x\h\()_neon, export=1 +function pixel_avg_\w\()x\h\()_neon, export=1 mov w10, #64 cmp w6, #32 mov w9, #\h @@ -292,7 +292,7 @@ ret endfunc -function x264_pixel_avg2_w4_neon, export=1 +function pixel_avg2_w4_neon, export=1 1: subs w5, w5, #2 ld1 {v0.s}[0], [x2], x3 @@ -307,7 +307,7 @@ ret endfunc -function x264_pixel_avg2_w8_neon, export=1 +function pixel_avg2_w8_neon, export=1 1: subs w5, w5, #2 ld1 {v0.8b}, [x2], x3 @@ -322,7 +322,7 @@ ret endfunc -function x264_pixel_avg2_w16_neon, export=1 +function pixel_avg2_w16_neon, export=1 1: subs w5, w5, #2 ld1 {v0.16b}, [x2], x3 @@ -337,7 +337,7 @@ ret endfunc -function x264_pixel_avg2_w20_neon, export=1 +function pixel_avg2_w20_neon, export=1 sub x1, x1, #16 1: subs w5, w5, #2 @@ -373,7 +373,7 @@ // void mc_weight( uint8_t *src, intptr_t src_stride, uint8_t *dst, // intptr_t dst_stride, const x264_weight_t *weight, int h ) -function x264_mc_weight_w20_neon, export=1 +function mc_weight_w20_neon, export=1 weight_prologue full sub x1, x1, #16 1: @@ -409,7 +409,7 @@ ret endfunc -function x264_mc_weight_w16_neon, export=1 +function mc_weight_w16_neon, export=1 weight_prologue full weight16_loop: 1: @@ -438,7 +438,7 @@ ret endfunc -function x264_mc_weight_w8_neon, export=1 +function mc_weight_w8_neon, export=1 weight_prologue full 1: subs w9, w9, #2 @@ -458,7 +458,7 @@ ret endfunc -function x264_mc_weight_w4_neon, export=1 +function mc_weight_w4_neon, export=1 weight_prologue full 1: subs w9, w9, #2 @@ -474,7 +474,7 @@ ret endfunc -function x264_mc_weight_w20_nodenom_neon, export=1 +function mc_weight_w20_nodenom_neon, export=1 weight_prologue nodenom sub x1, x1, #16 1: @@ -505,7 +505,7 @@ ret endfunc -function x264_mc_weight_w16_nodenom_neon, export=1 +function mc_weight_w16_nodenom_neon, export=1 weight_prologue nodenom 1: subs w9, w9, #2 @@ -529,7 +529,7 @@ ret endfunc 
-function x264_mc_weight_w8_nodenom_neon, export=1 +function mc_weight_w8_nodenom_neon, export=1 weight_prologue nodenom 1: subs w9, w9, #2 @@ -547,7 +547,7 @@ ret endfunc -function x264_mc_weight_w4_nodenom_neon, export=1 +function mc_weight_w4_nodenom_neon, export=1 weight_prologue nodenom 1: subs w9, w9, #2 @@ -568,7 +568,7 @@ .endm .macro weight_simple name op -function x264_mc_weight_w20_\name\()_neon, export=1 +function mc_weight_w20_\name\()_neon, export=1 weight_simple_prologue 1: subs w5, w5, #2 @@ -588,7 +588,7 @@ ret endfunc -function x264_mc_weight_w16_\name\()_neon, export=1 +function mc_weight_w16_\name\()_neon, export=1 weight_simple_prologue 1: subs w5, w5, #2 @@ -602,7 +602,7 @@ ret endfunc -function x264_mc_weight_w8_\name\()_neon, export=1 +function mc_weight_w8_\name\()_neon, export=1 weight_simple_prologue 1: subs w5, w5, #2 @@ -616,7 +616,7 @@ ret endfunc -function x264_mc_weight_w4_\name\()_neon, export=1 +function mc_weight_w4_\name\()_neon, export=1 weight_simple_prologue 1: subs w5, w5, #2 @@ -635,7 +635,7 @@ // void mc_copy( uint8_t *dst, intptr_t dst_stride, uint8_t *src, intptr_t src_stride, int height ) -function x264_mc_copy_w4_neon, export=1 +function mc_copy_w4_neon, export=1 1: subs w4, w4, #4 ld1 {v0.s}[0], [x2], x3 @@ -650,7 +650,7 @@ ret endfunc -function x264_mc_copy_w8_neon, export=1 +function mc_copy_w8_neon, export=1 1: subs w4, w4, #4 ld1 {v0.8b}, [x2], x3 ld1 {v1.8b}, [x2], x3 @@ -664,7 +664,7 @@ ret endfunc -function x264_mc_copy_w16_neon, export=1 +function mc_copy_w16_neon, export=1 1: subs w4, w4, #4 ld1 {v0.16b}, [x2], x3 ld1 {v1.16b}, [x2], x3 @@ -678,11 +678,11 @@ ret endfunc -// void x264_mc_chroma_neon( uint8_t *dst_u, uint8_t *dst_v, -// intptr_t i_dst_stride, -// uint8_t *src, intptr_t i_src_stride, -// int dx, int dy, int i_width, int i_height ); -function x264_mc_chroma_neon, export=1 +// void mc_chroma( uint8_t *dst_u, uint8_t *dst_v, +// intptr_t i_dst_stride, +// uint8_t *src, intptr_t i_src_stride, +// int 
dx, int dy, int i_width, int i_height ); +function mc_chroma_neon, export=1 ldr w15, [sp] // height sbfx x12, x6, #3, #29 // asr(3) and sign extend sbfx x11, x5, #3, #29 // asr(3) and sign extend @@ -718,9 +718,9 @@ function mc_chroma_w\width\()_neon // since the element size varies, there's a different index for the 2nd store .if \width == 4 - .set st2, 1 + .set idx2, 1 .else - .set st2, 2 + .set idx2, 2 .endif CHROMA_MC_START b.eq 2f @@ -785,10 +785,10 @@ //pld [x3] //pld [x3, x4] - st1 {v16.\vsize}[0], [x0], x2 - st1 {v16.\vsize}[st2], [x1], x2 - st1 {v17.\vsize}[0], [x0], x2 - st1 {v17.\vsize}[st2], [x1], x2 + st1 {v16.\vsize}[0], [x0], x2 + st1 {v16.\vsize}[idx2], [x1], x2 + st1 {v17.\vsize}[0], [x0], x2 + st1 {v17.\vsize}[idx2], [x1], x2 b.gt 1b ret @@ -820,10 +820,10 @@ //pld [x3] //pld [x3, x4] - st1 {v16.\vsize}[0], [x0], x2 - st1 {v16.\vsize}[st2], [x0], x2 - st1 {v17.\vsize}[0], [x1], x2 - st1 {v17.\vsize}[st2], [x1], x2 + st1 {v16.\vsize}[0], [x0], x2 + st1 {v16.\vsize}[idx2], [x0], x2 + st1 {v17.\vsize}[0], [x1], x2 + st1 {v17.\vsize}[idx2], [x1], x2 b.gt 3b ret @@ -853,10 +853,10 @@ //pld [x3] //pld [x3, x4] - st1 {v16.\vsize}[0], [x0], x2 - st1 {v16.\vsize}[st2], [x0], x2 - st1 {v17.\vsize}[0], [x1], x2 - st1 {v17.\vsize}[st2], [x1], x2 + st1 {v16.\vsize}[0], [x0], x2 + st1 {v16.\vsize}[idx2], [x0], x2 + st1 {v17.\vsize}[0], [x1], x2 + st1 {v17.\vsize}[idx2], [x1], x2 b.gt 5b ret @@ -1016,9 +1016,9 @@ ret endfunc -//void hpel_filter( pixel *dsth, pixel *dstv, pixel *dstc, pixel *src, -// intptr_t stride, int width, int height, int16_t *buf ) -function x264_hpel_filter_neon, export=1 +// void hpel_filter( pixel *dsth, pixel *dstv, pixel *dstc, pixel *src, +// intptr_t stride, int width, int height, int16_t *buf ) +function hpel_filter_neon, export=1 ubfm x9, x3, #0, #3 add w15, w5, w9 sub x13, x3, x9 // align src @@ -1158,7 +1158,7 @@ // frame_init_lowres_core( uint8_t *src0, uint8_t *dst0, uint8_t *dsth, // uint8_t *dstv, uint8_t *dstc, intptr_t 
src_stride, // intptr_t dst_stride, int width, int height ) -function x264_frame_init_lowres_core_neon, export=1 +function frame_init_lowres_core_neon, export=1 ldr w8, [sp] sub x10, x6, w7, uxtw // dst_stride - width and x10, x10, #~15 @@ -1233,12 +1233,12 @@ ret endfunc -function x264_load_deinterleave_chroma_fenc_neon, export=1 +function load_deinterleave_chroma_fenc_neon, export=1 mov x4, #FENC_STRIDE/2 b load_deinterleave_chroma endfunc -function x264_load_deinterleave_chroma_fdec_neon, export=1 +function load_deinterleave_chroma_fdec_neon, export=1 mov x4, #FDEC_STRIDE/2 load_deinterleave_chroma: ld2 {v0.8b,v1.8b}, [x1], x2 @@ -1253,7 +1253,7 @@ ret endfunc -function x264_plane_copy_core_neon, export=1 +function plane_copy_core_neon, export=1 add w8, w4, #15 // 32-bit write clears the upper 32-bit the register and w4, w8, #~15 // safe use of the full reg since negative width makes no sense @@ -1282,7 +1282,7 @@ ret endfunc -function x264_plane_copy_swap_core_neon, export=1 +function plane_copy_swap_core_neon, export=1 lsl w4, w4, #1 sub x1, x1, x4 sub x3, x3, x4 @@ -1310,7 +1310,7 @@ ret endfunc -function x264_plane_copy_deinterleave_neon, export=1 +function plane_copy_deinterleave_neon, export=1 add w9, w6, #15 and w9, w9, #0xfffffff0 sub x1, x1, x9 @@ -1349,7 +1349,7 @@ b.gt 1b .endm -function x264_plane_copy_deinterleave_rgb_neon, export=1 +function plane_copy_deinterleave_rgb_neon, export=1 #if SYS_MACOSX ldr w8, [sp] ldp w9, w10, [sp, #4] @@ -1381,7 +1381,7 @@ ret endfunc -function x264_plane_copy_interleave_core_neon, export=1 +function plane_copy_interleave_core_neon, export=1 add w9, w6, #15 and w9, w9, #0xfffffff0 sub x1, x1, x9, lsl #1 @@ -1404,7 +1404,7 @@ ret endfunc -function x264_store_interleave_chroma_neon, export=1 +function store_interleave_chroma_neon, export=1 mov x5, #FDEC_STRIDE 1: ld1 {v0.8b}, [x2], x5 @@ -1431,7 +1431,7 @@ add v0.8h, v0.8h, v5.8h .endm -function x264_integral_init4h_neon, export=1 +function integral_init4h_neon, 
export=1 sub x3, x0, x2, lsl #1 ld1 {v6.8b,v7.8b}, [x1], #16 1: @@ -1466,7 +1466,7 @@ add v0.8h, v0.8h, \s\().8h .endm -function x264_integral_init8h_neon, export=1 +function integral_init8h_neon, export=1 sub x3, x0, x2, lsl #1 ld1 {v16.8b,v17.8b}, [x1], #16 1: @@ -1483,7 +1483,7 @@ ret endfunc -function x264_integral_init4v_neon, export=1 +function integral_init4v_neon, export=1 mov x3, x0 add x4, x0, x2, lsl #3 add x8, x0, x2, lsl #4 @@ -1518,7 +1518,7 @@ ret endfunc -function x264_integral_init8v_neon, export=1 +function integral_init8v_neon, export=1 add x2, x0, x1, lsl #4 sub x1, x1, #8 ands x3, x1, #16 - 1 @@ -1542,7 +1542,7 @@ ret endfunc -function x264_mbtree_propagate_cost_neon, export=1 +function mbtree_propagate_cost_neon, export=1 ld1r {v5.4s}, [x5] 8: subs w6, w6, #8 @@ -1593,7 +1593,7 @@ .short 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15 endconst -function x264_mbtree_propagate_list_internal_neon, export=1 +function mbtree_propagate_list_internal_neon, export=1 movrel x11, pw_0to15 dup v31.8h, w4 // bipred_weight movi v30.8h, #0xc0, lsl #8 @@ -1659,7 +1659,7 @@ ret endfunc -function x264_memcpy_aligned_neon, export=1 +function memcpy_aligned_neon, export=1 tst x2, #16 b.eq 32f sub x2, x2, #16 @@ -1684,7 +1684,7 @@ ret endfunc -function x264_memzero_aligned_neon, export=1 +function memzero_aligned_neon, export=1 movi v0.16b, #0 movi v1.16b, #0 1: @@ -1698,7 +1698,7 @@ endfunc // void mbtree_fix8_pack( int16_t *dst, float *src, int count ) -function x264_mbtree_fix8_pack_neon, export=1 +function mbtree_fix8_pack_neon, export=1 subs w3, w2, #8 b.lt 2f 1: @@ -1726,7 +1726,7 @@ endfunc // void mbtree_fix8_unpack( float *dst, int16_t *src, int count ) -function x264_mbtree_fix8_unpack_neon, export=1 +function mbtree_fix8_unpack_neon, export=1 subs w3, w2, #8 b.lt 2f 1: diff -Nru x264-0.152.2854+gite9a5903/common/aarch64/mc-c.c x264-0.158.2988+git-20191101.7817004/common/aarch64/mc-c.c --- x264-0.152.2854+gite9a5903/common/aarch64/mc-c.c 2017-12-31 
12:50:51.000000000 +0000 +++ x264-0.158.2988+git-20191101.7817004/common/aarch64/mc-c.c 2019-11-09 05:16:29.000000000 +0000 @@ -1,7 +1,7 @@ /***************************************************************************** * mc-c.c: aarch64 motion compensation ***************************************************************************** - * Copyright (C) 2009-2017 x264 project + * Copyright (C) 2009-2019 x264 project * * Authors: David Conrad * Janne Grunau @@ -27,54 +27,96 @@ #include "common/common.h" #include "mc.h" +#define x264_prefetch_ref_aarch64 x264_template(prefetch_ref_aarch64) void x264_prefetch_ref_aarch64( uint8_t *, intptr_t, int ); +#define x264_prefetch_fenc_420_aarch64 x264_template(prefetch_fenc_420_aarch64) void x264_prefetch_fenc_420_aarch64( uint8_t *, intptr_t, uint8_t *, intptr_t, int ); +#define x264_prefetch_fenc_422_aarch64 x264_template(prefetch_fenc_422_aarch64) void x264_prefetch_fenc_422_aarch64( uint8_t *, intptr_t, uint8_t *, intptr_t, int ); +#define x264_memcpy_aligned_neon x264_template(memcpy_aligned_neon) void *x264_memcpy_aligned_neon( void *dst, const void *src, size_t n ); +#define x264_memzero_aligned_neon x264_template(memzero_aligned_neon) void x264_memzero_aligned_neon( void *dst, size_t n ); +#define x264_pixel_avg_16x16_neon x264_template(pixel_avg_16x16_neon) void x264_pixel_avg_16x16_neon( uint8_t *, intptr_t, uint8_t *, intptr_t, uint8_t *, intptr_t, int ); +#define x264_pixel_avg_16x8_neon x264_template(pixel_avg_16x8_neon) void x264_pixel_avg_16x8_neon ( uint8_t *, intptr_t, uint8_t *, intptr_t, uint8_t *, intptr_t, int ); +#define x264_pixel_avg_8x16_neon x264_template(pixel_avg_8x16_neon) void x264_pixel_avg_8x16_neon ( uint8_t *, intptr_t, uint8_t *, intptr_t, uint8_t *, intptr_t, int ); +#define x264_pixel_avg_8x8_neon x264_template(pixel_avg_8x8_neon) void x264_pixel_avg_8x8_neon ( uint8_t *, intptr_t, uint8_t *, intptr_t, uint8_t *, intptr_t, int ); +#define x264_pixel_avg_8x4_neon 
x264_template(pixel_avg_8x4_neon) void x264_pixel_avg_8x4_neon ( uint8_t *, intptr_t, uint8_t *, intptr_t, uint8_t *, intptr_t, int ); +#define x264_pixel_avg_4x16_neon x264_template(pixel_avg_4x16_neon) void x264_pixel_avg_4x16_neon ( uint8_t *, intptr_t, uint8_t *, intptr_t, uint8_t *, intptr_t, int ); +#define x264_pixel_avg_4x8_neon x264_template(pixel_avg_4x8_neon) void x264_pixel_avg_4x8_neon ( uint8_t *, intptr_t, uint8_t *, intptr_t, uint8_t *, intptr_t, int ); +#define x264_pixel_avg_4x4_neon x264_template(pixel_avg_4x4_neon) void x264_pixel_avg_4x4_neon ( uint8_t *, intptr_t, uint8_t *, intptr_t, uint8_t *, intptr_t, int ); +#define x264_pixel_avg_4x2_neon x264_template(pixel_avg_4x2_neon) void x264_pixel_avg_4x2_neon ( uint8_t *, intptr_t, uint8_t *, intptr_t, uint8_t *, intptr_t, int ); +#define x264_pixel_avg2_w4_neon x264_template(pixel_avg2_w4_neon) void x264_pixel_avg2_w4_neon ( uint8_t *, intptr_t, uint8_t *, intptr_t, uint8_t *, int ); +#define x264_pixel_avg2_w8_neon x264_template(pixel_avg2_w8_neon) void x264_pixel_avg2_w8_neon ( uint8_t *, intptr_t, uint8_t *, intptr_t, uint8_t *, int ); +#define x264_pixel_avg2_w16_neon x264_template(pixel_avg2_w16_neon) void x264_pixel_avg2_w16_neon( uint8_t *, intptr_t, uint8_t *, intptr_t, uint8_t *, int ); +#define x264_pixel_avg2_w20_neon x264_template(pixel_avg2_w20_neon) void x264_pixel_avg2_w20_neon( uint8_t *, intptr_t, uint8_t *, intptr_t, uint8_t *, int ); +#define x264_plane_copy_core_neon x264_template(plane_copy_core_neon) void x264_plane_copy_core_neon( pixel *dst, intptr_t i_dst, pixel *src, intptr_t i_src, int w, int h ); +#define x264_plane_copy_swap_core_neon x264_template(plane_copy_swap_core_neon) void x264_plane_copy_swap_core_neon( pixel *dst, intptr_t i_dst, pixel *src, intptr_t i_src, int w, int h ); +#define x264_plane_copy_deinterleave_neon x264_template(plane_copy_deinterleave_neon) void x264_plane_copy_deinterleave_neon( pixel *dstu, intptr_t i_dstu, pixel *dstv, intptr_t i_dstv, 
pixel *src, intptr_t i_src, int w, int h ); +#define x264_plane_copy_deinterleave_rgb_neon x264_template(plane_copy_deinterleave_rgb_neon) void x264_plane_copy_deinterleave_rgb_neon( pixel *dsta, intptr_t i_dsta, pixel *dstb, intptr_t i_dstb, pixel *dstc, intptr_t i_dstc, pixel *src, intptr_t i_src, int pw, int w, int h ); +#define x264_plane_copy_interleave_core_neon x264_template(plane_copy_interleave_core_neon) void x264_plane_copy_interleave_core_neon( pixel *dst, intptr_t i_dst, pixel *srcu, intptr_t i_srcu, pixel *srcv, intptr_t i_srcv, int w, int h ); +#define x264_store_interleave_chroma_neon x264_template(store_interleave_chroma_neon) void x264_store_interleave_chroma_neon( pixel *dst, intptr_t i_dst, pixel *srcu, pixel *srcv, int height ); +#define x264_load_deinterleave_chroma_fdec_neon x264_template(load_deinterleave_chroma_fdec_neon) void x264_load_deinterleave_chroma_fdec_neon( pixel *dst, pixel *src, intptr_t i_src, int height ); +#define x264_load_deinterleave_chroma_fenc_neon x264_template(load_deinterleave_chroma_fenc_neon) void x264_load_deinterleave_chroma_fenc_neon( pixel *dst, pixel *src, intptr_t i_src, int height ); +#define x264_mc_weight_w16_neon x264_template(mc_weight_w16_neon) +#define x264_mc_weight_w16_nodenom_neon x264_template(mc_weight_w16_nodenom_neon) +#define x264_mc_weight_w16_offsetadd_neon x264_template(mc_weight_w16_offsetadd_neon) +#define x264_mc_weight_w16_offsetsub_neon x264_template(mc_weight_w16_offsetsub_neon) +#define x264_mc_weight_w20_neon x264_template(mc_weight_w20_neon) +#define x264_mc_weight_w20_nodenom_neon x264_template(mc_weight_w20_nodenom_neon) +#define x264_mc_weight_w20_offsetadd_neon x264_template(mc_weight_w20_offsetadd_neon) +#define x264_mc_weight_w20_offsetsub_neon x264_template(mc_weight_w20_offsetsub_neon) +#define x264_mc_weight_w4_neon x264_template(mc_weight_w4_neon) +#define x264_mc_weight_w4_nodenom_neon x264_template(mc_weight_w4_nodenom_neon) +#define x264_mc_weight_w4_offsetadd_neon 
x264_template(mc_weight_w4_offsetadd_neon) +#define x264_mc_weight_w4_offsetsub_neon x264_template(mc_weight_w4_offsetsub_neon) +#define x264_mc_weight_w8_neon x264_template(mc_weight_w8_neon) +#define x264_mc_weight_w8_nodenom_neon x264_template(mc_weight_w8_nodenom_neon) +#define x264_mc_weight_w8_offsetadd_neon x264_template(mc_weight_w8_offsetadd_neon) +#define x264_mc_weight_w8_offsetsub_neon x264_template(mc_weight_w8_offsetsub_neon) #define MC_WEIGHT(func)\ void x264_mc_weight_w20##func##_neon( uint8_t *, intptr_t, uint8_t *, intptr_t, const x264_weight_t *, int );\ void x264_mc_weight_w16##func##_neon( uint8_t *, intptr_t, uint8_t *, intptr_t, const x264_weight_t *, int );\ void x264_mc_weight_w8##func##_neon ( uint8_t *, intptr_t, uint8_t *, intptr_t, const x264_weight_t *, int );\ void x264_mc_weight_w4##func##_neon ( uint8_t *, intptr_t, uint8_t *, intptr_t, const x264_weight_t *, int );\ \ -static void (* x264_mc##func##_wtab_neon[6])( uint8_t *, intptr_t, uint8_t *, intptr_t, const x264_weight_t *, int ) =\ +static void (* mc##func##_wtab_neon[6])( uint8_t *, intptr_t, uint8_t *, intptr_t, const x264_weight_t *, int ) =\ {\ x264_mc_weight_w4##func##_neon,\ x264_mc_weight_w4##func##_neon,\ @@ -84,50 +126,64 @@ x264_mc_weight_w20##func##_neon,\ }; +#if !HIGH_BIT_DEPTH MC_WEIGHT() MC_WEIGHT(_nodenom) MC_WEIGHT(_offsetadd) MC_WEIGHT(_offsetsub) +#endif +#define x264_mc_copy_w4_neon x264_template(mc_copy_w4_neon) void x264_mc_copy_w4_neon ( uint8_t *, intptr_t, uint8_t *, intptr_t, int ); +#define x264_mc_copy_w8_neon x264_template(mc_copy_w8_neon) void x264_mc_copy_w8_neon ( uint8_t *, intptr_t, uint8_t *, intptr_t, int ); +#define x264_mc_copy_w16_neon x264_template(mc_copy_w16_neon) void x264_mc_copy_w16_neon( uint8_t *, intptr_t, uint8_t *, intptr_t, int ); +#define x264_mc_chroma_neon x264_template(mc_chroma_neon) void x264_mc_chroma_neon( uint8_t *, uint8_t *, intptr_t, uint8_t *, intptr_t, int, int, int, int ); +#define x264_integral_init4h_neon 
x264_template(integral_init4h_neon) void x264_integral_init4h_neon( uint16_t *, uint8_t *, intptr_t ); +#define x264_integral_init4v_neon x264_template(integral_init4v_neon) void x264_integral_init4v_neon( uint16_t *, uint16_t *, intptr_t ); +#define x264_integral_init8h_neon x264_template(integral_init8h_neon) void x264_integral_init8h_neon( uint16_t *, uint8_t *, intptr_t ); +#define x264_integral_init8v_neon x264_template(integral_init8v_neon) void x264_integral_init8v_neon( uint16_t *, intptr_t ); +#define x264_frame_init_lowres_core_neon x264_template(frame_init_lowres_core_neon) void x264_frame_init_lowres_core_neon( uint8_t *, uint8_t *, uint8_t *, uint8_t *, uint8_t *, intptr_t, intptr_t, int, int ); +#define x264_mbtree_propagate_cost_neon x264_template(mbtree_propagate_cost_neon) void x264_mbtree_propagate_cost_neon( int16_t *, uint16_t *, uint16_t *, uint16_t *, uint16_t *, float *, int ); +#define x264_mbtree_fix8_pack_neon x264_template(mbtree_fix8_pack_neon) void x264_mbtree_fix8_pack_neon( uint16_t *dst, float *src, int count ); +#define x264_mbtree_fix8_unpack_neon x264_template(mbtree_fix8_unpack_neon) void x264_mbtree_fix8_unpack_neon( float *dst, uint16_t *src, int count ); #if !HIGH_BIT_DEPTH -static void x264_weight_cache_neon( x264_t *h, x264_weight_t *w ) +static void weight_cache_neon( x264_t *h, x264_weight_t *w ) { if( w->i_scale == 1<<w->i_denom ) { if( w->i_offset < 0 ) { - w->weightfn = x264_mc_offsetsub_wtab_neon; + w->weightfn = mc_offsetsub_wtab_neon; w->cachea[0] = -w->i_offset; } else { - w->weightfn = x264_mc_offsetadd_wtab_neon; + w->weightfn = mc_offsetadd_wtab_neon; w->cachea[0] = w->i_offset; } } else if( !w->i_denom ) - w->weightfn = x264_mc_nodenom_wtab_neon; + w->weightfn = mc_nodenom_wtab_neon; else - w->weightfn = x264_mc_wtab_neon; + w->weightfn = mc_wtab_neon; } -static void (* const x264_pixel_avg_wtab_neon[6])( uint8_t *, intptr_t, uint8_t *, intptr_t, uint8_t *, int ) = +static void (* const pixel_avg_wtab_neon[6])( 
uint8_t *, intptr_t, uint8_t *, intptr_t, uint8_t *, int ) = { NULL, x264_pixel_avg2_w4_neon, @@ -137,7 +193,7 @@ x264_pixel_avg2_w20_neon, }; -static void (* const x264_mc_copy_wtab_neon[5])( uint8_t *, intptr_t, uint8_t *, intptr_t, int ) = +static void (* const mc_copy_wtab_neon[5])( uint8_t *, intptr_t, uint8_t *, intptr_t, int ) = { NULL, x264_mc_copy_w4_neon, @@ -160,7 +216,7 @@ if( qpel_idx & 5 ) /* qpel interpolation needed */ { uint8_t *src2 = src[x264_hpel_ref1[qpel_idx]] + offset + ((mvx&3) == 3); - x264_pixel_avg_wtab_neon[i_width>>2]( + pixel_avg_wtab_neon[i_width>>2]( dst, i_dst_stride, src1, i_src_stride, src2, i_height ); if( weight->weightfn ) @@ -169,7 +225,7 @@ else if( weight->weightfn ) weight->weightfn[i_width>>2]( dst, i_dst_stride, src1, i_src_stride, weight, i_height ); else - x264_mc_copy_wtab_neon[i_width>>2]( dst, i_dst_stride, src1, i_src_stride, i_height ); + mc_copy_wtab_neon[i_width>>2]( dst, i_dst_stride, src1, i_src_stride, i_height ); } static uint8_t *get_ref_neon( uint8_t *dst, intptr_t *i_dst_stride, @@ -186,7 +242,7 @@ if( qpel_idx & 5 ) /* qpel interpolation needed */ { uint8_t *src2 = src[x264_hpel_ref1[qpel_idx]] + offset + ((mvx&3) == 3); - x264_pixel_avg_wtab_neon[i_width>>2]( + pixel_avg_wtab_neon[i_width>>2]( dst, *i_dst_stride, src1, i_src_stride, src2, i_height ); if( weight->weightfn ) @@ -205,6 +261,7 @@ } } +#define x264_hpel_filter_neon x264_template(hpel_filter_neon) void x264_hpel_filter_neon( uint8_t *dsth, uint8_t *dstv, uint8_t *dstc, uint8_t *src, intptr_t stride, int width, int height, int16_t *buf ); @@ -212,9 +269,8 @@ PLANE_COPY(16, neon) PLANE_COPY_SWAP(16, neon) PLANE_INTERLEAVE(neon) -#endif // !HIGH_BIT_DEPTH - PROPAGATE_LIST(neon) +#endif // !HIGH_BIT_DEPTH void x264_mc_init_aarch64( int cpu, x264_mc_functions_t *pf ) { @@ -234,11 +290,11 @@ pf->copy[PIXEL_8x8] = x264_mc_copy_w8_neon; pf->copy[PIXEL_4x4] = x264_mc_copy_w4_neon; - pf->plane_copy = x264_plane_copy_neon; - pf->plane_copy_swap = 
x264_plane_copy_swap_neon; + pf->plane_copy = plane_copy_neon; + pf->plane_copy_swap = plane_copy_swap_neon; pf->plane_copy_deinterleave = x264_plane_copy_deinterleave_neon; pf->plane_copy_deinterleave_rgb = x264_plane_copy_deinterleave_rgb_neon; - pf->plane_copy_interleave = x264_plane_copy_interleave_neon; + pf->plane_copy_interleave = plane_copy_interleave_neon; pf->load_deinterleave_chroma_fdec = x264_load_deinterleave_chroma_fdec_neon; pf->load_deinterleave_chroma_fenc = x264_load_deinterleave_chroma_fenc_neon; @@ -254,10 +310,10 @@ pf->avg[PIXEL_4x4] = x264_pixel_avg_4x4_neon; pf->avg[PIXEL_4x2] = x264_pixel_avg_4x2_neon; - pf->weight = x264_mc_wtab_neon; - pf->offsetadd = x264_mc_offsetadd_wtab_neon; - pf->offsetsub = x264_mc_offsetsub_wtab_neon; - pf->weight_cache = x264_weight_cache_neon; + pf->weight = mc_wtab_neon; + pf->offsetadd = mc_offsetadd_wtab_neon; + pf->offsetsub = mc_offsetsub_wtab_neon; + pf->weight_cache = weight_cache_neon; pf->mc_chroma = x264_mc_chroma_neon; pf->mc_luma = mc_luma_neon; @@ -271,7 +327,7 @@ pf->integral_init8v = x264_integral_init8v_neon; pf->mbtree_propagate_cost = x264_mbtree_propagate_cost_neon; - pf->mbtree_propagate_list = x264_mbtree_propagate_list_neon; + pf->mbtree_propagate_list = mbtree_propagate_list_neon; pf->mbtree_fix8_pack = x264_mbtree_fix8_pack_neon; pf->mbtree_fix8_unpack = x264_mbtree_fix8_unpack_neon; diff -Nru x264-0.152.2854+gite9a5903/common/aarch64/mc.h x264-0.158.2988+git-20191101.7817004/common/aarch64/mc.h --- x264-0.152.2854+gite9a5903/common/aarch64/mc.h 2017-12-31 12:50:51.000000000 +0000 +++ x264-0.158.2988+git-20191101.7817004/common/aarch64/mc.h 2019-11-09 05:16:29.000000000 +0000 @@ -1,7 +1,7 @@ /***************************************************************************** * mc.h: aarch64 motion compensation ***************************************************************************** - * Copyright (C) 2014-2017 x264 project + * Copyright (C) 2014-2019 x264 project * * Authors: Janne Grunau 
* @@ -26,6 +26,7 @@ #ifndef X264_AARCH64_MC_H #define X264_AARCH64_MC_H +#define x264_mc_init_aarch64 x264_template(mc_init_aarch64) void x264_mc_init_aarch64( int cpu, x264_mc_functions_t *pf ); #endif diff -Nru x264-0.152.2854+gite9a5903/common/aarch64/pixel-a.S x264-0.158.2988+git-20191101.7817004/common/aarch64/pixel-a.S --- x264-0.152.2854+gite9a5903/common/aarch64/pixel-a.S 2017-12-31 12:50:51.000000000 +0000 +++ x264-0.158.2988+git-20191101.7817004/common/aarch64/pixel-a.S 2019-11-09 05:16:29.000000000 +0000 @@ -1,7 +1,7 @@ /***************************************************************************** * pixel.S: aarch64 pixel metrics ***************************************************************************** - * Copyright (C) 2009-2017 x264 project + * Copyright (C) 2009-2019 x264 project * * Authors: David Conrad * Janne Grunau @@ -97,7 +97,7 @@ .endm .macro SAD_FUNC w, h, name -function x264_pixel_sad\name\()_\w\()x\h\()_neon, export=1 +function pixel_sad\name\()_\w\()x\h\()_neon, export=1 SAD_START_\w .rept \h / 2 - 1 @@ -195,7 +195,7 @@ .endm .macro SAD_X_FUNC x, w, h -function x264_pixel_sad_x\x\()_\w\()x\h\()_neon, export=1 +function pixel_sad_x\x\()_\w\()x\h\()_neon, export=1 .if \x == 3 mov x6, x5 mov x5, x4 @@ -249,7 +249,7 @@ SAD_X_FUNC 4, 16, 16 -function x264_pixel_vsad_neon, export=1 +function pixel_vsad_neon, export=1 subs w2, w2, #2 ld1 {v0.16b}, [x0], x1 ld1 {v1.16b}, [x0], x1 @@ -273,7 +273,7 @@ ret endfunc -function x264_pixel_asd8_neon, export=1 +function pixel_asd8_neon, export=1 sub w4, w4, #2 ld1 {v0.8b}, [x0], x1 ld1 {v1.8b}, [x2], x3 @@ -379,7 +379,7 @@ .endm .macro SSD_FUNC w h -function x264_pixel_ssd_\w\()x\h\()_neon, export=1 +function pixel_ssd_\w\()x\h\()_neon, export=1 SSD_START_\w .rept \h-2 SSD_\w @@ -402,7 +402,7 @@ SSD_FUNC 16, 16 -function x264_pixel_ssd_nv12_core_neon, export=1 +function pixel_ssd_nv12_core_neon, export=1 sxtw x8, w4 add x8, x8, #8 and x8, x8, #~15 @@ -473,7 +473,7 @@ endfunc .macro pixel_var_8 h 
-function x264_pixel_var_8x\h\()_neon, export=1 +function pixel_var_8x\h\()_neon, export=1 ld1 {v16.8b}, [x0], x1 ld1 {v17.8b}, [x0], x1 mov x2, \h - 4 @@ -512,14 +512,14 @@ uadalp v1.4s, v28.8h uadalp v2.4s, v29.8h - b x264_var_end + b var_end endfunc .endm pixel_var_8 8 pixel_var_8 16 -function x264_pixel_var_16x16_neon, export=1 +function pixel_var_16x16_neon, export=1 ld1 {v16.16b}, [x0], x1 ld1 {v17.16b}, [x0], x1 mov x2, #14 @@ -556,7 +556,7 @@ uadalp v2.4s, v4.8h endfunc -function x264_var_end +function var_end add v1.4s, v1.4s, v2.4s uaddlv s0, v0.8h uaddlv d1, v1.4s @@ -568,7 +568,7 @@ .macro pixel_var2_8 h -function x264_pixel_var2_8x\h\()_neon, export=1 +function pixel_var2_8x\h\()_neon, export=1 mov x3, #16 ld1 {v16.8b}, [x0], #8 ld1 {v18.8b}, [x1], x3 @@ -637,7 +637,7 @@ pixel_var2_8 16 -function x264_pixel_satd_4x4_neon, export=1 +function pixel_satd_4x4_neon, export=1 ld1 {v1.s}[0], [x2], x3 ld1 {v0.s}[0], [x0], x1 ld1 {v3.s}[0], [x2], x3 @@ -670,7 +670,7 @@ ret endfunc -function x264_pixel_satd_4x8_neon, export=1 +function pixel_satd_4x8_neon, export=1 ld1 {v1.s}[0], [x2], x3 ld1 {v0.s}[0], [x0], x1 ld1 {v3.s}[0], [x2], x3 @@ -687,10 +687,10 @@ ld1 {v4.s}[1], [x0], x1 ld1 {v7.s}[1], [x2], x3 ld1 {v6.s}[1], [x0], x1 - b x264_satd_4x8_8x4_end_neon + b satd_4x8_8x4_end_neon endfunc -function x264_pixel_satd_8x4_neon, export=1 +function pixel_satd_8x4_neon, export=1 ld1 {v1.8b}, [x2], x3 ld1 {v0.8b}, [x0], x1 ld1 {v3.8b}, [x2], x3 @@ -701,7 +701,7 @@ ld1 {v6.8b}, [x0], x1 endfunc -function x264_satd_4x8_8x4_end_neon +function satd_4x8_8x4_end_neon usubl v0.8h, v0.8b, v1.8b usubl v1.8h, v2.8b, v3.8b usubl v2.8h, v4.8b, v5.8b @@ -737,10 +737,10 @@ ret endfunc -function x264_pixel_satd_8x8_neon, export=1 +function pixel_satd_8x8_neon, export=1 mov x4, x30 - bl x264_satd_8x8_neon + bl satd_8x8_neon add v0.8h, v0.8h, v1.8h add v1.8h, v2.8h, v3.8h add v0.8h, v0.8h, v1.8h @@ -749,15 +749,15 @@ ret x4 endfunc -function x264_pixel_satd_8x16_neon, export=1 
+function pixel_satd_8x16_neon, export=1 mov x4, x30 - bl x264_satd_8x8_neon + bl satd_8x8_neon add v0.8h, v0.8h, v1.8h add v1.8h, v2.8h, v3.8h add v30.8h, v0.8h, v1.8h - bl x264_satd_8x8_neon + bl satd_8x8_neon add v0.8h, v0.8h, v1.8h add v1.8h, v2.8h, v3.8h add v31.8h, v0.8h, v1.8h @@ -813,12 +813,12 @@ SUMSUB_ABCD \r1, \r3, \r2, \r4, \t1, \t3, \t2, \t4 .endm -function x264_satd_8x8_neon +function satd_8x8_neon load_diff_fly_8x8 endfunc // one vertical hadamard pass and two horizontal -function x264_satd_8x4v_8x8h_neon +function satd_8x4v_8x8h_neon SUMSUB_AB v16.8h, v18.8h, v0.8h, v2.8h SUMSUB_AB v17.8h, v19.8h, v1.8h, v3.8h @@ -856,14 +856,14 @@ ret endfunc -function x264_pixel_satd_16x8_neon, export=1 +function pixel_satd_16x8_neon, export=1 mov x4, x30 - bl x264_satd_16x4_neon + bl satd_16x4_neon add v30.8h, v0.8h, v1.8h add v31.8h, v2.8h, v3.8h - bl x264_satd_16x4_neon + bl satd_16x4_neon add v0.8h, v0.8h, v1.8h add v1.8h, v2.8h, v3.8h add v30.8h, v30.8h, v0.8h @@ -875,26 +875,26 @@ ret x4 endfunc -function x264_pixel_satd_16x16_neon, export=1 +function pixel_satd_16x16_neon, export=1 mov x4, x30 - bl x264_satd_16x4_neon + bl satd_16x4_neon add v30.8h, v0.8h, v1.8h add v31.8h, v2.8h, v3.8h - bl x264_satd_16x4_neon + bl satd_16x4_neon add v0.8h, v0.8h, v1.8h add v1.8h, v2.8h, v3.8h add v30.8h, v30.8h, v0.8h add v31.8h, v31.8h, v1.8h - bl x264_satd_16x4_neon + bl satd_16x4_neon add v0.8h, v0.8h, v1.8h add v1.8h, v2.8h, v3.8h add v30.8h, v30.8h, v0.8h add v31.8h, v31.8h, v1.8h - bl x264_satd_16x4_neon + bl satd_16x4_neon add v0.8h, v0.8h, v1.8h add v1.8h, v2.8h, v3.8h add v30.8h, v30.8h, v0.8h @@ -906,7 +906,7 @@ ret x4 endfunc -function x264_satd_16x4_neon +function satd_16x4_neon ld1 {v1.16b}, [x2], x3 ld1 {v0.16b}, [x0], x1 ld1 {v3.16b}, [x2], x3 @@ -928,10 +928,10 @@ SUMSUB_AB v0.8h, v1.8h, v16.8h, v17.8h SUMSUB_AB v2.8h, v3.8h, v18.8h, v19.8h - b x264_satd_8x4v_8x8h_neon + b satd_8x4v_8x8h_neon endfunc -function x264_pixel_satd_4x16_neon, export=1 +function 
pixel_satd_4x16_neon, export=1 mov x4, x30 ld1 {v1.s}[0], [x2], x3 ld1 {v0.s}[0], [x0], x1 @@ -977,7 +977,7 @@ SUMSUB_AB v0.8h, v1.8h, v16.8h, v17.8h SUMSUB_AB v2.8h, v3.8h, v18.8h, v19.8h - bl x264_satd_8x4v_8x8h_neon + bl satd_8x4v_8x8h_neon add v30.8h, v0.8h, v1.8h add v31.8h, v2.8h, v3.8h @@ -987,7 +987,7 @@ ret x4 endfunc -function x264_pixel_sa8d_8x8_neon, export=1 +function pixel_sa8d_8x8_neon, export=1 mov x4, x30 bl pixel_sa8d_8x8_neon add v0.8h, v0.8h, v1.8h @@ -998,7 +998,7 @@ ret x4 endfunc -function x264_pixel_sa8d_16x16_neon, export=1 +function pixel_sa8d_16x16_neon, export=1 mov x4, x30 bl pixel_sa8d_8x8_neon uaddlp v30.4s, v0.8h @@ -1120,7 +1120,7 @@ sa8d_satd_8x8 sa8d_satd_8x8 satd_ -function x264_pixel_sa8d_satd_16x16_neon, export=1 +function pixel_sa8d_satd_16x16_neon, export=1 mov x4, x30 bl pixel_sa8d_satd_8x8_neon uaddlp v30.4s, v0.8h @@ -1158,25 +1158,25 @@ endfunc .macro HADAMARD_AC w h -function x264_pixel_hadamard_ac_\w\()x\h\()_neon, export=1 +function pixel_hadamard_ac_\w\()x\h\()_neon, export=1 movrel x5, mask_ac_4_8 mov x4, x30 ld1 {v30.8h,v31.8h}, [x5] movi v28.16b, #0 movi v29.16b, #0 - bl x264_hadamard_ac_8x8_neon + bl hadamard_ac_8x8_neon .if \h > 8 - bl x264_hadamard_ac_8x8_neon + bl hadamard_ac_8x8_neon .endif .if \w > 8 sub x0, x0, x1, lsl #3 add x0, x0, #8 - bl x264_hadamard_ac_8x8_neon + bl hadamard_ac_8x8_neon .endif .if \w * \h == 256 sub x0, x0, x1, lsl #4 - bl x264_hadamard_ac_8x8_neon + bl hadamard_ac_8x8_neon .endif addv s1, v29.4s @@ -1196,7 +1196,7 @@ HADAMARD_AC 16, 16 // v28: satd v29: sa8d v30: mask_ac4 v31: mask_ac8 -function x264_hadamard_ac_8x8_neon +function hadamard_ac_8x8_neon ld1 {v16.8b}, [x0], x1 ld1 {v17.8b}, [x0], x1 ld1 {v18.8b}, [x0], x1 @@ -1288,7 +1288,7 @@ endfunc -function x264_pixel_ssim_4x4x2_core_neon, export=1 +function pixel_ssim_4x4x2_core_neon, export=1 ld1 {v0.8b}, [x0], x1 ld1 {v2.8b}, [x2], x3 umull v16.8h, v0.8b, v0.8b @@ -1347,7 +1347,7 @@ ret endfunc -function x264_pixel_ssim_end4_neon, 
export=1 +function pixel_ssim_end4_neon, export=1 mov x5, #4 ld1 {v16.4s,v17.4s}, [x0], #32 ld1 {v18.4s,v19.4s}, [x1], #32 diff -Nru x264-0.152.2854+gite9a5903/common/aarch64/pixel.h x264-0.158.2988+git-20191101.7817004/common/aarch64/pixel.h --- x264-0.152.2854+gite9a5903/common/aarch64/pixel.h 2017-12-31 12:50:51.000000000 +0000 +++ x264-0.158.2988+git-20191101.7817004/common/aarch64/pixel.h 2019-11-09 05:16:29.000000000 +0000 @@ -1,7 +1,7 @@ /***************************************************************************** * pixel.h: aarch64 pixel metrics ***************************************************************************** - * Copyright (C) 2009-2017 x264 project + * Copyright (C) 2009-2019 x264 project * * Authors: David Conrad * Janne Grunau @@ -27,6 +27,44 @@ #ifndef X264_AARCH64_PIXEL_H #define X264_AARCH64_PIXEL_H +#define x264_pixel_sad_16x16_neon x264_template(pixel_sad_16x16_neon) +#define x264_pixel_sad_16x8_neon x264_template(pixel_sad_16x8_neon) +#define x264_pixel_sad_4x16_neon x264_template(pixel_sad_4x16_neon) +#define x264_pixel_sad_4x4_neon x264_template(pixel_sad_4x4_neon) +#define x264_pixel_sad_4x8_neon x264_template(pixel_sad_4x8_neon) +#define x264_pixel_sad_8x16_neon x264_template(pixel_sad_8x16_neon) +#define x264_pixel_sad_8x4_neon x264_template(pixel_sad_8x4_neon) +#define x264_pixel_sad_8x8_neon x264_template(pixel_sad_8x8_neon) +#define x264_pixel_sad_x3_16x16_neon x264_template(pixel_sad_x3_16x16_neon) +#define x264_pixel_sad_x3_16x8_neon x264_template(pixel_sad_x3_16x8_neon) +#define x264_pixel_sad_x3_4x4_neon x264_template(pixel_sad_x3_4x4_neon) +#define x264_pixel_sad_x3_4x8_neon x264_template(pixel_sad_x3_4x8_neon) +#define x264_pixel_sad_x3_8x16_neon x264_template(pixel_sad_x3_8x16_neon) +#define x264_pixel_sad_x3_8x4_neon x264_template(pixel_sad_x3_8x4_neon) +#define x264_pixel_sad_x3_8x8_neon x264_template(pixel_sad_x3_8x8_neon) +#define x264_pixel_sad_x4_16x16_neon x264_template(pixel_sad_x4_16x16_neon) +#define 
x264_pixel_sad_x4_16x8_neon x264_template(pixel_sad_x4_16x8_neon) +#define x264_pixel_sad_x4_4x4_neon x264_template(pixel_sad_x4_4x4_neon) +#define x264_pixel_sad_x4_4x8_neon x264_template(pixel_sad_x4_4x8_neon) +#define x264_pixel_sad_x4_8x16_neon x264_template(pixel_sad_x4_8x16_neon) +#define x264_pixel_sad_x4_8x4_neon x264_template(pixel_sad_x4_8x4_neon) +#define x264_pixel_sad_x4_8x8_neon x264_template(pixel_sad_x4_8x8_neon) +#define x264_pixel_satd_16x16_neon x264_template(pixel_satd_16x16_neon) +#define x264_pixel_satd_16x8_neon x264_template(pixel_satd_16x8_neon) +#define x264_pixel_satd_4x16_neon x264_template(pixel_satd_4x16_neon) +#define x264_pixel_satd_4x4_neon x264_template(pixel_satd_4x4_neon) +#define x264_pixel_satd_4x8_neon x264_template(pixel_satd_4x8_neon) +#define x264_pixel_satd_8x16_neon x264_template(pixel_satd_8x16_neon) +#define x264_pixel_satd_8x4_neon x264_template(pixel_satd_8x4_neon) +#define x264_pixel_satd_8x8_neon x264_template(pixel_satd_8x8_neon) +#define x264_pixel_ssd_16x16_neon x264_template(pixel_ssd_16x16_neon) +#define x264_pixel_ssd_16x8_neon x264_template(pixel_ssd_16x8_neon) +#define x264_pixel_ssd_4x16_neon x264_template(pixel_ssd_4x16_neon) +#define x264_pixel_ssd_4x4_neon x264_template(pixel_ssd_4x4_neon) +#define x264_pixel_ssd_4x8_neon x264_template(pixel_ssd_4x8_neon) +#define x264_pixel_ssd_8x16_neon x264_template(pixel_ssd_8x16_neon) +#define x264_pixel_ssd_8x4_neon x264_template(pixel_ssd_8x4_neon) +#define x264_pixel_ssd_8x8_neon x264_template(pixel_ssd_8x8_neon) #define DECL_PIXELS( ret, name, suffix, args ) \ ret x264_pixel_##name##_16x16_##suffix args;\ ret x264_pixel_##name##_16x8_##suffix args;\ @@ -50,30 +88,47 @@ DECL_X1( ssd, neon ) +#define x264_pixel_ssd_nv12_core_neon x264_template(pixel_ssd_nv12_core_neon) void x264_pixel_ssd_nv12_core_neon( uint8_t *, intptr_t, uint8_t *, intptr_t, int, int, uint64_t *, uint64_t * ); +#define x264_pixel_vsad_neon x264_template(pixel_vsad_neon) int 
x264_pixel_vsad_neon( uint8_t *, intptr_t, int ); +#define x264_pixel_sa8d_8x8_neon x264_template(pixel_sa8d_8x8_neon) int x264_pixel_sa8d_8x8_neon ( uint8_t *, intptr_t, uint8_t *, intptr_t ); +#define x264_pixel_sa8d_16x16_neon x264_template(pixel_sa8d_16x16_neon) int x264_pixel_sa8d_16x16_neon( uint8_t *, intptr_t, uint8_t *, intptr_t ); +#define x264_pixel_sa8d_satd_16x16_neon x264_template(pixel_sa8d_satd_16x16_neon) uint64_t x264_pixel_sa8d_satd_16x16_neon( uint8_t *, intptr_t, uint8_t *, intptr_t ); +#define x264_pixel_var_8x8_neon x264_template(pixel_var_8x8_neon) uint64_t x264_pixel_var_8x8_neon ( uint8_t *, intptr_t ); +#define x264_pixel_var_8x16_neon x264_template(pixel_var_8x16_neon) uint64_t x264_pixel_var_8x16_neon ( uint8_t *, intptr_t ); +#define x264_pixel_var_16x16_neon x264_template(pixel_var_16x16_neon) uint64_t x264_pixel_var_16x16_neon( uint8_t *, intptr_t ); +#define x264_pixel_var2_8x8_neon x264_template(pixel_var2_8x8_neon) int x264_pixel_var2_8x8_neon ( uint8_t *, uint8_t *, int * ); +#define x264_pixel_var2_8x16_neon x264_template(pixel_var2_8x16_neon) int x264_pixel_var2_8x16_neon( uint8_t *, uint8_t *, int * ); +#define x264_pixel_hadamard_ac_8x8_neon x264_template(pixel_hadamard_ac_8x8_neon) uint64_t x264_pixel_hadamard_ac_8x8_neon ( uint8_t *, intptr_t ); +#define x264_pixel_hadamard_ac_8x16_neon x264_template(pixel_hadamard_ac_8x16_neon) uint64_t x264_pixel_hadamard_ac_8x16_neon ( uint8_t *, intptr_t ); +#define x264_pixel_hadamard_ac_16x8_neon x264_template(pixel_hadamard_ac_16x8_neon) uint64_t x264_pixel_hadamard_ac_16x8_neon ( uint8_t *, intptr_t ); +#define x264_pixel_hadamard_ac_16x16_neon x264_template(pixel_hadamard_ac_16x16_neon) uint64_t x264_pixel_hadamard_ac_16x16_neon( uint8_t *, intptr_t ); +#define x264_pixel_ssim_4x4x2_core_neon x264_template(pixel_ssim_4x4x2_core_neon) void x264_pixel_ssim_4x4x2_core_neon( const uint8_t *, intptr_t, const uint8_t *, intptr_t, int sums[2][4] ); +#define x264_pixel_ssim_end4_neon 
x264_template(pixel_ssim_end4_neon) float x264_pixel_ssim_end4_neon( int sum0[5][4], int sum1[5][4], int width ); +#define x264_pixel_asd8_neon x264_template(pixel_asd8_neon) int x264_pixel_asd8_neon( uint8_t *, intptr_t, uint8_t *, intptr_t, int ); #endif diff -Nru x264-0.152.2854+gite9a5903/common/aarch64/predict-a.S x264-0.158.2988+git-20191101.7817004/common/aarch64/predict-a.S --- x264-0.152.2854+gite9a5903/common/aarch64/predict-a.S 2017-12-31 12:50:51.000000000 +0000 +++ x264-0.158.2988+git-20191101.7817004/common/aarch64/predict-a.S 2019-11-09 05:16:29.000000000 +0000 @@ -1,7 +1,7 @@ /***************************************************************************** * predict.S: aarch64 intra prediction ***************************************************************************** - * Copyright (C) 2009-2017 x264 project + * Copyright (C) 2009-2019 x264 project * * Authors: David Conrad * Mans Rullgard @@ -62,8 +62,8 @@ .endm -function x264_predict_4x4_h_aarch64, export=1 - ldrb w1, [x0, #0*FDEC_STRIDE-1] +function predict_4x4_h_aarch64, export=1 + ldurb w1, [x0, #0*FDEC_STRIDE-1] mov w5, #0x01010101 ldrb w2, [x0, #1*FDEC_STRIDE-1] ldrb w3, [x0, #2*FDEC_STRIDE-1] @@ -79,8 +79,8 @@ ret endfunc -function x264_predict_4x4_v_aarch64, export=1 - ldr w1, [x0, #0 - 1 * FDEC_STRIDE] +function predict_4x4_v_aarch64, export=1 + ldur w1, [x0, #0 - 1 * FDEC_STRIDE] str w1, [x0, #0 + 0 * FDEC_STRIDE] str w1, [x0, #0 + 1 * FDEC_STRIDE] str w1, [x0, #0 + 2 * FDEC_STRIDE] @@ -88,9 +88,9 @@ ret endfunc -function x264_predict_4x4_dc_neon, export=1 +function predict_4x4_dc_neon, export=1 sub x1, x0, #FDEC_STRIDE - ldrb w4, [x0, #-1 + 0 * FDEC_STRIDE] + ldurb w4, [x0, #-1 + 0 * FDEC_STRIDE] ldrb w5, [x0, #-1 + 1 * FDEC_STRIDE] ldrb w6, [x0, #-1 + 2 * FDEC_STRIDE] ldrb w7, [x0, #-1 + 3 * FDEC_STRIDE] @@ -110,7 +110,7 @@ ret endfunc -function x264_predict_4x4_dc_top_neon, export=1 +function predict_4x4_dc_top_neon, export=1 sub x1, x0, #FDEC_STRIDE ldr s0, [x1] uaddlv h0, v0.8b @@ 
-124,7 +124,7 @@ ret endfunc -function x264_predict_4x4_ddr_neon, export=1 +function predict_4x4_ddr_neon, export=1 sub x1, x0, #FDEC_STRIDE+1 mov x7, #FDEC_STRIDE ld1 {v0.8b}, [x1], x7 // # -FDEC_STRIDE-1 @@ -152,7 +152,7 @@ ret endfunc -function x264_predict_4x4_ddl_neon, export=1 +function predict_4x4_ddl_neon, export=1 sub x0, x0, #FDEC_STRIDE mov x7, #FDEC_STRIDE ld1 {v0.8b}, [x0], x7 @@ -171,7 +171,7 @@ ret endfunc -function x264_predict_8x8_dc_neon, export=1 +function predict_8x8_dc_neon, export=1 mov x7, #FDEC_STRIDE ld1 {v0.16b}, [x1], #16 ld1 {v1.8b}, [x1] @@ -187,7 +187,7 @@ ret endfunc -function x264_predict_8x8_h_neon, export=1 +function predict_8x8_h_neon, export=1 mov x7, #FDEC_STRIDE ld1 {v16.16b}, [x1] dup v0.8b, v16.b[14] @@ -209,7 +209,7 @@ ret endfunc -function x264_predict_8x8_v_neon, export=1 +function predict_8x8_v_neon, export=1 add x1, x1, #16 mov x7, #FDEC_STRIDE ld1 {v0.8b}, [x1] @@ -219,7 +219,7 @@ ret endfunc -function x264_predict_8x8_ddl_neon, export=1 +function predict_8x8_ddl_neon, export=1 add x1, x1, #16 mov x7, #FDEC_STRIDE ld1 {v0.16b}, [x1] @@ -248,7 +248,7 @@ ret endfunc -function x264_predict_8x8_ddr_neon, export=1 +function predict_8x8_ddr_neon, export=1 ld1 {v0.16b,v1.16b}, [x1] ext v2.16b, v0.16b, v1.16b, #7 ext v4.16b, v0.16b, v1.16b, #9 @@ -278,7 +278,7 @@ ret endfunc -function x264_predict_8x8_vl_neon, export=1 +function predict_8x8_vl_neon, export=1 add x1, x1, #16 mov x7, #FDEC_STRIDE @@ -309,7 +309,7 @@ ret endfunc -function x264_predict_8x8_vr_neon, export=1 +function predict_8x8_vr_neon, export=1 add x1, x1, #8 mov x7, #FDEC_STRIDE ld1 {v2.16b}, [x1] @@ -343,7 +343,7 @@ ret endfunc -function x264_predict_8x8_hd_neon, export=1 +function predict_8x8_hd_neon, export=1 add x1, x1, #7 mov x7, #FDEC_STRIDE @@ -378,7 +378,7 @@ ret endfunc -function x264_predict_8x8_hu_neon, export=1 +function predict_8x8_hu_neon, export=1 add x1, x1, #7 mov x7, #FDEC_STRIDE ld1 {v7.8b}, [x1] @@ -416,7 +416,7 @@ endfunc -function 
x264_predict_8x8c_dc_top_neon, export=1 +function predict_8x8c_dc_top_neon, export=1 sub x2, x0, #FDEC_STRIDE mov x1, #FDEC_STRIDE ld1 {v0.8b}, [x2] @@ -429,8 +429,8 @@ b pred8x8c_dc_end endfunc -function x264_predict_8x8c_dc_left_neon, export=1 - ldrb w2, [x0, #0 * FDEC_STRIDE - 1] +function predict_8x8c_dc_left_neon, export=1 + ldurb w2, [x0, #0 * FDEC_STRIDE - 1] ldrb w3, [x0, #1 * FDEC_STRIDE - 1] ldrb w4, [x0, #2 * FDEC_STRIDE - 1] ldrb w5, [x0, #3 * FDEC_STRIDE - 1] @@ -452,10 +452,10 @@ b pred8x8c_dc_end endfunc -function x264_predict_8x8c_dc_neon, export=1 +function predict_8x8c_dc_neon, export=1 mov x1, #FDEC_STRIDE sub x2, x0, #FDEC_STRIDE - ldrb w10, [x0, #0 * FDEC_STRIDE - 1] + ldurb w10, [x0, #0 * FDEC_STRIDE - 1] ldrb w11, [x0, #1 * FDEC_STRIDE - 1] ldrb w12, [x0, #2 * FDEC_STRIDE - 1] ldrb w13, [x0, #3 * FDEC_STRIDE - 1] @@ -498,7 +498,7 @@ ret endfunc -function x264_predict_8x8c_h_neon, export=1 +function predict_8x8c_h_neon, export=1 sub x1, x0, #1 mov x7, #FDEC_STRIDE .rept 4 @@ -510,15 +510,15 @@ ret endfunc -function x264_predict_8x8c_v_aarch64, export=1 - ldr x1, [x0, #-FDEC_STRIDE] +function predict_8x8c_v_aarch64, export=1 + ldur x1, [x0, #-FDEC_STRIDE] .irp c, 0,1,2,3,4,5,6,7 str x1, [x0, #\c * FDEC_STRIDE] .endr ret endfunc -function x264_predict_8x8c_p_neon, export=1 +function predict_8x8c_p_neon, export=1 sub x3, x0, #FDEC_STRIDE mov x1, #FDEC_STRIDE add x2, x3, #4 @@ -568,7 +568,11 @@ .macro loadsum4 wd, t1, t2, t3, x, idx + .if \idx == 0 + ldurb \wd, [\x, #(\idx + 0) * FDEC_STRIDE - 1] + .else ldrb \wd, [\x, #(\idx + 0) * FDEC_STRIDE - 1] + .endif ldrb \t1, [\x, #(\idx + 1) * FDEC_STRIDE - 1] ldrb \t2, [\x, #(\idx + 2) * FDEC_STRIDE - 1] ldrb \t3, [\x, #(\idx + 3) * FDEC_STRIDE - 1] @@ -577,7 +581,7 @@ add \wd, \wd, \t1 .endm -function x264_predict_8x16c_h_neon, export=1 +function predict_8x16c_h_neon, export=1 sub x2, x0, #1 add x3, x0, #FDEC_STRIDE - 1 mov x7, #2 * FDEC_STRIDE @@ -595,7 +599,7 @@ ret endfunc -function 
x264_predict_8x16c_v_neon, export=1 +function predict_8x16c_v_neon, export=1 sub x1, x0, #FDEC_STRIDE mov x2, #2 * FDEC_STRIDE ld1 {v0.8b}, [x1], x2 @@ -606,7 +610,7 @@ ret endfunc -function x264_predict_8x16c_p_neon, export=1 +function predict_8x16c_p_neon, export=1 movrel x4, p16weight ld1 {v17.8h}, [x4] sub x3, x0, #FDEC_STRIDE @@ -673,7 +677,7 @@ ret endfunc -function x264_predict_8x16c_dc_neon, export=1 +function predict_8x16c_dc_neon, export=1 mov x1, #FDEC_STRIDE sub x10, x0, #FDEC_STRIDE loadsum4 w2, w3, w4, w5, x0, 0 @@ -718,9 +722,9 @@ ret endfunc -function x264_predict_8x16c_dc_left_neon, export=1 +function predict_8x16c_dc_left_neon, export=1 mov x1, #FDEC_STRIDE - ldrb w2, [x0, # 0 * FDEC_STRIDE - 1] + ldurb w2, [x0, # 0 * FDEC_STRIDE - 1] ldrb w3, [x0, # 1 * FDEC_STRIDE - 1] ldrb w4, [x0, # 2 * FDEC_STRIDE - 1] ldrb w5, [x0, # 3 * FDEC_STRIDE - 1] @@ -772,7 +776,7 @@ ret endfunc -function x264_predict_8x16c_dc_top_neon, export=1 +function predict_8x16c_dc_top_neon, export=1 sub x2, x0, #FDEC_STRIDE mov x1, #FDEC_STRIDE ld1 {v0.8b}, [x2] @@ -789,7 +793,7 @@ endfunc -function x264_predict_16x16_dc_top_neon, export=1 +function predict_16x16_dc_top_neon, export=1 sub x2, x0, #FDEC_STRIDE mov x1, #FDEC_STRIDE ld1 {v0.16b}, [x2] @@ -799,7 +803,7 @@ b pred16x16_dc_end endfunc -function x264_predict_16x16_dc_left_neon, export=1 +function predict_16x16_dc_left_neon, export=1 sub x2, x0, #1 mov x1, #FDEC_STRIDE ldcol.16 v0, x2, x1 @@ -809,7 +813,7 @@ b pred16x16_dc_end endfunc -function x264_predict_16x16_dc_neon, export=1 +function predict_16x16_dc_neon, export=1 sub x3, x0, #FDEC_STRIDE sub x2, x0, #1 mov x1, #FDEC_STRIDE @@ -827,7 +831,7 @@ ret endfunc -function x264_predict_16x16_h_neon, export=1 +function predict_16x16_h_neon, export=1 sub x1, x0, #1 mov x7, #FDEC_STRIDE .rept 8 @@ -839,7 +843,7 @@ ret endfunc -function x264_predict_16x16_v_neon, export=1 +function predict_16x16_v_neon, export=1 sub x0, x0, #FDEC_STRIDE mov x7, #FDEC_STRIDE ld1 {v0.16b}, 
[x0], x7 @@ -849,7 +853,7 @@ ret endfunc -function x264_predict_16x16_p_neon, export=1 +function predict_16x16_p_neon, export=1 sub x3, x0, #FDEC_STRIDE mov x1, #FDEC_STRIDE add x2, x3, #8 diff -Nru x264-0.152.2854+gite9a5903/common/aarch64/predict-c.c x264-0.158.2988+git-20191101.7817004/common/aarch64/predict-c.c --- x264-0.152.2854+gite9a5903/common/aarch64/predict-c.c 2017-12-31 12:50:51.000000000 +0000 +++ x264-0.158.2988+git-20191101.7817004/common/aarch64/predict-c.c 2019-11-09 05:16:29.000000000 +0000 @@ -1,7 +1,7 @@ /***************************************************************************** * predict.c: aarch64 intra prediction ***************************************************************************** - * Copyright (C) 2009-2017 x264 project + * Copyright (C) 2009-2019 x264 project * * Authors: David Conrad * Janne Grunau @@ -28,29 +28,6 @@ #include "predict.h" #include "pixel.h" -void x264_predict_4x4_dc_top_neon( uint8_t *src ); -void x264_predict_4x4_ddr_neon( uint8_t *src ); -void x264_predict_4x4_ddl_neon( uint8_t *src ); - -void x264_predict_8x8c_dc_top_neon( uint8_t *src ); -void x264_predict_8x8c_dc_left_neon( uint8_t *src ); -void x264_predict_8x8c_p_neon( uint8_t *src ); - -void x264_predict_8x16c_dc_left_neon( uint8_t *src ); -void x264_predict_8x16c_dc_top_neon( uint8_t *src ); -void x264_predict_8x16c_p_neon( uint8_t *src ); - -void x264_predict_8x8_ddl_neon( uint8_t *src, uint8_t edge[36] ); -void x264_predict_8x8_ddr_neon( uint8_t *src, uint8_t edge[36] ); -void x264_predict_8x8_vl_neon( uint8_t *src, uint8_t edge[36] ); -void x264_predict_8x8_vr_neon( uint8_t *src, uint8_t edge[36] ); -void x264_predict_8x8_hd_neon( uint8_t *src, uint8_t edge[36] ); -void x264_predict_8x8_hu_neon( uint8_t *src, uint8_t edge[36] ); - -void x264_predict_16x16_dc_top_neon( uint8_t *src ); -void x264_predict_16x16_dc_left_neon( uint8_t *src ); -void x264_predict_16x16_p_neon( uint8_t *src ); - void x264_predict_4x4_init_aarch64( int cpu, x264_predict_t 
pf[12] ) { #if !HIGH_BIT_DEPTH diff -Nru x264-0.152.2854+gite9a5903/common/aarch64/predict.h x264-0.158.2988+git-20191101.7817004/common/aarch64/predict.h --- x264-0.152.2854+gite9a5903/common/aarch64/predict.h 2017-12-31 12:50:51.000000000 +0000 +++ x264-0.158.2988+git-20191101.7817004/common/aarch64/predict.h 2019-11-09 05:16:29.000000000 +0000 @@ -1,7 +1,7 @@ /***************************************************************************** * predict.h: aarch64 intra prediction ***************************************************************************** - * Copyright (C) 2009-2017 x264 project + * Copyright (C) 2009-2019 x264 project * * Authors: David Conrad * Janne Grunau @@ -27,8 +27,11 @@ #ifndef X264_AARCH64_PREDICT_H #define X264_AARCH64_PREDICT_H +#define x264_predict_4x4_h_aarch64 x264_template(predict_4x4_h_aarch64) void x264_predict_4x4_h_aarch64( uint8_t *src ); +#define x264_predict_4x4_v_aarch64 x264_template(predict_4x4_v_aarch64) void x264_predict_4x4_v_aarch64( uint8_t *src ); +#define x264_predict_8x8c_v_aarch64 x264_template(predict_8x8c_v_aarch64) void x264_predict_8x8c_v_aarch64( uint8_t *src ); // for the merged 4x4 intra sad/satd which expects unified suffix @@ -36,23 +39,81 @@ #define x264_predict_4x4_v_neon x264_predict_4x4_v_aarch64 #define x264_predict_8x8c_v_neon x264_predict_8x8c_v_aarch64 +#define x264_predict_4x4_dc_top_neon x264_template(predict_4x4_dc_top_neon) +void x264_predict_4x4_dc_top_neon( uint8_t *src ); +#define x264_predict_4x4_ddr_neon x264_template(predict_4x4_ddr_neon) +void x264_predict_4x4_ddr_neon( uint8_t *src ); +#define x264_predict_4x4_ddl_neon x264_template(predict_4x4_ddl_neon) +void x264_predict_4x4_ddl_neon( uint8_t *src ); + +#define x264_predict_8x8c_dc_top_neon x264_template(predict_8x8c_dc_top_neon) +void x264_predict_8x8c_dc_top_neon( uint8_t *src ); +#define x264_predict_8x8c_dc_left_neon x264_template(predict_8x8c_dc_left_neon) +void x264_predict_8x8c_dc_left_neon( uint8_t *src ); +#define 
x264_predict_8x8c_p_neon x264_template(predict_8x8c_p_neon) +void x264_predict_8x8c_p_neon( uint8_t *src ); + +#define x264_predict_8x16c_dc_left_neon x264_template(predict_8x16c_dc_left_neon) +void x264_predict_8x16c_dc_left_neon( uint8_t *src ); +#define x264_predict_8x16c_dc_top_neon x264_template(predict_8x16c_dc_top_neon) +void x264_predict_8x16c_dc_top_neon( uint8_t *src ); +#define x264_predict_8x16c_p_neon x264_template(predict_8x16c_p_neon) +void x264_predict_8x16c_p_neon( uint8_t *src ); + +#define x264_predict_8x8_ddl_neon x264_template(predict_8x8_ddl_neon) +void x264_predict_8x8_ddl_neon( uint8_t *src, uint8_t edge[36] ); +#define x264_predict_8x8_ddr_neon x264_template(predict_8x8_ddr_neon) +void x264_predict_8x8_ddr_neon( uint8_t *src, uint8_t edge[36] ); +#define x264_predict_8x8_vl_neon x264_template(predict_8x8_vl_neon) +void x264_predict_8x8_vl_neon( uint8_t *src, uint8_t edge[36] ); +#define x264_predict_8x8_vr_neon x264_template(predict_8x8_vr_neon) +void x264_predict_8x8_vr_neon( uint8_t *src, uint8_t edge[36] ); +#define x264_predict_8x8_hd_neon x264_template(predict_8x8_hd_neon) +void x264_predict_8x8_hd_neon( uint8_t *src, uint8_t edge[36] ); +#define x264_predict_8x8_hu_neon x264_template(predict_8x8_hu_neon) +void x264_predict_8x8_hu_neon( uint8_t *src, uint8_t edge[36] ); + +#define x264_predict_16x16_dc_top_neon x264_template(predict_16x16_dc_top_neon) +void x264_predict_16x16_dc_top_neon( uint8_t *src ); +#define x264_predict_16x16_dc_left_neon x264_template(predict_16x16_dc_left_neon) +void x264_predict_16x16_dc_left_neon( uint8_t *src ); +#define x264_predict_16x16_p_neon x264_template(predict_16x16_p_neon) +void x264_predict_16x16_p_neon( uint8_t *src ); + +#define x264_predict_4x4_dc_neon x264_template(predict_4x4_dc_neon) void x264_predict_4x4_dc_neon( uint8_t *src ); +#define x264_predict_8x8_v_neon x264_template(predict_8x8_v_neon) void x264_predict_8x8_v_neon( uint8_t *src, uint8_t edge[36] ); +#define x264_predict_8x8_h_neon 
x264_template(predict_8x8_h_neon) void x264_predict_8x8_h_neon( uint8_t *src, uint8_t edge[36] ); +#define x264_predict_8x8_dc_neon x264_template(predict_8x8_dc_neon) void x264_predict_8x8_dc_neon( uint8_t *src, uint8_t edge[36] ); +#define x264_predict_8x8c_dc_neon x264_template(predict_8x8c_dc_neon) void x264_predict_8x8c_dc_neon( uint8_t *src ); +#define x264_predict_8x8c_h_neon x264_template(predict_8x8c_h_neon) void x264_predict_8x8c_h_neon( uint8_t *src ); +#define x264_predict_8x16c_v_neon x264_template(predict_8x16c_v_neon) void x264_predict_8x16c_v_neon( uint8_t *src ); +#define x264_predict_8x16c_h_neon x264_template(predict_8x16c_h_neon) void x264_predict_8x16c_h_neon( uint8_t *src ); +#define x264_predict_8x16c_dc_neon x264_template(predict_8x16c_dc_neon) void x264_predict_8x16c_dc_neon( uint8_t *src ); +#define x264_predict_16x16_v_neon x264_template(predict_16x16_v_neon) void x264_predict_16x16_v_neon( uint8_t *src ); +#define x264_predict_16x16_h_neon x264_template(predict_16x16_h_neon) void x264_predict_16x16_h_neon( uint8_t *src ); +#define x264_predict_16x16_dc_neon x264_template(predict_16x16_dc_neon) void x264_predict_16x16_dc_neon( uint8_t *src ); +#define x264_predict_4x4_init_aarch64 x264_template(predict_4x4_init_aarch64) void x264_predict_4x4_init_aarch64( int cpu, x264_predict_t pf[12] ); +#define x264_predict_8x8_init_aarch64 x264_template(predict_8x8_init_aarch64) void x264_predict_8x8_init_aarch64( int cpu, x264_predict8x8_t pf[12], x264_predict_8x8_filter_t *predict_filter ); +#define x264_predict_8x8c_init_aarch64 x264_template(predict_8x8c_init_aarch64) void x264_predict_8x8c_init_aarch64( int cpu, x264_predict_t pf[7] ); +#define x264_predict_8x16c_init_aarch64 x264_template(predict_8x16c_init_aarch64) void x264_predict_8x16c_init_aarch64( int cpu, x264_predict_t pf[7] ); +#define x264_predict_16x16_init_aarch64 x264_template(predict_16x16_init_aarch64) void x264_predict_16x16_init_aarch64( int cpu, x264_predict_t pf[7] ); #endif /* 
X264_AARCH64_PREDICT_H */ diff -Nru x264-0.152.2854+gite9a5903/common/aarch64/quant-a.S x264-0.158.2988+git-20191101.7817004/common/aarch64/quant-a.S --- x264-0.152.2854+gite9a5903/common/aarch64/quant-a.S 2017-12-31 12:50:51.000000000 +0000 +++ x264-0.158.2988+git-20191101.7817004/common/aarch64/quant-a.S 2019-11-09 05:16:29.000000000 +0000 @@ -1,7 +1,7 @@ /**************************************************************************** * quant.S: arm quantization and level-run ***************************************************************************** - * Copyright (C) 2009-2017 x264 project + * Copyright (C) 2009-2019 x264 project * * Authors: David Conrad * Janne Grunau @@ -57,7 +57,7 @@ .endm // quant_2x2_dc( int16_t dct[4], int mf, int bias ) -function x264_quant_2x2_dc_neon, export=1 +function quant_2x2_dc_neon, export=1 ld1 {v0.4h}, [x0] dup v2.4h, w2 dup v1.4h, w1 @@ -73,7 +73,7 @@ endfunc // quant_4x4_dc( int16_t dct[16], int mf, int bias ) -function x264_quant_4x4_dc_neon, export=1 +function quant_4x4_dc_neon, export=1 ld1 {v16.8h,v17.8h}, [x0] abs v18.8h, v16.8h abs v19.8h, v17.8h @@ -85,7 +85,7 @@ endfunc // quant_4x4( int16_t dct[16], uint16_t mf[16], uint16_t bias[16] ) -function x264_quant_4x4_neon, export=1 +function quant_4x4_neon, export=1 ld1 {v16.8h,v17.8h}, [x0] abs v18.8h, v16.8h abs v19.8h, v17.8h @@ -97,7 +97,7 @@ endfunc // quant_4x4x4( int16_t dct[4][16], uint16_t mf[16], uint16_t bias[16] ) -function x264_quant_4x4x4_neon, export=1 +function quant_4x4x4_neon, export=1 ld1 {v16.8h,v17.8h}, [x0] abs v18.8h, v16.8h abs v19.8h, v17.8h @@ -140,7 +140,7 @@ endfunc // quant_8x8( int16_t dct[64], uint16_t mf[64], uint16_t bias[64] ) -function x264_quant_8x8_neon, export=1 +function quant_8x8_neon, export=1 ld1 {v16.8h,v17.8h}, [x0] abs v18.8h, v16.8h abs v19.8h, v17.8h @@ -177,7 +177,7 @@ // dequant_4x4( int16_t dct[16], int dequant_mf[6][16], int i_qp ) .macro DEQUANT size bits -function x264_dequant_\size\()_neon, export=1 +function 
dequant_\size\()_neon, export=1 DEQUANT_START \bits+2, \bits .ifc \size, 8x8 mov w2, #4 @@ -258,7 +258,7 @@ DEQUANT 8x8, 6 // dequant_4x4_dc( int16_t dct[16], int dequant_mf[6][16], int i_qp ) -function x264_dequant_4x4_dc_neon, export=1 +function dequant_4x4_dc_neon, export=1 DEQUANT_START 6, 6, yes b.lt dequant_4x4_dc_rshift @@ -303,9 +303,9 @@ endfunc .macro decimate_score_1x size -function x264_decimate_score\size\()_neon, export=1 +function decimate_score\size\()_neon, export=1 ld1 {v0.8h,v1.8h}, [x0] - movrel x5, X(x264_decimate_table4) + movrel x5, X264(decimate_table4) movi v3.16b, #0x01 sqxtn v0.8b, v0.8h sqxtn2 v0.16b, v1.8h @@ -348,7 +348,7 @@ .byte 0x80, 0x40, 0x20, 0x10, 0x08, 0x04, 0x02, 0x01 endconst -function x264_decimate_score64_neon, export=1 +function decimate_score64_neon, export=1 ld1 {v0.8h,v1.8h}, [x0], #32 ld1 {v2.8h,v3.8h}, [x0], #32 ld1 {v4.8h,v5.8h}, [x0], #32 @@ -391,7 +391,7 @@ mvn x1, x1 mov w0, #0 cbz x1, 0f - movrel x5, X(x264_decimate_table8) + movrel x5, X264(decimate_table8) 1: clz x3, x1 lsl x1, x1, x3 @@ -407,7 +407,7 @@ endfunc // int coeff_last( int16_t *l ) -function x264_coeff_last4_aarch64, export=1 +function coeff_last4_aarch64, export=1 ldr x2, [x0] mov w4, #3 clz x0, x2 @@ -415,7 +415,7 @@ ret endfunc -function x264_coeff_last8_aarch64, export=1 +function coeff_last8_aarch64, export=1 ldr x3, [x0, #8] mov w4, #7 clz x2, x3 @@ -430,7 +430,7 @@ endfunc .macro COEFF_LAST_1x size -function x264_coeff_last\size\()_neon, export=1 +function coeff_last\size\()_neon, export=1 .if \size == 15 sub x0, x0, #2 .endif @@ -450,7 +450,7 @@ COEFF_LAST_1x 15 COEFF_LAST_1x 16 -function x264_coeff_last64_neon, export=1 +function coeff_last64_neon, export=1 ld1 {v0.8h,v1.8h,v2.8h,v3.8h}, [x0], 64 movi v31.8h, #8 movi v30.8h, #1 @@ -523,7 +523,7 @@ mov w0, w7 .endm -function x264_coeff_level_run4_aarch64, export=1 +function coeff_level_run4_aarch64, export=1 ldr x2, [x0] coeff_level_run_start 4 @@ -534,7 +534,7 @@ endfunc .macro 
X264_COEFF_LEVEL_RUN size -function x264_coeff_level_run\size\()_neon, export=1 +function coeff_level_run\size\()_neon, export=1 .if \size == 15 sub x0, x0, #2 .endif @@ -566,7 +566,7 @@ X264_COEFF_LEVEL_RUN 15 X264_COEFF_LEVEL_RUN 16 -function x264_denoise_dct_neon, export=1 +function denoise_dct_neon, export=1 1: subs w3, w3, #16 ld1 {v0.8h,v1.8h}, [x0] ld1 {v4.4s,v5.4s,v6.4s,v7.4s}, [x1] diff -Nru x264-0.152.2854+gite9a5903/common/aarch64/quant.h x264-0.158.2988+git-20191101.7817004/common/aarch64/quant.h --- x264-0.152.2854+gite9a5903/common/aarch64/quant.h 2017-12-31 12:50:51.000000000 +0000 +++ x264-0.158.2988+git-20191101.7817004/common/aarch64/quant.h 2019-11-09 05:16:29.000000000 +0000 @@ -1,7 +1,7 @@ /***************************************************************************** * quant.h: arm quantization and level-run ***************************************************************************** - * Copyright (C) 2005-2017 x264 project + * Copyright (C) 2005-2019 x264 project * * Authors: David Conrad * Janne Grunau @@ -27,33 +27,55 @@ #ifndef X264_AARCH64_QUANT_H #define X264_AARCH64_QUANT_H +#define x264_quant_2x2_dc_aarch64 x264_template(quant_2x2_dc_aarch64) int x264_quant_2x2_dc_aarch64( int16_t dct[4], int mf, int bias ); +#define x264_quant_2x2_dc_neon x264_template(quant_2x2_dc_neon) int x264_quant_2x2_dc_neon( int16_t dct[4], int mf, int bias ); +#define x264_quant_4x4_dc_neon x264_template(quant_4x4_dc_neon) int x264_quant_4x4_dc_neon( int16_t dct[16], int mf, int bias ); +#define x264_quant_4x4_neon x264_template(quant_4x4_neon) int x264_quant_4x4_neon( int16_t dct[16], uint16_t mf[16], uint16_t bias[16] ); +#define x264_quant_4x4x4_neon x264_template(quant_4x4x4_neon) int x264_quant_4x4x4_neon( int16_t dct[4][16], uint16_t mf[16], uint16_t bias[16] ); +#define x264_quant_8x8_neon x264_template(quant_8x8_neon) int x264_quant_8x8_neon( int16_t dct[64], uint16_t mf[64], uint16_t bias[64] ); +#define x264_dequant_4x4_dc_neon 
x264_template(dequant_4x4_dc_neon) void x264_dequant_4x4_dc_neon( int16_t dct[16], int dequant_mf[6][16], int i_qp ); +#define x264_dequant_4x4_neon x264_template(dequant_4x4_neon) void x264_dequant_4x4_neon( int16_t dct[16], int dequant_mf[6][16], int i_qp ); +#define x264_dequant_8x8_neon x264_template(dequant_8x8_neon) void x264_dequant_8x8_neon( int16_t dct[64], int dequant_mf[6][64], int i_qp ); +#define x264_decimate_score15_neon x264_template(decimate_score15_neon) int x264_decimate_score15_neon( int16_t * ); +#define x264_decimate_score16_neon x264_template(decimate_score16_neon) int x264_decimate_score16_neon( int16_t * ); +#define x264_decimate_score64_neon x264_template(decimate_score64_neon) int x264_decimate_score64_neon( int16_t * ); +#define x264_coeff_last4_aarch64 x264_template(coeff_last4_aarch64) int x264_coeff_last4_aarch64( int16_t * ); +#define x264_coeff_last8_aarch64 x264_template(coeff_last8_aarch64) int x264_coeff_last8_aarch64( int16_t * ); +#define x264_coeff_last15_neon x264_template(coeff_last15_neon) int x264_coeff_last15_neon( int16_t * ); +#define x264_coeff_last16_neon x264_template(coeff_last16_neon) int x264_coeff_last16_neon( int16_t * ); +#define x264_coeff_last64_neon x264_template(coeff_last64_neon) int x264_coeff_last64_neon( int16_t * ); +#define x264_coeff_level_run4_aarch64 x264_template(coeff_level_run4_aarch64) int x264_coeff_level_run4_aarch64( int16_t *, x264_run_level_t * ); +#define x264_coeff_level_run8_neon x264_template(coeff_level_run8_neon) int x264_coeff_level_run8_neon( int16_t *, x264_run_level_t * ); +#define x264_coeff_level_run15_neon x264_template(coeff_level_run15_neon) int x264_coeff_level_run15_neon( int16_t *, x264_run_level_t * ); +#define x264_coeff_level_run16_neon x264_template(coeff_level_run16_neon) int x264_coeff_level_run16_neon( int16_t *, x264_run_level_t * ); +#define x264_denoise_dct_neon x264_template(denoise_dct_neon) void x264_denoise_dct_neon( dctcoef *, uint32_t *, udctcoef *, int ); 
#endif diff -Nru x264-0.152.2854+gite9a5903/common/arm/asm.S x264-0.158.2988+git-20191101.7817004/common/arm/asm.S --- x264-0.152.2854+gite9a5903/common/arm/asm.S 2017-12-31 12:50:51.000000000 +0000 +++ x264-0.158.2988+git-20191101.7817004/common/arm/asm.S 2019-11-09 05:16:29.000000000 +0000 @@ -1,7 +1,7 @@ /***************************************************************************** * asm.S: arm utility macros ***************************************************************************** - * Copyright (C) 2008-2017 x264 project + * Copyright (C) 2008-2019 x264 project * * Authors: Mans Rullgard * David Conrad @@ -28,17 +28,32 @@ .syntax unified -#ifndef __APPLE__ +#ifdef __ELF__ .arch armv7-a .fpu neon #endif +#define GLUE(a, b) a ## b +#define JOIN(a, b) GLUE(a, b) + #ifdef PREFIX -# define EXTERN_ASM _ +# define BASE _x264_ +# define SYM_PREFIX _ #else -# define EXTERN_ASM +# define BASE x264_ +# define SYM_PREFIX #endif +#ifdef BIT_DEPTH +# define EXTERN_ASM JOIN(JOIN(BASE, BIT_DEPTH), _) +#else +# define EXTERN_ASM BASE +#endif + +#define X(s) JOIN(EXTERN_ASM, s) +#define X264(s) JOIN(BASE, s) +#define EXT(s) JOIN(SYM_PREFIX, s) + #ifdef __ELF__ # define ELF #else @@ -75,7 +90,11 @@ .macro function name, export=1 .macro endfunc +.if \export +ELF .size EXTERN_ASM\name, . - EXTERN_ASM\name +.else ELF .size \name, . 
- \name +.endif FUNC .endfunc .purgem endfunc .endm @@ -169,10 +188,6 @@ #endif .endm -#define GLUE(a, b) a ## b -#define JOIN(a, b) GLUE(a, b) -#define X(s) JOIN(EXTERN_ASM, s) - #define FENC_STRIDE 16 #define FDEC_STRIDE 32 diff -Nru x264-0.152.2854+gite9a5903/common/arm/bitstream-a.S x264-0.158.2988+git-20191101.7817004/common/arm/bitstream-a.S --- x264-0.152.2854+gite9a5903/common/arm/bitstream-a.S 2017-12-31 12:50:51.000000000 +0000 +++ x264-0.158.2988+git-20191101.7817004/common/arm/bitstream-a.S 2019-11-09 05:16:29.000000000 +0000 @@ -1,7 +1,7 @@ /***************************************************************************** * bitstream-a.S: arm bitstream functions ***************************************************************************** - * Copyright (C) 2014-2017 x264 project + * Copyright (C) 2014-2019 x264 project * * Authors: Janne Grunau * @@ -25,7 +25,7 @@ #include "asm.S" -function x264_nal_escape_neon +function nal_escape_neon push {r4-r5,lr} vmov.u8 q0, #0xff vmov.u8 q8, #4 diff -Nru x264-0.152.2854+gite9a5903/common/arm/bitstream.h x264-0.158.2988+git-20191101.7817004/common/arm/bitstream.h --- x264-0.152.2854+gite9a5903/common/arm/bitstream.h 1970-01-01 00:00:00.000000000 +0000 +++ x264-0.158.2988+git-20191101.7817004/common/arm/bitstream.h 2019-11-09 05:16:29.000000000 +0000 @@ -0,0 +1,32 @@ +/***************************************************************************** + * bitstream.h: arm bitstream functions + ***************************************************************************** + * Copyright (C) 2017-2019 x264 project + * + * Authors: Anton Mitrofanov + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. 
+ * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02111, USA. + * + * This program is also available under a commercial proprietary license. + * For more information, contact us at licensing@x264.com. + *****************************************************************************/ + +#ifndef X264_ARM_BITSTREAM_H +#define X264_ARM_BITSTREAM_H + +#define x264_nal_escape_neon x264_template(nal_escape_neon) +uint8_t *x264_nal_escape_neon( uint8_t *dst, uint8_t *src, uint8_t *end ); + +#endif diff -Nru x264-0.152.2854+gite9a5903/common/arm/cpu-a.S x264-0.158.2988+git-20191101.7817004/common/arm/cpu-a.S --- x264-0.152.2854+gite9a5903/common/arm/cpu-a.S 2017-12-31 12:50:51.000000000 +0000 +++ x264-0.158.2988+git-20191101.7817004/common/arm/cpu-a.S 2019-11-09 05:16:29.000000000 +0000 @@ -1,7 +1,7 @@ /***************************************************************************** * cpu-a.S: arm cpu detection ***************************************************************************** - * Copyright (C) 2009-2017 x264 project + * Copyright (C) 2009-2019 x264 project * * Authors: David Conrad * @@ -29,7 +29,7 @@ // done in gas because .fpu neon overrides the refusal to assemble // instructions the selected -march/-mcpu doesn't support -function x264_cpu_neon_test +function cpu_neon_test vadd.i16 q0, q0, q0 bx lr endfunc @@ -37,7 +37,7 @@ // return: 0 on success // 1 if counters were already enabled // 9 if lo-res counters were already enabled -function x264_cpu_enable_armv7_counter, export=0 +function cpu_enable_armv7_counter, export=0 mrc p15, 0, r2, c9, c12, 0 // read PMNC 
ands r0, r2, #1 andne r0, r2, #9 @@ -50,7 +50,7 @@ bx lr endfunc -function x264_cpu_disable_armv7_counter, export=0 +function cpu_disable_armv7_counter, export=0 mrc p15, 0, r0, c9, c12, 0 // read PMNC bic r0, r0, #1 // disable counters mcr p15, 0, r0, c9, c12, 0 // write PMNC @@ -64,14 +64,14 @@ // return: 0 if transfers neon -> arm transfers take more than 10 cycles // nonzero otherwise -function x264_cpu_fast_neon_mrc_test +function cpu_fast_neon_mrc_test // check for user access to performance counters mrc p15, 0, r0, c9, c14, 0 cmp r0, #0 bxeq lr push {r4-r6,lr} - bl x264_cpu_enable_armv7_counter + bl cpu_enable_armv7_counter ands r1, r0, #8 mov r3, #0 mov ip, #4 @@ -99,7 +99,7 @@ // disable counters if we enabled them ands r0, r0, #1 - bleq x264_cpu_disable_armv7_counter + bleq cpu_disable_armv7_counter lsr r0, r3, #5 cmp r0, #10 diff -Nru x264-0.152.2854+gite9a5903/common/arm/dct-a.S x264-0.158.2988+git-20191101.7817004/common/arm/dct-a.S --- x264-0.152.2854+gite9a5903/common/arm/dct-a.S 2017-12-31 12:50:51.000000000 +0000 +++ x264-0.158.2988+git-20191101.7817004/common/arm/dct-a.S 2019-11-09 05:16:29.000000000 +0000 @@ -1,7 +1,7 @@ /**************************************************************************** * dct-a.S: arm transform and zigzag ***************************************************************************** - * Copyright (C) 2009-2017 x264 project + * Copyright (C) 2009-2019 x264 project * * Authors: David Conrad * Martin Storsjo @@ -62,7 +62,7 @@ .endm -function x264_dct4x4dc_neon +function dct4x4dc_neon vld1.64 {d0-d3}, [r0,:128] SUMSUB_ABCD d4, d5, d6, d7, d0, d1, d2, d3 SUMSUB_ABCD d0, d2, d3, d1, d4, d6, d5, d7 @@ -81,7 +81,7 @@ bx lr endfunc -function x264_idct4x4dc_neon +function idct4x4dc_neon vld1.64 {d0-d3}, [r0,:128] SUMSUB_ABCD d4, d5, d6, d7, d0, d1, d2, d3 SUMSUB_ABCD d0, d2, d3, d1, d4, d6, d5, d7 @@ -105,7 +105,7 @@ vsub.s16 \d3, \d7, \d5 .endm -function x264_sub4x4_dct_neon +function sub4x4_dct_neon mov r3, #FENC_STRIDE mov ip, 
#FDEC_STRIDE vld1.32 {d0[]}, [r1,:32], r3 @@ -128,7 +128,7 @@ bx lr endfunc -function x264_sub8x4_dct_neon, export=0 +function sub8x4_dct_neon, export=0 vld1.64 {d0}, [r1,:64], r3 vld1.64 {d1}, [r2,:64], ip vsubl.u8 q8, d0, d1 @@ -164,34 +164,34 @@ bx lr endfunc -function x264_sub8x8_dct_neon +function sub8x8_dct_neon push {lr} mov r3, #FENC_STRIDE mov ip, #FDEC_STRIDE - bl x264_sub8x4_dct_neon + bl sub8x4_dct_neon pop {lr} - b x264_sub8x4_dct_neon + b sub8x4_dct_neon endfunc -function x264_sub16x16_dct_neon +function sub16x16_dct_neon push {lr} mov r3, #FENC_STRIDE mov ip, #FDEC_STRIDE - bl x264_sub8x4_dct_neon - bl x264_sub8x4_dct_neon + bl sub8x4_dct_neon + bl sub8x4_dct_neon sub r1, r1, #8*FENC_STRIDE-8 sub r2, r2, #8*FDEC_STRIDE-8 - bl x264_sub8x4_dct_neon - bl x264_sub8x4_dct_neon + bl sub8x4_dct_neon + bl sub8x4_dct_neon sub r1, r1, #8 sub r2, r2, #8 - bl x264_sub8x4_dct_neon - bl x264_sub8x4_dct_neon + bl sub8x4_dct_neon + bl sub8x4_dct_neon sub r1, r1, #8*FENC_STRIDE-8 sub r2, r2, #8*FDEC_STRIDE-8 - bl x264_sub8x4_dct_neon + bl sub8x4_dct_neon pop {lr} - b x264_sub8x4_dct_neon + b sub8x4_dct_neon endfunc @@ -226,7 +226,7 @@ SUMSUB_SHR2 2, q11, q13, q3, q13, q0, q1 .endm -function x264_sub8x8_dct8_neon +function sub8x8_dct8_neon mov r3, #FENC_STRIDE mov ip, #FDEC_STRIDE vld1.64 {d16}, [r1,:64], r3 @@ -278,19 +278,19 @@ bx lr endfunc -function x264_sub16x16_dct8_neon +function sub16x16_dct8_neon push {lr} - bl X(x264_sub8x8_dct8_neon) + bl X(sub8x8_dct8_neon) sub r1, r1, #FENC_STRIDE*8 - 8 sub r2, r2, #FDEC_STRIDE*8 - 8 - bl X(x264_sub8x8_dct8_neon) + bl X(sub8x8_dct8_neon) sub r1, r1, #8 sub r2, r2, #8 - bl X(x264_sub8x8_dct8_neon) + bl X(sub8x8_dct8_neon) pop {lr} sub r1, r1, #FENC_STRIDE*8 - 8 sub r2, r2, #FDEC_STRIDE*8 - 8 - b X(x264_sub8x8_dct8_neon) + b X(sub8x8_dct8_neon) endfunc @@ -303,7 +303,7 @@ vadd.s16 \d6, \d6, \d1 .endm -function x264_add4x4_idct_neon +function add4x4_idct_neon mov r2, #FDEC_STRIDE vld1.64 {d0-d3}, [r1,:128] @@ -335,7 +335,7 
@@ bx lr endfunc -function x264_add8x4_idct_neon, export=0 +function add8x4_idct_neon, export=0 vld1.64 {d0-d3}, [r1,:128]! IDCT_1D d16, d18, d20, d22, d0, d1, d2, d3 vld1.64 {d4-d7}, [r1,:128]! @@ -375,29 +375,29 @@ bx lr endfunc -function x264_add8x8_idct_neon +function add8x8_idct_neon mov r2, #FDEC_STRIDE mov ip, lr - bl x264_add8x4_idct_neon + bl add8x4_idct_neon mov lr, ip - b x264_add8x4_idct_neon + b add8x4_idct_neon endfunc -function x264_add16x16_idct_neon +function add16x16_idct_neon mov r2, #FDEC_STRIDE mov ip, lr - bl x264_add8x4_idct_neon - bl x264_add8x4_idct_neon + bl add8x4_idct_neon + bl add8x4_idct_neon sub r0, r0, #8*FDEC_STRIDE-8 - bl x264_add8x4_idct_neon - bl x264_add8x4_idct_neon + bl add8x4_idct_neon + bl add8x4_idct_neon sub r0, r0, #8 - bl x264_add8x4_idct_neon - bl x264_add8x4_idct_neon + bl add8x4_idct_neon + bl add8x4_idct_neon sub r0, r0, #8*FDEC_STRIDE-8 - bl x264_add8x4_idct_neon + bl add8x4_idct_neon mov lr, ip - b x264_add8x4_idct_neon + b add8x4_idct_neon endfunc @@ -435,7 +435,7 @@ SUMSUB_AB q11, q12, q2, q12 .endm -function x264_add8x8_idct8_neon +function add8x8_idct8_neon mov r2, #FDEC_STRIDE vld1.64 {d16-d19}, [r1,:128]! vld1.64 {d20-d23}, [r1,:128]! 
@@ -497,20 +497,20 @@ bx lr endfunc -function x264_add16x16_idct8_neon +function add16x16_idct8_neon mov ip, lr - bl X(x264_add8x8_idct8_neon) + bl X(add8x8_idct8_neon) sub r0, r0, #8*FDEC_STRIDE-8 - bl X(x264_add8x8_idct8_neon) + bl X(add8x8_idct8_neon) sub r0, r0, #8 - bl X(x264_add8x8_idct8_neon) + bl X(add8x8_idct8_neon) sub r0, r0, #8*FDEC_STRIDE-8 mov lr, ip - b X(x264_add8x8_idct8_neon) + b X(add8x8_idct8_neon) endfunc -function x264_add8x8_idct_dc_neon +function add8x8_idct_dc_neon mov r2, #FDEC_STRIDE vld1.64 {d16}, [r1,:64] vrshr.s16 d16, d16, #6 @@ -593,7 +593,7 @@ vst1.64 {d22-d23}, [r2,:128], r3 .endm -function x264_add16x16_idct_dc_neon +function add16x16_idct_dc_neon mov r2, r0 mov r3, #FDEC_STRIDE vmov.i16 q15, #0 @@ -609,7 +609,7 @@ bx lr endfunc -function x264_sub8x8_dct_dc_neon +function sub8x8_dct_dc_neon mov r3, #FENC_STRIDE mov ip, #FDEC_STRIDE vld1.64 {d16}, [r1,:64], r3 @@ -657,7 +657,7 @@ bx lr endfunc -function x264_sub8x16_dct_dc_neon +function sub8x16_dct_dc_neon mov r3, #FENC_STRIDE mov ip, #FDEC_STRIDE vld1.64 {d16}, [r1,:64], r3 @@ -751,7 +751,7 @@ endfunc -function x264_zigzag_scan_4x4_frame_neon +function zigzag_scan_4x4_frame_neon movrel r2, scan4x4_frame vld1.64 {d0-d3}, [r1,:128] vld1.64 {d16-d19}, [r2,:128] diff -Nru x264-0.152.2854+gite9a5903/common/arm/dct.h x264-0.158.2988+git-20191101.7817004/common/arm/dct.h --- x264-0.152.2854+gite9a5903/common/arm/dct.h 2017-12-31 12:50:51.000000000 +0000 +++ x264-0.158.2988+git-20191101.7817004/common/arm/dct.h 2019-11-09 05:16:29.000000000 +0000 @@ -1,7 +1,7 @@ /***************************************************************************** * dct.h: arm transform and zigzag ***************************************************************************** - * Copyright (C) 2009-2017 x264 project + * Copyright (C) 2009-2019 x264 project * * Authors: David Conrad * @@ -26,28 +26,45 @@ #ifndef X264_ARM_DCT_H #define X264_ARM_DCT_H +#define x264_dct4x4dc_neon x264_template(dct4x4dc_neon) void 
x264_dct4x4dc_neon( int16_t d[16] ); +#define x264_idct4x4dc_neon x264_template(idct4x4dc_neon) void x264_idct4x4dc_neon( int16_t d[16] ); +#define x264_sub4x4_dct_neon x264_template(sub4x4_dct_neon) void x264_sub4x4_dct_neon( int16_t dct[16], uint8_t *pix1, uint8_t *pix2 ); +#define x264_sub8x8_dct_neon x264_template(sub8x8_dct_neon) void x264_sub8x8_dct_neon( int16_t dct[4][16], uint8_t *pix1, uint8_t *pix2 ); +#define x264_sub16x16_dct_neon x264_template(sub16x16_dct_neon) void x264_sub16x16_dct_neon( int16_t dct[16][16], uint8_t *pix1, uint8_t *pix2 ); +#define x264_add4x4_idct_neon x264_template(add4x4_idct_neon) void x264_add4x4_idct_neon( uint8_t *p_dst, int16_t dct[16] ); +#define x264_add8x8_idct_neon x264_template(add8x8_idct_neon) void x264_add8x8_idct_neon( uint8_t *p_dst, int16_t dct[4][16] ); +#define x264_add16x16_idct_neon x264_template(add16x16_idct_neon) void x264_add16x16_idct_neon( uint8_t *p_dst, int16_t dct[16][16] ); +#define x264_add8x8_idct_dc_neon x264_template(add8x8_idct_dc_neon) void x264_add8x8_idct_dc_neon( uint8_t *p_dst, int16_t dct[4] ); +#define x264_add16x16_idct_dc_neon x264_template(add16x16_idct_dc_neon) void x264_add16x16_idct_dc_neon( uint8_t *p_dst, int16_t dct[16] ); +#define x264_sub8x8_dct_dc_neon x264_template(sub8x8_dct_dc_neon) void x264_sub8x8_dct_dc_neon( int16_t dct[4], uint8_t *pix1, uint8_t *pix2 ); +#define x264_sub8x16_dct_dc_neon x264_template(sub8x16_dct_dc_neon) void x264_sub8x16_dct_dc_neon( int16_t dct[8], uint8_t *pix1, uint8_t *pix2 ); +#define x264_sub8x8_dct8_neon x264_template(sub8x8_dct8_neon) void x264_sub8x8_dct8_neon( int16_t dct[64], uint8_t *pix1, uint8_t *pix2 ); +#define x264_sub16x16_dct8_neon x264_template(sub16x16_dct8_neon) void x264_sub16x16_dct8_neon( int16_t dct[4][64], uint8_t *pix1, uint8_t *pix2 ); +#define x264_add8x8_idct8_neon x264_template(add8x8_idct8_neon) void x264_add8x8_idct8_neon( uint8_t *p_dst, int16_t dct[64] ); +#define x264_add16x16_idct8_neon 
x264_template(add16x16_idct8_neon) void x264_add16x16_idct8_neon( uint8_t *p_dst, int16_t dct[4][64] ); +#define x264_zigzag_scan_4x4_frame_neon x264_template(zigzag_scan_4x4_frame_neon) void x264_zigzag_scan_4x4_frame_neon( int16_t level[16], int16_t dct[16] ); #endif diff -Nru x264-0.152.2854+gite9a5903/common/arm/deblock-a.S x264-0.158.2988+git-20191101.7817004/common/arm/deblock-a.S --- x264-0.152.2854+gite9a5903/common/arm/deblock-a.S 2017-12-31 12:50:51.000000000 +0000 +++ x264-0.158.2988+git-20191101.7817004/common/arm/deblock-a.S 2019-11-09 05:16:29.000000000 +0000 @@ -1,7 +1,7 @@ /***************************************************************************** * deblock.S: arm deblocking ***************************************************************************** - * Copyright (C) 2009-2017 x264 project + * Copyright (C) 2009-2019 x264 project * * Authors: Mans Rullgard * Martin Storsjo @@ -117,7 +117,7 @@ vqmovun.s16 d1, q12 .endm -function x264_deblock_v_luma_neon +function deblock_v_luma_neon h264_loop_filter_start vld1.64 {d0, d1}, [r0,:128], r1 @@ -143,7 +143,7 @@ bx lr endfunc -function x264_deblock_h_luma_neon +function deblock_h_luma_neon h264_loop_filter_start sub r0, r0, #4 @@ -324,7 +324,7 @@ .endm -function x264_deblock_v_luma_intra_neon +function deblock_v_luma_intra_neon push {lr} vld1.64 {d0, d1}, [r0,:128], r1 vld1.64 {d2, d3}, [r0,:128], r1 @@ -352,7 +352,7 @@ pop {pc} endfunc -function x264_deblock_h_luma_intra_neon +function deblock_h_luma_intra_neon push {lr} sub r0, r0, #4 vld1.64 {d22}, [r0], r1 @@ -447,7 +447,7 @@ vqmovun.s16 d1, q12 .endm -function x264_deblock_v_chroma_neon +function deblock_v_chroma_neon h264_loop_filter_start sub r0, r0, r1, lsl #1 @@ -465,7 +465,7 @@ bx lr endfunc -function x264_deblock_h_chroma_neon +function deblock_h_chroma_neon h264_loop_filter_start sub r0, r0, #4 @@ -499,7 +499,7 @@ bx lr endfunc -function x264_deblock_h_chroma_422_neon +function deblock_h_chroma_422_neon h264_loop_filter_start push {lr} sub 
r0, r0, #4 @@ -547,7 +547,7 @@ vqmovun.s16 d0, q11 .endm -function x264_deblock_h_chroma_mbaff_neon +function deblock_h_chroma_mbaff_neon h264_loop_filter_start sub r0, r0, #4 @@ -610,7 +610,7 @@ vbit q0, q2, q13 .endm -function x264_deblock_v_chroma_intra_neon +function deblock_v_chroma_intra_neon sub r0, r0, r1, lsl #1 vld2.8 {d18,d19}, [r0,:128], r1 vld2.8 {d16,d17}, [r0,:128], r1 @@ -626,7 +626,7 @@ bx lr endfunc -function x264_deblock_h_chroma_intra_neon +function deblock_h_chroma_intra_neon sub r0, r0, #4 vld1.8 {d18}, [r0], r1 vld1.8 {d16}, [r0], r1 @@ -657,15 +657,15 @@ bx lr endfunc -function x264_deblock_h_chroma_422_intra_neon +function deblock_h_chroma_422_intra_neon push {lr} - bl X(x264_deblock_h_chroma_intra_neon) + bl X(deblock_h_chroma_intra_neon) add r0, r0, #2 pop {lr} - b X(x264_deblock_h_chroma_intra_neon) + b X(deblock_h_chroma_intra_neon) endfunc -function x264_deblock_h_chroma_intra_mbaff_neon +function deblock_h_chroma_intra_mbaff_neon sub r0, r0, #4 vld1.8 {d18}, [r0], r1 vld1.8 {d16}, [r0], r1 @@ -688,7 +688,7 @@ bx lr endfunc -function x264_deblock_strength_neon +function deblock_strength_neon ldr ip, [sp] vmov.i8 q8, #0 lsl ip, ip, #8 diff -Nru x264-0.152.2854+gite9a5903/common/arm/deblock.h x264-0.158.2988+git-20191101.7817004/common/arm/deblock.h --- x264-0.152.2854+gite9a5903/common/arm/deblock.h 1970-01-01 00:00:00.000000000 +0000 +++ x264-0.158.2988+git-20191101.7817004/common/arm/deblock.h 2019-11-09 05:16:29.000000000 +0000 @@ -0,0 +1,58 @@ +/***************************************************************************** + * deblock.h: arm deblocking + ***************************************************************************** + * Copyright (C) 2017-2019 x264 project + * + * Authors: Anton Mitrofanov + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your 
option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02111, USA. + * + * This program is also available under a commercial proprietary license. + * For more information, contact us at licensing@x264.com. + *****************************************************************************/ + +#ifndef X264_ARM_DEBLOCK_H +#define X264_ARM_DEBLOCK_H + +#define x264_deblock_v_luma_neon x264_template(deblock_v_luma_neon) +void x264_deblock_v_luma_neon ( uint8_t *pix, intptr_t stride, int alpha, int beta, int8_t *tc0 ); +#define x264_deblock_h_luma_neon x264_template(deblock_h_luma_neon) +void x264_deblock_h_luma_neon ( uint8_t *pix, intptr_t stride, int alpha, int beta, int8_t *tc0 ); +#define x264_deblock_v_chroma_neon x264_template(deblock_v_chroma_neon) +void x264_deblock_v_chroma_neon( uint8_t *pix, intptr_t stride, int alpha, int beta, int8_t *tc0 ); +#define x264_deblock_h_chroma_neon x264_template(deblock_h_chroma_neon) +void x264_deblock_h_chroma_neon( uint8_t *pix, intptr_t stride, int alpha, int beta, int8_t *tc0 ); +#define x264_deblock_strength_neon x264_template(deblock_strength_neon) +void x264_deblock_strength_neon( uint8_t nnz[X264_SCAN8_SIZE], int8_t ref[2][X264_SCAN8_LUMA_SIZE], + int16_t mv[2][X264_SCAN8_LUMA_SIZE][2], uint8_t bs[2][8][4], + int mvy_limit, int bframe ); +#define x264_deblock_h_chroma_422_neon x264_template(deblock_h_chroma_422_neon) +void x264_deblock_h_chroma_422_neon( uint8_t *pix, intptr_t stride, int alpha, int beta, int8_t *tc0 ); +#define x264_deblock_h_chroma_mbaff_neon 
x264_template(deblock_h_chroma_mbaff_neon) +void x264_deblock_h_chroma_mbaff_neon( uint8_t *pix, intptr_t stride, int alpha, int beta, int8_t *tc0 ); +#define x264_deblock_h_chroma_intra_mbaff_neon x264_template(deblock_h_chroma_intra_mbaff_neon) +void x264_deblock_h_chroma_intra_mbaff_neon( uint8_t *pix, intptr_t stride, int alpha, int beta ); +#define x264_deblock_h_chroma_intra_neon x264_template(deblock_h_chroma_intra_neon) +void x264_deblock_h_chroma_intra_neon( uint8_t *pix, intptr_t stride, int alpha, int beta ); +#define x264_deblock_h_chroma_422_intra_neon x264_template(deblock_h_chroma_422_intra_neon) +void x264_deblock_h_chroma_422_intra_neon( uint8_t *pix, intptr_t stride, int alpha, int beta ); +#define x264_deblock_v_chroma_intra_neon x264_template(deblock_v_chroma_intra_neon) +void x264_deblock_v_chroma_intra_neon( uint8_t *pix, intptr_t stride, int alpha, int beta ); +#define x264_deblock_h_luma_intra_neon x264_template(deblock_h_luma_intra_neon) +void x264_deblock_h_luma_intra_neon( uint8_t *pix, intptr_t stride, int alpha, int beta ); +#define x264_deblock_v_luma_intra_neon x264_template(deblock_v_luma_intra_neon) +void x264_deblock_v_luma_intra_neon( uint8_t *pix, intptr_t stride, int alpha, int beta ); + +#endif diff -Nru x264-0.152.2854+gite9a5903/common/arm/mc-a.S x264-0.158.2988+git-20191101.7817004/common/arm/mc-a.S --- x264-0.152.2854+gite9a5903/common/arm/mc-a.S 2017-12-31 12:50:51.000000000 +0000 +++ x264-0.158.2988+git-20191101.7817004/common/arm/mc-a.S 2019-11-09 05:16:29.000000000 +0000 @@ -1,7 +1,7 @@ /***************************************************************************** * mc.S: arm motion compensation ***************************************************************************** - * Copyright (C) 2009-2017 x264 project + * Copyright (C) 2009-2019 x264 project * * Authors: David Conrad * Mans Rullgard @@ -38,7 +38,7 @@ // They also use nothing above armv5te, but we don't care about pre-armv6 // void prefetch_ref( uint8_t *pix, 
intptr_t stride, int parity ) -function x264_prefetch_ref_arm +function prefetch_ref_arm sub r2, r2, #1 add r0, r0, #64 and r2, r2, r1 @@ -58,7 +58,7 @@ // void prefetch_fenc( uint8_t *pix_y, intptr_t stride_y, // uint8_t *pix_uv, intptr_t stride_uv, int mb_x ) -function x264_prefetch_fenc_arm +function prefetch_fenc_arm ldr ip, [sp] push {lr} and lr, ip, #3 @@ -83,8 +83,8 @@ endfunc -// void *x264_memcpy_aligned( void *dst, const void *src, size_t n ) -function x264_memcpy_aligned_neon +// void *memcpy_aligned( void *dst, const void *src, size_t n ) +function memcpy_aligned_neon orr r3, r0, r1, lsr #1 movrel ip, memcpy_table and r3, r3, #0xc @@ -150,8 +150,8 @@ .ltorg -// void x264_memzero_aligned( void *dst, size_t n ) -function x264_memzero_aligned_neon +// void memzero_aligned( void *dst, size_t n ) +function memzero_aligned_neon vmov.i8 q0, #0 vmov.i8 q1, #0 memzero_loop: @@ -168,18 +168,18 @@ // uint8_t *src1, intptr_t src1_stride, // uint8_t *src2, intptr_t src2_stride, int weight ); .macro AVGH w h -function x264_pixel_avg_\w\()x\h\()_neon +function pixel_avg_\w\()x\h\()_neon ldr ip, [sp, #8] push {r4-r6,lr} cmp ip, #32 ldrd r4, r5, [sp, #16] mov lr, #\h - beq x264_pixel_avg_w\w\()_neon + beq pixel_avg_w\w\()_neon rsbs r6, ip, #64 - blt x264_pixel_avg_weight_w\w\()_add_sub_neon // weight > 64 + blt pixel_avg_weight_w\w\()_add_sub_neon // weight > 64 cmp ip, #0 - bge x264_pixel_avg_weight_w\w\()_add_add_neon - b x264_pixel_avg_weight_w\w\()_sub_add_neon // weight < 0 + bge pixel_avg_weight_w\w\()_add_add_neon + b pixel_avg_weight_w\w\()_sub_add_neon // weight < 0 endfunc .endm @@ -244,7 +244,7 @@ .endm .macro AVG_WEIGHT ext -function x264_pixel_avg_weight_w4_\ext\()_neon, export=0 +function pixel_avg_weight_w4_\ext\()_neon, export=0 load_weights_\ext 1: // height loop subs lr, lr, #2 @@ -260,7 +260,7 @@ pop {r4-r6,pc} endfunc -function x264_pixel_avg_weight_w8_\ext\()_neon, export=0 +function pixel_avg_weight_w8_\ext\()_neon, export=0 load_weights_\ext 1: // 
height loop subs lr, lr, #4 @@ -284,7 +284,7 @@ pop {r4-r6,pc} endfunc -function x264_pixel_avg_weight_w16_\ext\()_neon, export=0 +function pixel_avg_weight_w16_\ext\()_neon, export=0 load_weights_\ext 1: // height loop subs lr, lr, #2 @@ -309,7 +309,7 @@ AVG_WEIGHT add_sub AVG_WEIGHT sub_add -function x264_pixel_avg_w4_neon, export=0 +function pixel_avg_w4_neon, export=0 subs lr, lr, #2 vld1.32 {d0[]}, [r2], r3 vld1.32 {d2[]}, [r4], r5 @@ -319,11 +319,11 @@ vrhadd.u8 d1, d1, d3 vst1.32 {d0[0]}, [r0,:32], r1 vst1.32 {d1[0]}, [r0,:32], r1 - bgt x264_pixel_avg_w4_neon + bgt pixel_avg_w4_neon pop {r4-r6,pc} endfunc -function x264_pixel_avg_w8_neon, export=0 +function pixel_avg_w8_neon, export=0 subs lr, lr, #4 vld1.64 {d0}, [r2], r3 vld1.64 {d2}, [r4], r5 @@ -341,11 +341,11 @@ vrhadd.u8 d3, d3, d5 vst1.64 {d2}, [r0,:64], r1 vst1.64 {d3}, [r0,:64], r1 - bgt x264_pixel_avg_w8_neon + bgt pixel_avg_w8_neon pop {r4-r6,pc} endfunc -function x264_pixel_avg_w16_neon, export=0 +function pixel_avg_w16_neon, export=0 subs lr, lr, #4 vld1.64 {d0-d1}, [r2], r3 vld1.64 {d2-d3}, [r4], r5 @@ -363,12 +363,12 @@ vrhadd.u8 q3, q3, q0 vst1.64 {d4-d5}, [r0,:128], r1 vst1.64 {d6-d7}, [r0,:128], r1 - bgt x264_pixel_avg_w16_neon + bgt pixel_avg_w16_neon pop {r4-r6,pc} endfunc -function x264_pixel_avg2_w4_neon +function pixel_avg2_w4_neon ldr ip, [sp, #4] push {lr} ldr lr, [sp, #4] @@ -386,7 +386,7 @@ pop {pc} endfunc -function x264_pixel_avg2_w8_neon +function pixel_avg2_w8_neon ldr ip, [sp, #4] push {lr} ldr lr, [sp, #4] @@ -404,7 +404,7 @@ pop {pc} endfunc -function x264_pixel_avg2_w16_neon +function pixel_avg2_w16_neon ldr ip, [sp, #4] push {lr} ldr lr, [sp, #4] @@ -422,7 +422,7 @@ pop {pc} endfunc -function x264_pixel_avg2_w20_neon +function pixel_avg2_w20_neon ldr ip, [sp, #4] push {lr} sub r1, r1, #16 @@ -464,7 +464,7 @@ // void mc_weight( uint8_t *src, intptr_t src_stride, uint8_t *dst, intptr_t dst_stride, // const x264_weight_t *weight, int height ) -function x264_mc_weight_w20_neon 
+function mc_weight_w20_neon weight_prologue full sub r1, #16 weight20_loop: @@ -500,7 +500,7 @@ pop {r4-r5,pc} endfunc -function x264_mc_weight_w16_neon +function mc_weight_w16_neon weight_prologue full weight16_loop: subs ip, #2 @@ -528,7 +528,7 @@ pop {r4-r5,pc} endfunc -function x264_mc_weight_w8_neon +function mc_weight_w8_neon weight_prologue full weight8_loop: subs ip, #2 @@ -548,7 +548,7 @@ pop {r4-r5,pc} endfunc -function x264_mc_weight_w4_neon +function mc_weight_w4_neon weight_prologue full weight4_loop: subs ip, #2 @@ -564,7 +564,7 @@ pop {r4-r5,pc} endfunc -function x264_mc_weight_w20_nodenom_neon +function mc_weight_w20_nodenom_neon weight_prologue nodenom sub r1, #16 weight20_nodenom_loop: @@ -595,7 +595,7 @@ pop {r4-r5,pc} endfunc -function x264_mc_weight_w16_nodenom_neon +function mc_weight_w16_nodenom_neon weight_prologue nodenom weight16_nodenom_loop: subs ip, #2 @@ -619,7 +619,7 @@ pop {r4-r5,pc} endfunc -function x264_mc_weight_w8_nodenom_neon +function mc_weight_w8_nodenom_neon weight_prologue nodenom weight8_nodenom_loop: subs ip, #2 @@ -637,7 +637,7 @@ pop {r4-r5,pc} endfunc -function x264_mc_weight_w4_nodenom_neon +function mc_weight_w4_nodenom_neon weight_prologue nodenom weight4_nodenom_loop: subs ip, #2 @@ -661,7 +661,7 @@ .endm .macro weight_simple name op -function x264_mc_weight_w20_\name\()_neon +function mc_weight_w20_\name\()_neon weight_simple_prologue weight20_\name\()_loop: subs ip, #2 @@ -676,7 +676,7 @@ pop {pc} endfunc -function x264_mc_weight_w16_\name\()_neon +function mc_weight_w16_\name\()_neon weight_simple_prologue weight16_\name\()_loop: subs ip, #2 @@ -690,7 +690,7 @@ pop {pc} endfunc -function x264_mc_weight_w8_\name\()_neon +function mc_weight_w8_\name\()_neon weight_simple_prologue weight8_\name\()_loop: subs ip, #2 @@ -703,7 +703,7 @@ pop {pc} endfunc -function x264_mc_weight_w4_\name\()_neon +function mc_weight_w4_\name\()_neon weight_simple_prologue weight4_\name\()_loop: subs ip, #2 @@ -722,7 +722,7 @@ // void 
mc_copy( uint8_t *dst, intptr_t dst_stride, uint8_t *src, intptr_t src_stride, int height ) -function x264_mc_copy_w4_neon +function mc_copy_w4_neon ldr ip, [sp] copy_w4_loop: subs ip, ip, #4 @@ -738,7 +738,7 @@ bx lr endfunc -function x264_mc_copy_w8_neon +function mc_copy_w8_neon ldr ip, [sp] copy_w8_loop: subs ip, ip, #4 @@ -754,7 +754,7 @@ bx lr endfunc -function x264_mc_copy_w16_neon +function mc_copy_w16_neon ldr ip, [sp] copy_w16_loop: subs ip, ip, #4 @@ -770,7 +770,7 @@ bx lr endfunc -function x264_mc_copy_w16_aligned_neon +function mc_copy_w16_aligned_neon ldr ip, [sp] copy_w16_aligned_loop: subs ip, ip, #4 @@ -787,11 +787,10 @@ endfunc -// void x264_mc_chroma_neon( uint8_t *dst, intptr_t i_dst_stride, -// uint8_t *src, intptr_t i_src_stride, -// int dx, int dy, int i_width, int i_height ); - -function x264_mc_chroma_neon +// void mc_chroma( uint8_t *dst, intptr_t i_dst_stride, +// uint8_t *src, intptr_t i_src_stride, +// int dx, int dy, int i_width, int i_height ); +function mc_chroma_neon push {r4-r8, lr} vpush {d8-d11} ldrd r4, r5, [sp, #56] @@ -1138,7 +1137,7 @@ // hpel_filter_v( uint8_t *dst, uint8_t *src, int16_t *buf, intptr_t stride, int width ) -function x264_hpel_filter_v_neon +function hpel_filter_v_neon ldr ip, [sp] sub r1, r1, r3, lsl #1 push {lr} @@ -1178,7 +1177,7 @@ endfunc // hpel_filter_c( uint8_t *dst, int16_t *buf, int width ); -function x264_hpel_filter_c_neon +function hpel_filter_c_neon sub r1, #16 vld1.64 {d0-d3}, [r1,:128]! @@ -1263,7 +1262,7 @@ endfunc // hpel_filter_h( uint8_t *dst, uint8_t *src, int width ); -function x264_hpel_filter_h_neon +function hpel_filter_h_neon sub r1, #16 vmov.u8 d30, #5 vld1.64 {d0-d3}, [r1,:128]! 
@@ -1353,7 +1352,7 @@ // frame_init_lowres_core( uint8_t *src0, uint8_t *dst0, uint8_t *dsth, uint8_t *dstv, // uint8_t *dstc, intptr_t src_stride, intptr_t dst_stride, int width, // int height ) -function x264_frame_init_lowres_core_neon +function frame_init_lowres_core_neon push {r4-r10,lr} vpush {d8-d15} ldrd r4, r5, [sp, #96] @@ -1441,7 +1440,7 @@ pop {r4-r10,pc} endfunc -function x264_load_deinterleave_chroma_fdec_neon +function load_deinterleave_chroma_fdec_neon mov ip, #FDEC_STRIDE/2 1: vld2.8 {d0-d1}, [r1,:128], r2 @@ -1454,7 +1453,7 @@ bx lr endfunc -function x264_load_deinterleave_chroma_fenc_neon +function load_deinterleave_chroma_fenc_neon mov ip, #FENC_STRIDE/2 1: vld2.8 {d0-d1}, [r1,:128], r2 @@ -1467,7 +1466,7 @@ bx lr endfunc -function x264_plane_copy_core_neon +function plane_copy_core_neon push {r4,lr} ldr r4, [sp, #8] ldr lr, [sp, #12] @@ -1498,7 +1497,7 @@ pop {r4,pc} endfunc -function x264_plane_copy_deinterleave_neon +function plane_copy_deinterleave_neon push {r4-r7, lr} ldrd r6, r7, [sp, #28] ldrd r4, r5, [sp, #20] @@ -1524,7 +1523,7 @@ pop {r4-r7, pc} endfunc -function x264_plane_copy_deinterleave_rgb_neon +function plane_copy_deinterleave_rgb_neon push {r4-r8, r10, r11, lr} ldrd r4, r5, [sp, #32] ldrd r6, r7, [sp, #40] @@ -1576,7 +1575,7 @@ pop {r4-r8, r10, r11, pc} endfunc -function x264_plane_copy_interleave_core_neon +function plane_copy_interleave_core_neon push {r4-r7, lr} ldrd r6, r7, [sp, #28] ldrd r4, r5, [sp, #20] @@ -1603,7 +1602,7 @@ pop {r4-r7, pc} endfunc -function x264_plane_copy_swap_core_neon +function plane_copy_swap_core_neon push {r4-r5, lr} ldrd r4, r5, [sp, #12] add lr, r4, #15 @@ -1627,7 +1626,7 @@ pop {r4-r5, pc} endfunc -function x264_store_interleave_chroma_neon +function store_interleave_chroma_neon push {lr} ldr lr, [sp, #4] mov ip, #FDEC_STRIDE @@ -1651,7 +1650,7 @@ vadd.u16 q0, q0, q2 .endm -function x264_integral_init4h_neon +function integral_init4h_neon sub r3, r0, r2, lsl #1 vld1.8 {d6, d7}, [r1, :128]! 
1: @@ -1686,7 +1685,7 @@ vadd.u16 q0, q0, \s .endm -function x264_integral_init8h_neon +function integral_init8h_neon sub r3, r0, r2, lsl #1 vld1.8 {d16, d17}, [r1, :128]! 1: @@ -1703,7 +1702,7 @@ bx lr endfunc -function x264_integral_init4v_neon +function integral_init4v_neon push {r4-r5} mov r3, r0 add r4, r0, r2, lsl #3 @@ -1742,7 +1741,7 @@ bx lr endfunc -function x264_integral_init8v_neon +function integral_init8v_neon add r2, r0, r1, lsl #4 sub r1, r1, #8 ands r3, r1, #16 - 1 @@ -1766,7 +1765,7 @@ bx lr endfunc -function x264_mbtree_propagate_cost_neon +function mbtree_propagate_cost_neon push {r4-r5,lr} ldrd r4, r5, [sp, #12] ldr lr, [sp, #20] @@ -1816,7 +1815,7 @@ pop {r4-r5,pc} endfunc -function x264_mbtree_propagate_list_internal_neon +function mbtree_propagate_list_internal_neon vld1.16 {d4[]}, [sp] @ bipred_weight movrel r12, pw_0to15 vmov.u16 q10, #0xc000 @@ -1882,7 +1881,7 @@ endfunc @ void mbtree_fix8_pack( int16_t *dst, float *src, int count ) -function x264_mbtree_fix8_pack_neon, export=1 +function mbtree_fix8_pack_neon, export=1 subs r3, r2, #8 blt 2f 1: @@ -1910,7 +1909,7 @@ endfunc @ void mbtree_fix8_unpack( float *dst, int16_t *src, int count ) -function x264_mbtree_fix8_unpack_neon, export=1 +function mbtree_fix8_unpack_neon, export=1 subs r3, r2, #8 blt 2f 1: diff -Nru x264-0.152.2854+gite9a5903/common/arm/mc-c.c x264-0.158.2988+git-20191101.7817004/common/arm/mc-c.c --- x264-0.152.2854+gite9a5903/common/arm/mc-c.c 2017-12-31 12:50:51.000000000 +0000 +++ x264-0.158.2988+git-20191101.7817004/common/arm/mc-c.c 2019-11-09 05:16:29.000000000 +0000 @@ -1,7 +1,7 @@ /***************************************************************************** * mc-c.c: arm motion compensation ***************************************************************************** - * Copyright (C) 2009-2017 x264 project + * Copyright (C) 2009-2019 x264 project * * Authors: David Conrad * Janne Grunau @@ -27,46 +27,87 @@ #include "common/common.h" #include "mc.h" +#define 
x264_prefetch_ref_arm x264_template(prefetch_ref_arm) void x264_prefetch_ref_arm( uint8_t *, intptr_t, int ); +#define x264_prefetch_fenc_arm x264_template(prefetch_fenc_arm) void x264_prefetch_fenc_arm( uint8_t *, intptr_t, uint8_t *, intptr_t, int ); +#define x264_memcpy_aligned_neon x264_template(memcpy_aligned_neon) void *x264_memcpy_aligned_neon( void *dst, const void *src, size_t n ); +#define x264_memzero_aligned_neon x264_template(memzero_aligned_neon) void x264_memzero_aligned_neon( void *dst, size_t n ); +#define x264_pixel_avg_16x16_neon x264_template(pixel_avg_16x16_neon) void x264_pixel_avg_16x16_neon( uint8_t *, intptr_t, uint8_t *, intptr_t, uint8_t *, intptr_t, int ); +#define x264_pixel_avg_16x8_neon x264_template(pixel_avg_16x8_neon) void x264_pixel_avg_16x8_neon ( uint8_t *, intptr_t, uint8_t *, intptr_t, uint8_t *, intptr_t, int ); +#define x264_pixel_avg_8x16_neon x264_template(pixel_avg_8x16_neon) void x264_pixel_avg_8x16_neon ( uint8_t *, intptr_t, uint8_t *, intptr_t, uint8_t *, intptr_t, int ); +#define x264_pixel_avg_8x8_neon x264_template(pixel_avg_8x8_neon) void x264_pixel_avg_8x8_neon ( uint8_t *, intptr_t, uint8_t *, intptr_t, uint8_t *, intptr_t, int ); +#define x264_pixel_avg_8x4_neon x264_template(pixel_avg_8x4_neon) void x264_pixel_avg_8x4_neon ( uint8_t *, intptr_t, uint8_t *, intptr_t, uint8_t *, intptr_t, int ); +#define x264_pixel_avg_4x16_neon x264_template(pixel_avg_4x16_neon) void x264_pixel_avg_4x16_neon ( uint8_t *, intptr_t, uint8_t *, intptr_t, uint8_t *, intptr_t, int ); +#define x264_pixel_avg_4x8_neon x264_template(pixel_avg_4x8_neon) void x264_pixel_avg_4x8_neon ( uint8_t *, intptr_t, uint8_t *, intptr_t, uint8_t *, intptr_t, int ); +#define x264_pixel_avg_4x4_neon x264_template(pixel_avg_4x4_neon) void x264_pixel_avg_4x4_neon ( uint8_t *, intptr_t, uint8_t *, intptr_t, uint8_t *, intptr_t, int ); +#define x264_pixel_avg_4x2_neon x264_template(pixel_avg_4x2_neon) void x264_pixel_avg_4x2_neon ( uint8_t *, intptr_t, 
uint8_t *, intptr_t, uint8_t *, intptr_t, int ); +#define x264_pixel_avg2_w4_neon x264_template(pixel_avg2_w4_neon) void x264_pixel_avg2_w4_neon ( uint8_t *, intptr_t, uint8_t *, intptr_t, uint8_t *, int ); +#define x264_pixel_avg2_w8_neon x264_template(pixel_avg2_w8_neon) void x264_pixel_avg2_w8_neon ( uint8_t *, intptr_t, uint8_t *, intptr_t, uint8_t *, int ); +#define x264_pixel_avg2_w16_neon x264_template(pixel_avg2_w16_neon) void x264_pixel_avg2_w16_neon( uint8_t *, intptr_t, uint8_t *, intptr_t, uint8_t *, int ); +#define x264_pixel_avg2_w20_neon x264_template(pixel_avg2_w20_neon) void x264_pixel_avg2_w20_neon( uint8_t *, intptr_t, uint8_t *, intptr_t, uint8_t *, int ); +#define x264_plane_copy_core_neon x264_template(plane_copy_core_neon) void x264_plane_copy_core_neon( pixel *dst, intptr_t i_dst, pixel *src, intptr_t i_src, int w, int h ); +#define x264_plane_copy_deinterleave_neon x264_template(plane_copy_deinterleave_neon) void x264_plane_copy_deinterleave_neon( pixel *dstu, intptr_t i_dstu, pixel *dstv, intptr_t i_dstv, pixel *src, intptr_t i_src, int w, int h ); +#define x264_plane_copy_deinterleave_rgb_neon x264_template(plane_copy_deinterleave_rgb_neon) void x264_plane_copy_deinterleave_rgb_neon( pixel *dsta, intptr_t i_dsta, pixel *dstb, intptr_t i_dstb, pixel *dstc, intptr_t i_dstc, pixel *src, intptr_t i_src, int pw, int w, int h ); +#define x264_plane_copy_interleave_core_neon x264_template(plane_copy_interleave_core_neon) void x264_plane_copy_interleave_core_neon( pixel *dst, intptr_t i_dst, pixel *srcu, intptr_t i_srcu, pixel *srcv, intptr_t i_srcv, int w, int h ); +#define x264_plane_copy_swap_core_neon x264_template(plane_copy_swap_core_neon) void x264_plane_copy_swap_core_neon( pixel *dst, intptr_t i_dst, pixel *src, intptr_t i_src, int w, int h ); +#define x264_store_interleave_chroma_neon x264_template(store_interleave_chroma_neon) void x264_store_interleave_chroma_neon( pixel *dst, intptr_t i_dst, pixel *srcu, pixel *srcv, int height ); 
+#define x264_load_deinterleave_chroma_fdec_neon x264_template(load_deinterleave_chroma_fdec_neon) void x264_load_deinterleave_chroma_fdec_neon( pixel *dst, pixel *src, intptr_t i_src, int height ); +#define x264_load_deinterleave_chroma_fenc_neon x264_template(load_deinterleave_chroma_fenc_neon) void x264_load_deinterleave_chroma_fenc_neon( pixel *dst, pixel *src, intptr_t i_src, int height ); +#define x264_mc_weight_w16_neon x264_template(mc_weight_w16_neon) +#define x264_mc_weight_w16_nodenom_neon x264_template(mc_weight_w16_nodenom_neon) +#define x264_mc_weight_w16_offsetadd_neon x264_template(mc_weight_w16_offsetadd_neon) +#define x264_mc_weight_w16_offsetsub_neon x264_template(mc_weight_w16_offsetsub_neon) +#define x264_mc_weight_w20_neon x264_template(mc_weight_w20_neon) +#define x264_mc_weight_w20_nodenom_neon x264_template(mc_weight_w20_nodenom_neon) +#define x264_mc_weight_w20_offsetadd_neon x264_template(mc_weight_w20_offsetadd_neon) +#define x264_mc_weight_w20_offsetsub_neon x264_template(mc_weight_w20_offsetsub_neon) +#define x264_mc_weight_w4_neon x264_template(mc_weight_w4_neon) +#define x264_mc_weight_w4_nodenom_neon x264_template(mc_weight_w4_nodenom_neon) +#define x264_mc_weight_w4_offsetadd_neon x264_template(mc_weight_w4_offsetadd_neon) +#define x264_mc_weight_w4_offsetsub_neon x264_template(mc_weight_w4_offsetsub_neon) +#define x264_mc_weight_w8_neon x264_template(mc_weight_w8_neon) +#define x264_mc_weight_w8_nodenom_neon x264_template(mc_weight_w8_nodenom_neon) +#define x264_mc_weight_w8_offsetadd_neon x264_template(mc_weight_w8_offsetadd_neon) +#define x264_mc_weight_w8_offsetsub_neon x264_template(mc_weight_w8_offsetsub_neon) #if !HIGH_BIT_DEPTH #define MC_WEIGHT(func)\ void x264_mc_weight_w20##func##_neon( uint8_t *, intptr_t, uint8_t *, intptr_t, const x264_weight_t *, int );\ @@ -74,7 +115,7 @@ void x264_mc_weight_w8##func##_neon ( uint8_t *, intptr_t, uint8_t *, intptr_t, const x264_weight_t *, int );\ void x264_mc_weight_w4##func##_neon 
( uint8_t *, intptr_t, uint8_t *, intptr_t, const x264_weight_t *, int );\ \ -static weight_fn_t x264_mc##func##_wtab_neon[6] =\ +static weight_fn_t mc##func##_wtab_neon[6] =\ {\ x264_mc_weight_w4##func##_neon,\ x264_mc_weight_w4##func##_neon,\ @@ -90,51 +131,67 @@ MC_WEIGHT(_offsetsub) #endif +#define x264_mc_copy_w4_neon x264_template(mc_copy_w4_neon) void x264_mc_copy_w4_neon ( uint8_t *, intptr_t, uint8_t *, intptr_t, int ); +#define x264_mc_copy_w8_neon x264_template(mc_copy_w8_neon) void x264_mc_copy_w8_neon ( uint8_t *, intptr_t, uint8_t *, intptr_t, int ); +#define x264_mc_copy_w16_neon x264_template(mc_copy_w16_neon) void x264_mc_copy_w16_neon( uint8_t *, intptr_t, uint8_t *, intptr_t, int ); +#define x264_mc_copy_w16_aligned_neon x264_template(mc_copy_w16_aligned_neon) void x264_mc_copy_w16_aligned_neon( uint8_t *, intptr_t, uint8_t *, intptr_t, int ); +#define x264_mc_chroma_neon x264_template(mc_chroma_neon) void x264_mc_chroma_neon( uint8_t *, uint8_t *, intptr_t, uint8_t *, intptr_t, int, int, int, int ); +#define x264_frame_init_lowres_core_neon x264_template(frame_init_lowres_core_neon) void x264_frame_init_lowres_core_neon( uint8_t *, uint8_t *, uint8_t *, uint8_t *, uint8_t *, intptr_t, intptr_t, int, int ); +#define x264_hpel_filter_v_neon x264_template(hpel_filter_v_neon) void x264_hpel_filter_v_neon( uint8_t *, uint8_t *, int16_t *, intptr_t, int ); +#define x264_hpel_filter_c_neon x264_template(hpel_filter_c_neon) void x264_hpel_filter_c_neon( uint8_t *, int16_t *, int ); +#define x264_hpel_filter_h_neon x264_template(hpel_filter_h_neon) void x264_hpel_filter_h_neon( uint8_t *, uint8_t *, int ); +#define x264_integral_init4h_neon x264_template(integral_init4h_neon) void x264_integral_init4h_neon( uint16_t *, uint8_t *, intptr_t ); +#define x264_integral_init4v_neon x264_template(integral_init4v_neon) void x264_integral_init4v_neon( uint16_t *, uint16_t *, intptr_t ); +#define x264_integral_init8h_neon x264_template(integral_init8h_neon) void 
x264_integral_init8h_neon( uint16_t *, uint8_t *, intptr_t ); +#define x264_integral_init8v_neon x264_template(integral_init8v_neon) void x264_integral_init8v_neon( uint16_t *, intptr_t ); +#define x264_mbtree_propagate_cost_neon x264_template(mbtree_propagate_cost_neon) void x264_mbtree_propagate_cost_neon( int16_t *, uint16_t *, uint16_t *, uint16_t *, uint16_t *, float *, int ); +#define x264_mbtree_fix8_pack_neon x264_template(mbtree_fix8_pack_neon) void x264_mbtree_fix8_pack_neon( uint16_t *dst, float *src, int count ); +#define x264_mbtree_fix8_unpack_neon x264_template(mbtree_fix8_unpack_neon) void x264_mbtree_fix8_unpack_neon( float *dst, uint16_t *src, int count ); #if !HIGH_BIT_DEPTH -static void x264_weight_cache_neon( x264_t *h, x264_weight_t *w ) +static void weight_cache_neon( x264_t *h, x264_weight_t *w ) { if( w->i_scale == 1<<w->i_denom ) { if( w->i_offset < 0 ) { - w->weightfn = x264_mc_offsetsub_wtab_neon; + w->weightfn = mc_offsetsub_wtab_neon; w->cachea[0] = -w->i_offset; } else { - w->weightfn = x264_mc_offsetadd_wtab_neon; + w->weightfn = mc_offsetadd_wtab_neon; w->cachea[0] = w->i_offset; } } else if( !w->i_denom ) - w->weightfn = x264_mc_nodenom_wtab_neon; + w->weightfn = mc_nodenom_wtab_neon; else - w->weightfn = x264_mc_wtab_neon; + w->weightfn = mc_wtab_neon; } -static void (* const x264_pixel_avg_wtab_neon[6])( uint8_t *, intptr_t, uint8_t *, intptr_t, uint8_t *, int ) = +static void (* const pixel_avg_wtab_neon[6])( uint8_t *, intptr_t, uint8_t *, intptr_t, uint8_t *, int ) = { NULL, x264_pixel_avg2_w4_neon, @@ -144,7 +201,7 @@ x264_pixel_avg2_w20_neon, }; -static void (* const x264_mc_copy_wtab_neon[5])( uint8_t *, intptr_t, uint8_t *, intptr_t, int ) = +static void (* const mc_copy_wtab_neon[5])( uint8_t *, intptr_t, uint8_t *, intptr_t, int ) = { NULL, x264_mc_copy_w4_neon, @@ -167,7 +224,7 @@ if( qpel_idx & 5 ) /* qpel interpolation needed */ { uint8_t *src2 = src[x264_hpel_ref1[qpel_idx]] + offset + ((mvx&3) == 3); - 
x264_pixel_avg_wtab_neon[i_width>>2]( + pixel_avg_wtab_neon[i_width>>2]( dst, i_dst_stride, src1, i_src_stride, src2, i_height ); if( weight->weightfn ) @@ -176,7 +233,7 @@ else if( weight->weightfn ) weight->weightfn[i_width>>2]( dst, i_dst_stride, src1, i_src_stride, weight, i_height ); else - x264_mc_copy_wtab_neon[i_width>>2]( dst, i_dst_stride, src1, i_src_stride, i_height ); + mc_copy_wtab_neon[i_width>>2]( dst, i_dst_stride, src1, i_src_stride, i_height ); } static uint8_t *get_ref_neon( uint8_t *dst, intptr_t *i_dst_stride, @@ -193,7 +250,7 @@ if( qpel_idx & 5 ) /* qpel interpolation needed */ { uint8_t *src2 = src[x264_hpel_ref1[qpel_idx]] + offset + ((mvx&3) == 3); - x264_pixel_avg_wtab_neon[i_width>>2]( + pixel_avg_wtab_neon[i_width>>2]( dst, *i_dst_stride, src1, i_src_stride, src2, i_height ); if( weight->weightfn ) @@ -236,9 +293,8 @@ PLANE_COPY(16, neon) PLANE_COPY_SWAP(16, neon) PLANE_INTERLEAVE(neon) -#endif // !HIGH_BIT_DEPTH - PROPAGATE_LIST(neon) +#endif // !HIGH_BIT_DEPTH void x264_mc_init_arm( int cpu, x264_mc_functions_t *pf ) { @@ -260,11 +316,11 @@ pf->copy[PIXEL_8x8] = x264_mc_copy_w8_neon; pf->copy[PIXEL_4x4] = x264_mc_copy_w4_neon; - pf->plane_copy = x264_plane_copy_neon; + pf->plane_copy = plane_copy_neon; pf->plane_copy_deinterleave = x264_plane_copy_deinterleave_neon; pf->plane_copy_deinterleave_rgb = x264_plane_copy_deinterleave_rgb_neon; - pf->plane_copy_interleave = x264_plane_copy_interleave_neon; - pf->plane_copy_swap = x264_plane_copy_swap_neon; + pf->plane_copy_interleave = plane_copy_interleave_neon; + pf->plane_copy_swap = plane_copy_swap_neon; pf->store_interleave_chroma = x264_store_interleave_chroma_neon; pf->load_deinterleave_chroma_fdec = x264_load_deinterleave_chroma_fdec_neon; @@ -280,10 +336,10 @@ pf->avg[PIXEL_4x4] = x264_pixel_avg_4x4_neon; pf->avg[PIXEL_4x2] = x264_pixel_avg_4x2_neon; - pf->weight = x264_mc_wtab_neon; - pf->offsetadd = x264_mc_offsetadd_wtab_neon; - pf->offsetsub = x264_mc_offsetsub_wtab_neon; - 
pf->weight_cache = x264_weight_cache_neon; + pf->weight = mc_wtab_neon; + pf->offsetadd = mc_offsetadd_wtab_neon; + pf->offsetsub = mc_offsetsub_wtab_neon; + pf->weight_cache = weight_cache_neon; pf->mc_chroma = x264_mc_chroma_neon; pf->mc_luma = mc_luma_neon; @@ -297,7 +353,7 @@ pf->integral_init8v = x264_integral_init8v_neon; pf->mbtree_propagate_cost = x264_mbtree_propagate_cost_neon; - pf->mbtree_propagate_list = x264_mbtree_propagate_list_neon; + pf->mbtree_propagate_list = mbtree_propagate_list_neon; pf->mbtree_fix8_pack = x264_mbtree_fix8_pack_neon; pf->mbtree_fix8_unpack = x264_mbtree_fix8_unpack_neon; #endif // !HIGH_BIT_DEPTH diff -Nru x264-0.152.2854+gite9a5903/common/arm/mc.h x264-0.158.2988+git-20191101.7817004/common/arm/mc.h --- x264-0.152.2854+gite9a5903/common/arm/mc.h 2017-12-31 12:50:51.000000000 +0000 +++ x264-0.158.2988+git-20191101.7817004/common/arm/mc.h 2019-11-09 05:16:29.000000000 +0000 @@ -1,7 +1,7 @@ /***************************************************************************** * mc.h: arm motion compensation ***************************************************************************** - * Copyright (C) 2009-2017 x264 project + * Copyright (C) 2009-2019 x264 project * * Authors: David Conrad * @@ -26,6 +26,7 @@ #ifndef X264_ARM_MC_H #define X264_ARM_MC_H +#define x264_mc_init_arm x264_template(mc_init_arm) void x264_mc_init_arm( int cpu, x264_mc_functions_t *pf ); #endif diff -Nru x264-0.152.2854+gite9a5903/common/arm/pixel-a.S x264-0.158.2988+git-20191101.7817004/common/arm/pixel-a.S --- x264-0.152.2854+gite9a5903/common/arm/pixel-a.S 2017-12-31 12:50:51.000000000 +0000 +++ x264-0.158.2988+git-20191101.7817004/common/arm/pixel-a.S 2019-11-09 05:16:29.000000000 +0000 @@ -1,7 +1,7 @@ /***************************************************************************** * pixel.S: arm pixel metrics ***************************************************************************** - * Copyright (C) 2009-2017 x264 project + * Copyright (C) 2009-2019 x264 
project * * Authors: David Conrad * Janne Grunau @@ -46,7 +46,7 @@ .text .macro SAD4_ARMV6 h -function x264_pixel_sad_4x\h\()_armv6 +function pixel_sad_4x\h\()_armv6 push {r4-r6,lr} ldr r4, [r2], r3 ldr r5, [r0], r1 @@ -115,7 +115,7 @@ .endm .macro SAD_FUNC w, h, name, align:vararg -function x264_pixel_sad\name\()_\w\()x\h\()_neon +function pixel_sad\name\()_\w\()x\h\()_neon SAD_START_\w \align .if \w == 16 @@ -206,7 +206,7 @@ .endm .macro SAD_FUNC_DUAL w, h -function x264_pixel_sad_aligned_\w\()x\h\()_neon_dual +function pixel_sad_aligned_\w\()x\h\()_neon_dual SAD_DUAL_START_\w .rept \h / 2 - \w / 8 SAD_DUAL_\w @@ -328,7 +328,7 @@ .endm .macro SAD_X_FUNC x, w, h -function x264_pixel_sad_x\x\()_\w\()x\h\()_neon +function pixel_sad_x\x\()_\w\()x\h\()_neon push {r6-r7,lr} .if \x == 3 ldrd r6, r7, [sp, #12] @@ -390,7 +390,7 @@ SAD_X_FUNC 4, 16, 8 SAD_X_FUNC 4, 16, 16 -function x264_pixel_vsad_neon +function pixel_vsad_neon subs r2, r2, #2 vld1.8 {q0}, [r0], r1 vld1.8 {q1}, [r0], r1 @@ -414,7 +414,7 @@ bx lr endfunc -function x264_pixel_asd8_neon +function pixel_asd8_neon ldr r12, [sp, #0] sub r12, r12, #2 vld1.8 {d0}, [r0], r1 @@ -523,7 +523,7 @@ .endm .macro SSD_FUNC w h -function x264_pixel_ssd_\w\()x\h\()_neon +function pixel_ssd_\w\()x\h\()_neon SSD_START_\w .rept \h-2 SSD_\w @@ -544,7 +544,7 @@ SSD_FUNC 16, 8 SSD_FUNC 16, 16 -function x264_pixel_ssd_nv12_core_neon +function pixel_ssd_nv12_core_neon push {r4-r5} ldrd r4, r5, [sp, #8] add r12, r4, #8 @@ -624,7 +624,7 @@ \vpadal \qsqr_sum, \qsqr_last .endm -function x264_pixel_var_8x8_neon +function pixel_var_8x8_neon vld1.64 {d16}, [r0,:64], r1 vmull.u8 q1, d16, d16 vmovl.u8 q0, d16 @@ -645,10 +645,10 @@ VAR_SQR_SUM q1, q9, q14, d24 vld1.64 {d26}, [r0,:64], r1 VAR_SQR_SUM q2, q10, q15, d26 - b x264_var_end + b var_end endfunc -function x264_pixel_var_8x16_neon +function pixel_var_8x16_neon vld1.64 {d16}, [r0,:64], r1 vld1.64 {d18}, [r0,:64], r1 vmull.u8 q1, d16, d16 @@ -677,10 +677,10 @@ b 1b 2: VAR_SQR_SUM q2, 
q13, q15, d22 - b x264_var_end + b var_end endfunc -function x264_pixel_var_16x16_neon +function pixel_var_16x16_neon vld1.64 {d16-d17}, [r0,:128], r1 vmull.u8 q12, d16, d16 vmovl.u8 q0, d16 @@ -704,7 +704,7 @@ bgt var16_loop endfunc -function x264_var_end, export=0 +function var_end, export=0 vpaddl.u16 q8, q14 vpaddl.u16 q9, q15 vadd.u32 q1, q1, q8 @@ -744,7 +744,7 @@ vmlal.s16 \acc, \d1, \d1 .endm -function x264_pixel_var2_8x8_neon +function pixel_var2_8x8_neon mov r3, #16 DIFF_SUM q0, q10, d0, d1, d20, d21 DIFF_SUM q8, q11, d16, d17, d22, d23 @@ -783,7 +783,7 @@ bx lr endfunc -function x264_pixel_var2_8x16_neon +function pixel_var2_8x16_neon mov r3, #16 vld1.64 {d16}, [r0,:64]! vld1.64 {d17}, [r1,:64], r3 @@ -846,7 +846,7 @@ vsubl.u8 \q3, d6, d7 .endm -function x264_pixel_satd_4x4_neon +function pixel_satd_4x4_neon vld1.32 {d1[]}, [r2], r3 vld1.32 {d0[]}, [r0,:32], r1 vld1.32 {d3[]}, [r2], r3 @@ -868,7 +868,7 @@ bx lr endfunc -function x264_pixel_satd_4x8_neon +function pixel_satd_4x8_neon vld1.32 {d1[]}, [r2], r3 vld1.32 {d0[]}, [r0,:32], r1 vld1.32 {d3[]}, [r2], r3 @@ -892,10 +892,10 @@ vld1.32 {d6[1]}, [r0,:32], r1 vsubl.u8 q3, d6, d7 SUMSUB_AB q10, q11, q2, q3 - b x264_satd_4x8_8x4_end_neon + b satd_4x8_8x4_end_neon endfunc -function x264_pixel_satd_8x4_neon +function pixel_satd_8x4_neon vld1.64 {d1}, [r2], r3 vld1.64 {d0}, [r0,:64], r1 vsubl.u8 q0, d0, d1 @@ -912,7 +912,7 @@ SUMSUB_AB q10, q11, q2, q3 endfunc -function x264_satd_4x8_8x4_end_neon, export=0 +function satd_4x8_8x4_end_neon, export=0 vadd.s16 q0, q8, q10 vadd.s16 q1, q9, q11 vsub.s16 q2, q8, q10 @@ -939,10 +939,10 @@ bx lr endfunc -function x264_pixel_satd_8x8_neon +function pixel_satd_8x8_neon mov ip, lr - bl x264_satd_8x8_neon + bl satd_8x8_neon vadd.u16 q0, q12, q13 vadd.u16 q1, q14, q15 @@ -953,15 +953,15 @@ bx lr endfunc -function x264_pixel_satd_8x16_neon +function pixel_satd_8x16_neon vpush {d8-d11} mov ip, lr - bl x264_satd_8x8_neon + bl satd_8x8_neon vadd.u16 q4, q12, q13 vadd.u16 q5, 
q14, q15 - bl x264_satd_8x8_neon + bl satd_8x8_neon vadd.u16 q4, q4, q12 vadd.u16 q5, q5, q13 vadd.u16 q4, q4, q14 @@ -975,7 +975,7 @@ bx lr endfunc -function x264_satd_8x8_neon, export=0 +function satd_8x8_neon, export=0 LOAD_DIFF_8x4 q8, q9, q10, q11 vld1.64 {d7}, [r2], r3 SUMSUB_AB q0, q1, q8, q9 @@ -996,7 +996,7 @@ endfunc // one vertical hadamard pass and two horizontal -function x264_satd_8x4v_8x8h_neon, export=0 +function satd_8x4v_8x8h_neon, export=0 SUMSUB_ABCD q0, q1, q2, q3, q12, q13, q14, q15 vtrn.16 q8, q9 SUMSUB_AB q12, q14, q0, q2 @@ -1024,15 +1024,15 @@ bx lr endfunc -function x264_pixel_satd_16x8_neon +function pixel_satd_16x8_neon vpush {d8-d11} mov ip, lr - bl x264_satd_16x4_neon + bl satd_16x4_neon vadd.u16 q4, q12, q13 vadd.u16 q5, q14, q15 - bl x264_satd_16x4_neon + bl satd_16x4_neon vadd.u16 q4, q4, q12 vadd.u16 q5, q5, q13 vadd.u16 q4, q4, q14 @@ -1046,27 +1046,27 @@ bx lr endfunc -function x264_pixel_satd_16x16_neon +function pixel_satd_16x16_neon vpush {d8-d11} mov ip, lr - bl x264_satd_16x4_neon + bl satd_16x4_neon vadd.u16 q4, q12, q13 vadd.u16 q5, q14, q15 - bl x264_satd_16x4_neon + bl satd_16x4_neon vadd.u16 q4, q4, q12 vadd.u16 q5, q5, q13 vadd.u16 q4, q4, q14 vadd.u16 q5, q5, q15 - bl x264_satd_16x4_neon + bl satd_16x4_neon vadd.u16 q4, q4, q12 vadd.u16 q5, q5, q13 vadd.u16 q4, q4, q14 vadd.u16 q5, q5, q15 - bl x264_satd_16x4_neon + bl satd_16x4_neon vadd.u16 q4, q4, q12 vadd.u16 q5, q5, q13 vadd.u16 q4, q4, q14 @@ -1080,7 +1080,7 @@ bx lr endfunc -function x264_satd_16x4_neon, export=0 +function satd_16x4_neon, export=0 vld1.64 {d2-d3}, [r2], r3 vld1.64 {d0-d1}, [r0,:128], r1 vsubl.u8 q8, d0, d2 @@ -1101,13 +1101,13 @@ vsubl.u8 q15, d5, d7 SUMSUB_AB q2, q3, q10, q11 SUMSUB_ABCD q8, q10, q9, q11, q0, q2, q1, q3 - b x264_satd_8x4v_8x8h_neon + b satd_8x4v_8x8h_neon endfunc -function x264_pixel_sa8d_8x8_neon +function pixel_sa8d_8x8_neon mov ip, lr - bl x264_sa8d_8x8_neon + bl sa8d_8x8_neon vadd.u16 q0, q8, q9 HORIZ_ADD d0, d0, d1 mov 
lr, ip @@ -1117,23 +1117,23 @@ bx lr endfunc -function x264_pixel_sa8d_16x16_neon +function pixel_sa8d_16x16_neon vpush {d8-d11} mov ip, lr - bl x264_sa8d_8x8_neon + bl sa8d_8x8_neon vpaddl.u16 q4, q8 vpaddl.u16 q5, q9 - bl x264_sa8d_8x8_neon + bl sa8d_8x8_neon vpadal.u16 q4, q8 vpadal.u16 q5, q9 sub r0, r0, r1, lsl #4 sub r2, r2, r3, lsl #4 add r0, r0, #8 add r2, r2, #8 - bl x264_sa8d_8x8_neon + bl sa8d_8x8_neon vpadal.u16 q4, q8 vpadal.u16 q5, q9 - bl x264_sa8d_8x8_neon + bl sa8d_8x8_neon vpaddl.u16 q8, q8 vpaddl.u16 q9, q9 vadd.u32 q0, q4, q8 @@ -1182,7 +1182,7 @@ .endm .macro sa8d_satd_8x8 satd= -function x264_sa8d_\satd\()8x8_neon, export=0 +function sa8d_\satd\()8x8_neon, export=0 LOAD_DIFF_8x4 q8, q9, q10, q11 vld1.64 {d7}, [r2], r3 SUMSUB_AB q0, q1, q8, q9 @@ -1254,19 +1254,19 @@ sa8d_satd_8x8 sa8d_satd_8x8 satd_ -function x264_pixel_sa8d_satd_16x16_neon +function pixel_sa8d_satd_16x16_neon push {lr} vpush {q4-q7} vmov.u32 q4, #0 vmov.u32 q5, #0 - bl x264_sa8d_satd_8x8_neon - bl x264_sa8d_satd_8x8_neon + bl sa8d_satd_8x8_neon + bl sa8d_satd_8x8_neon sub r0, r0, r1, lsl #4 sub r2, r2, r3, lsl #4 add r0, r0, #8 add r2, r2, #8 - bl x264_sa8d_satd_8x8_neon - bl x264_sa8d_satd_8x8_neon + bl sa8d_satd_8x8_neon + bl sa8d_satd_8x8_neon vadd.u32 d1, d10, d11 vadd.u32 d0, d8, d9 vpadd.u32 d1, d1, d1 @@ -1280,7 +1280,7 @@ .macro HADAMARD_AC w h -function x264_pixel_hadamard_ac_\w\()x\h\()_neon +function pixel_hadamard_ac_\w\()x\h\()_neon vpush {d8-d15} movrel ip, mask_ac4 vmov.i8 q4, #0 @@ -1289,18 +1289,18 @@ vmov.i8 q5, #0 mov ip, lr - bl x264_hadamard_ac_8x8_neon + bl hadamard_ac_8x8_neon .if \h > 8 - bl x264_hadamard_ac_8x8_neon + bl hadamard_ac_8x8_neon .endif .if \w > 8 sub r0, r0, r1, lsl #3 add r0, r0, #8 - bl x264_hadamard_ac_8x8_neon + bl hadamard_ac_8x8_neon .endif .if \w * \h == 256 sub r0, r0, r1, lsl #4 - bl x264_hadamard_ac_8x8_neon + bl hadamard_ac_8x8_neon .endif vadd.s32 d8, d8, d9 @@ -1321,7 +1321,7 @@ HADAMARD_AC 16, 16 // q4: satd q5: sa8d q6: 
mask_ac4 q7: mask_ac8 -function x264_hadamard_ac_8x8_neon, export=0 +function hadamard_ac_8x8_neon, export=0 vld1.64 {d2}, [r0,:64], r1 vld1.64 {d3}, [r0,:64], r1 vaddl.u8 q0, d2, d3 @@ -1435,7 +1435,7 @@ vmull.u8 \ssb, \db, \db .endm -function x264_pixel_ssim_4x4x2_core_neon +function pixel_ssim_4x4x2_core_neon ldr ip, [sp] vld1.64 {d0}, [r0], r1 vld1.64 {d2}, [r2], r3 @@ -1464,7 +1464,7 @@ endfunc // FIXME: see about doing 16x16 -> 32 bit multiplies for s1/s2 -function x264_pixel_ssim_end4_neon +function pixel_ssim_end4_neon vld1.32 {d16-d19}, [r0,:128]! vld1.32 {d20-d23}, [r1,:128]! vadd.s32 q0, q8, q10 diff -Nru x264-0.152.2854+gite9a5903/common/arm/pixel.h x264-0.158.2988+git-20191101.7817004/common/arm/pixel.h --- x264-0.152.2854+gite9a5903/common/arm/pixel.h 2017-12-31 12:50:51.000000000 +0000 +++ x264-0.158.2988+git-20191101.7817004/common/arm/pixel.h 2019-11-09 05:16:29.000000000 +0000 @@ -1,7 +1,7 @@ /***************************************************************************** * pixel.h: arm pixel metrics ***************************************************************************** - * Copyright (C) 2009-2017 x264 project + * Copyright (C) 2009-2019 x264 project * * Authors: David Conrad * @@ -26,6 +26,68 @@ #ifndef X264_ARM_PIXEL_H #define X264_ARM_PIXEL_H +#define x264_pixel_avg2_w16_neon x264_template(pixel_avg2_w16_neon) +#define x264_pixel_avg2_w20_neon x264_template(pixel_avg2_w20_neon) +#define x264_pixel_avg2_w4_neon x264_template(pixel_avg2_w4_neon) +#define x264_pixel_avg2_w8_neon x264_template(pixel_avg2_w8_neon) +#define x264_pixel_avg_16x16_neon x264_template(pixel_avg_16x16_neon) +#define x264_pixel_avg_16x8_neon x264_template(pixel_avg_16x8_neon) +#define x264_pixel_avg_4x16_neon x264_template(pixel_avg_4x16_neon) +#define x264_pixel_avg_4x2_neon x264_template(pixel_avg_4x2_neon) +#define x264_pixel_avg_4x4_neon x264_template(pixel_avg_4x4_neon) +#define x264_pixel_avg_4x8_neon x264_template(pixel_avg_4x8_neon) +#define 
x264_pixel_avg_8x16_neon x264_template(pixel_avg_8x16_neon) +#define x264_pixel_avg_8x4_neon x264_template(pixel_avg_8x4_neon) +#define x264_pixel_avg_8x8_neon x264_template(pixel_avg_8x8_neon) +#define x264_pixel_sad_16x16_neon x264_template(pixel_sad_16x16_neon) +#define x264_pixel_sad_16x8_neon x264_template(pixel_sad_16x8_neon) +#define x264_pixel_sad_4x4_armv6 x264_template(pixel_sad_4x4_armv6) +#define x264_pixel_sad_4x4_neon x264_template(pixel_sad_4x4_neon) +#define x264_pixel_sad_4x8_armv6 x264_template(pixel_sad_4x8_armv6) +#define x264_pixel_sad_4x8_neon x264_template(pixel_sad_4x8_neon) +#define x264_pixel_sad_8x16_neon x264_template(pixel_sad_8x16_neon) +#define x264_pixel_sad_8x4_neon x264_template(pixel_sad_8x4_neon) +#define x264_pixel_sad_8x8_neon x264_template(pixel_sad_8x8_neon) +#define x264_pixel_sad_aligned_16x16_neon x264_template(pixel_sad_aligned_16x16_neon) +#define x264_pixel_sad_aligned_16x16_neon_dual x264_template(pixel_sad_aligned_16x16_neon_dual) +#define x264_pixel_sad_aligned_16x8_neon x264_template(pixel_sad_aligned_16x8_neon) +#define x264_pixel_sad_aligned_16x8_neon_dual x264_template(pixel_sad_aligned_16x8_neon_dual) +#define x264_pixel_sad_aligned_4x4_neon x264_template(pixel_sad_aligned_4x4_neon) +#define x264_pixel_sad_aligned_4x8_neon x264_template(pixel_sad_aligned_4x8_neon) +#define x264_pixel_sad_aligned_8x16_neon x264_template(pixel_sad_aligned_8x16_neon) +#define x264_pixel_sad_aligned_8x16_neon_dual x264_template(pixel_sad_aligned_8x16_neon_dual) +#define x264_pixel_sad_aligned_8x4_neon x264_template(pixel_sad_aligned_8x4_neon) +#define x264_pixel_sad_aligned_8x4_neon_dual x264_template(pixel_sad_aligned_8x4_neon_dual) +#define x264_pixel_sad_aligned_8x8_neon x264_template(pixel_sad_aligned_8x8_neon) +#define x264_pixel_sad_aligned_8x8_neon_dual x264_template(pixel_sad_aligned_8x8_neon_dual) +#define x264_pixel_sad_x3_16x16_neon x264_template(pixel_sad_x3_16x16_neon) +#define x264_pixel_sad_x3_16x8_neon 
x264_template(pixel_sad_x3_16x8_neon) +#define x264_pixel_sad_x3_4x4_neon x264_template(pixel_sad_x3_4x4_neon) +#define x264_pixel_sad_x3_4x8_neon x264_template(pixel_sad_x3_4x8_neon) +#define x264_pixel_sad_x3_8x16_neon x264_template(pixel_sad_x3_8x16_neon) +#define x264_pixel_sad_x3_8x4_neon x264_template(pixel_sad_x3_8x4_neon) +#define x264_pixel_sad_x3_8x8_neon x264_template(pixel_sad_x3_8x8_neon) +#define x264_pixel_sad_x4_16x16_neon x264_template(pixel_sad_x4_16x16_neon) +#define x264_pixel_sad_x4_16x8_neon x264_template(pixel_sad_x4_16x8_neon) +#define x264_pixel_sad_x4_4x4_neon x264_template(pixel_sad_x4_4x4_neon) +#define x264_pixel_sad_x4_4x8_neon x264_template(pixel_sad_x4_4x8_neon) +#define x264_pixel_sad_x4_8x16_neon x264_template(pixel_sad_x4_8x16_neon) +#define x264_pixel_sad_x4_8x4_neon x264_template(pixel_sad_x4_8x4_neon) +#define x264_pixel_sad_x4_8x8_neon x264_template(pixel_sad_x4_8x8_neon) +#define x264_pixel_satd_16x16_neon x264_template(pixel_satd_16x16_neon) +#define x264_pixel_satd_16x8_neon x264_template(pixel_satd_16x8_neon) +#define x264_pixel_satd_4x4_neon x264_template(pixel_satd_4x4_neon) +#define x264_pixel_satd_4x8_neon x264_template(pixel_satd_4x8_neon) +#define x264_pixel_satd_8x16_neon x264_template(pixel_satd_8x16_neon) +#define x264_pixel_satd_8x4_neon x264_template(pixel_satd_8x4_neon) +#define x264_pixel_satd_8x8_neon x264_template(pixel_satd_8x8_neon) +#define x264_pixel_ssd_16x16_neon x264_template(pixel_ssd_16x16_neon) +#define x264_pixel_ssd_16x8_neon x264_template(pixel_ssd_16x8_neon) +#define x264_pixel_ssd_4x4_neon x264_template(pixel_ssd_4x4_neon) +#define x264_pixel_ssd_4x8_neon x264_template(pixel_ssd_4x8_neon) +#define x264_pixel_ssd_8x16_neon x264_template(pixel_ssd_8x16_neon) +#define x264_pixel_ssd_8x4_neon x264_template(pixel_ssd_8x4_neon) +#define x264_pixel_ssd_8x8_neon x264_template(pixel_ssd_8x8_neon) #define DECL_PIXELS( ret, name, suffix, args ) \ ret x264_pixel_##name##_16x16_##suffix args;\ ret 
x264_pixel_##name##_16x8_##suffix args;\ @@ -52,30 +114,47 @@ DECL_X1( satd, neon ) DECL_X1( ssd, neon ) +#define x264_pixel_ssd_nv12_core_neon x264_template(pixel_ssd_nv12_core_neon) void x264_pixel_ssd_nv12_core_neon( uint8_t *, intptr_t, uint8_t *, intptr_t, int, int, uint64_t *, uint64_t * ); +#define x264_pixel_vsad_neon x264_template(pixel_vsad_neon) int x264_pixel_vsad_neon( uint8_t *, intptr_t, int ); +#define x264_pixel_sa8d_8x8_neon x264_template(pixel_sa8d_8x8_neon) int x264_pixel_sa8d_8x8_neon ( uint8_t *, intptr_t, uint8_t *, intptr_t ); +#define x264_pixel_sa8d_16x16_neon x264_template(pixel_sa8d_16x16_neon) int x264_pixel_sa8d_16x16_neon( uint8_t *, intptr_t, uint8_t *, intptr_t ); +#define x264_pixel_sa8d_satd_16x16_neon x264_template(pixel_sa8d_satd_16x16_neon) uint64_t x264_pixel_sa8d_satd_16x16_neon( uint8_t *, intptr_t, uint8_t *, intptr_t ); +#define x264_pixel_var_8x8_neon x264_template(pixel_var_8x8_neon) uint64_t x264_pixel_var_8x8_neon ( uint8_t *, intptr_t ); +#define x264_pixel_var_8x16_neon x264_template(pixel_var_8x16_neon) uint64_t x264_pixel_var_8x16_neon ( uint8_t *, intptr_t ); +#define x264_pixel_var_16x16_neon x264_template(pixel_var_16x16_neon) uint64_t x264_pixel_var_16x16_neon( uint8_t *, intptr_t ); +#define x264_pixel_var2_8x8_neon x264_template(pixel_var2_8x8_neon) int x264_pixel_var2_8x8_neon ( uint8_t *, uint8_t *, int * ); +#define x264_pixel_var2_8x16_neon x264_template(pixel_var2_8x16_neon) int x264_pixel_var2_8x16_neon( uint8_t *, uint8_t *, int * ); +#define x264_pixel_hadamard_ac_8x8_neon x264_template(pixel_hadamard_ac_8x8_neon) uint64_t x264_pixel_hadamard_ac_8x8_neon ( uint8_t *, intptr_t ); +#define x264_pixel_hadamard_ac_8x16_neon x264_template(pixel_hadamard_ac_8x16_neon) uint64_t x264_pixel_hadamard_ac_8x16_neon ( uint8_t *, intptr_t ); +#define x264_pixel_hadamard_ac_16x8_neon x264_template(pixel_hadamard_ac_16x8_neon) uint64_t x264_pixel_hadamard_ac_16x8_neon ( uint8_t *, intptr_t ); +#define 
x264_pixel_hadamard_ac_16x16_neon x264_template(pixel_hadamard_ac_16x16_neon) uint64_t x264_pixel_hadamard_ac_16x16_neon( uint8_t *, intptr_t ); +#define x264_pixel_ssim_4x4x2_core_neon x264_template(pixel_ssim_4x4x2_core_neon) void x264_pixel_ssim_4x4x2_core_neon( const uint8_t *, intptr_t, const uint8_t *, intptr_t, int sums[2][4] ); +#define x264_pixel_ssim_end4_neon x264_template(pixel_ssim_end4_neon) float x264_pixel_ssim_end4_neon( int sum0[5][4], int sum1[5][4], int width ); +#define x264_pixel_asd8_neon x264_template(pixel_asd8_neon) int x264_pixel_asd8_neon( uint8_t *, intptr_t, uint8_t *, intptr_t, int ); #endif diff -Nru x264-0.152.2854+gite9a5903/common/arm/predict-a.S x264-0.158.2988+git-20191101.7817004/common/arm/predict-a.S --- x264-0.152.2854+gite9a5903/common/arm/predict-a.S 2017-12-31 12:50:51.000000000 +0000 +++ x264-0.158.2988+git-20191101.7817004/common/arm/predict-a.S 2019-11-09 05:16:29.000000000 +0000 @@ -1,7 +1,7 @@ /***************************************************************************** * predict.S: arm intra prediction ***************************************************************************** - * Copyright (C) 2009-2017 x264 project + * Copyright (C) 2009-2019 x264 project * * Authors: David Conrad * Mans Rullgard @@ -77,7 +77,7 @@ // because gcc doesn't believe in using the free shift in add -function x264_predict_4x4_h_armv6 +function predict_4x4_h_armv6 ldrb r1, [r0, #0*FDEC_STRIDE-1] ldrb r2, [r0, #1*FDEC_STRIDE-1] ldrb r3, [r0, #2*FDEC_STRIDE-1] @@ -97,7 +97,7 @@ bx lr endfunc -function x264_predict_4x4_v_armv6 +function predict_4x4_v_armv6 ldr r1, [r0, #0 - 1 * FDEC_STRIDE] str r1, [r0, #0 + 0 * FDEC_STRIDE] str r1, [r0, #0 + 1 * FDEC_STRIDE] @@ -106,7 +106,7 @@ bx lr endfunc -function x264_predict_4x4_dc_armv6 +function predict_4x4_dc_armv6 mov ip, #0 ldr r1, [r0, #-FDEC_STRIDE] ldrb r2, [r0, #0*FDEC_STRIDE-1] @@ -129,7 +129,7 @@ bx lr endfunc -function x264_predict_4x4_dc_top_neon +function predict_4x4_dc_top_neon mov 
r12, #FDEC_STRIDE sub r1, r0, #FDEC_STRIDE vld1.32 d1[], [r1,:32] @@ -158,7 +158,7 @@ uadd8 \a2, \a2, \c2 .endm -function x264_predict_4x4_ddr_armv6 +function predict_4x4_ddr_armv6 ldr r1, [r0, # -FDEC_STRIDE] ldrb r2, [r0, # -FDEC_STRIDE-1] ldrb r3, [r0, #0*FDEC_STRIDE-1] @@ -187,7 +187,7 @@ pop {r4-r6,pc} endfunc -function x264_predict_4x4_ddl_neon +function predict_4x4_ddl_neon sub r0, #FDEC_STRIDE mov ip, #FDEC_STRIDE vld1.64 {d0}, [r0], ip @@ -206,7 +206,7 @@ bx lr endfunc -function x264_predict_8x8_dc_neon +function predict_8x8_dc_neon mov ip, #0 ldrd r2, r3, [r1, #8] push {r4-r5,lr} @@ -230,7 +230,7 @@ pop {r4-r5,pc} endfunc -function x264_predict_8x8_h_neon +function predict_8x8_h_neon add r1, r1, #7 mov ip, #FDEC_STRIDE vld1.64 {d16}, [r1] @@ -253,7 +253,7 @@ bx lr endfunc -function x264_predict_8x8_v_neon +function predict_8x8_v_neon add r1, r1, #16 mov r12, #FDEC_STRIDE vld1.8 {d0}, [r1,:64] @@ -263,7 +263,7 @@ bx lr endfunc -function x264_predict_8x8_ddl_neon +function predict_8x8_ddl_neon add r1, #16 vld1.8 {d0, d1}, [r1,:128] vmov.i8 q3, #0 @@ -291,7 +291,7 @@ bx lr endfunc -function x264_predict_8x8_ddr_neon +function predict_8x8_ddr_neon vld1.8 {d0-d3}, [r1,:128] vext.8 q2, q0, q1, #7 vext.8 q3, q0, q1, #9 @@ -321,7 +321,7 @@ bx lr endfunc -function x264_predict_8x8_vl_neon +function predict_8x8_vl_neon add r1, #16 mov r12, #FDEC_STRIDE @@ -352,7 +352,7 @@ bx lr endfunc -function x264_predict_8x8_vr_neon +function predict_8x8_vr_neon add r1, #8 mov r12, #FDEC_STRIDE vld1.8 {d4,d5}, [r1,:64] @@ -384,7 +384,7 @@ bx lr endfunc -function x264_predict_8x8_hd_neon +function predict_8x8_hd_neon mov r12, #FDEC_STRIDE add r1, #7 @@ -417,7 +417,7 @@ bx lr endfunc -function x264_predict_8x8_hu_neon +function predict_8x8_hu_neon mov r12, #FDEC_STRIDE add r1, #7 vld1.8 {d7}, [r1] @@ -450,7 +450,7 @@ bx lr endfunc -function x264_predict_8x8c_dc_top_neon +function predict_8x8c_dc_top_neon sub r2, r0, #FDEC_STRIDE mov r1, #FDEC_STRIDE vld1.8 {d0}, [r2,:64] @@ 
-463,7 +463,7 @@ b pred8x8_dc_end endfunc -function x264_predict_8x8c_dc_left_neon +function predict_8x8c_dc_left_neon mov r1, #FDEC_STRIDE sub r2, r0, #1 ldcol.8 d0, r2, r1 @@ -475,7 +475,7 @@ b pred8x8_dc_end endfunc -function x264_predict_8x8c_dc_neon +function predict_8x8c_dc_neon sub r2, r0, #FDEC_STRIDE mov r1, #FDEC_STRIDE vld1.8 {d0}, [r2,:64] @@ -501,7 +501,7 @@ bx lr endfunc -function x264_predict_8x8c_h_neon +function predict_8x8c_h_neon sub r1, r0, #1 mov ip, #FDEC_STRIDE .rept 4 @@ -513,7 +513,7 @@ bx lr endfunc -function x264_predict_8x8c_v_neon +function predict_8x8c_v_neon sub r0, r0, #FDEC_STRIDE mov ip, #FDEC_STRIDE vld1.64 {d0}, [r0,:64], ip @@ -523,7 +523,7 @@ bx lr endfunc -function x264_predict_8x8c_p_neon +function predict_8x8c_p_neon sub r3, r0, #FDEC_STRIDE mov r1, #FDEC_STRIDE add r2, r3, #4 @@ -572,7 +572,7 @@ endfunc -function x264_predict_8x16c_dc_top_neon +function predict_8x16c_dc_top_neon sub r2, r0, #FDEC_STRIDE mov r1, #FDEC_STRIDE vld1.8 {d0}, [r2,:64] @@ -597,7 +597,7 @@ bx lr endfunc -function x264_predict_8x16c_h_neon +function predict_8x16c_h_neon sub r1, r0, #1 mov ip, #FDEC_STRIDE .rept 8 @@ -609,7 +609,7 @@ bx lr endfunc -function x264_predict_8x16c_p_neon +function predict_8x16c_p_neon sub r3, r0, #FDEC_STRIDE mov r1, #FDEC_STRIDE add r2, r3, #4 @@ -667,7 +667,7 @@ endfunc -function x264_predict_16x16_dc_top_neon +function predict_16x16_dc_top_neon sub r2, r0, #FDEC_STRIDE mov r1, #FDEC_STRIDE vld1.8 {q0}, [r2,:128] @@ -677,7 +677,7 @@ b pred16x16_dc_end endfunc -function x264_predict_16x16_dc_left_neon +function predict_16x16_dc_left_neon mov r1, #FDEC_STRIDE sub r2, r0, #1 ldcol.8 d0, r2, r1 @@ -688,7 +688,7 @@ b pred16x16_dc_end endfunc -function x264_predict_16x16_dc_neon +function predict_16x16_dc_neon sub r3, r0, #FDEC_STRIDE sub r0, r0, #1 vld1.64 {d0-d1}, [r3,:128] @@ -726,7 +726,7 @@ bx lr endfunc -function x264_predict_16x16_h_neon +function predict_16x16_h_neon sub r1, r0, #1 mov ip, #FDEC_STRIDE .rept 8 @@ 
-740,7 +740,7 @@ bx lr endfunc -function x264_predict_16x16_v_neon +function predict_16x16_v_neon sub r0, r0, #FDEC_STRIDE mov ip, #FDEC_STRIDE vld1.64 {d0-d1}, [r0,:128], ip @@ -750,7 +750,7 @@ bx lr endfunc -function x264_predict_16x16_p_neon +function predict_16x16_p_neon sub r3, r0, #FDEC_STRIDE mov r1, #FDEC_STRIDE add r2, r3, #8 diff -Nru x264-0.152.2854+gite9a5903/common/arm/predict-c.c x264-0.158.2988+git-20191101.7817004/common/arm/predict-c.c --- x264-0.152.2854+gite9a5903/common/arm/predict-c.c 2017-12-31 12:50:50.000000000 +0000 +++ x264-0.158.2988+git-20191101.7817004/common/arm/predict-c.c 2019-11-09 05:16:29.000000000 +0000 @@ -1,7 +1,7 @@ /***************************************************************************** * predict.c: arm intra prediction ***************************************************************************** - * Copyright (C) 2009-2017 x264 project + * Copyright (C) 2009-2019 x264 project * * Authors: David Conrad * diff -Nru x264-0.152.2854+gite9a5903/common/arm/predict.h x264-0.158.2988+git-20191101.7817004/common/arm/predict.h --- x264-0.152.2854+gite9a5903/common/arm/predict.h 2017-12-31 12:50:51.000000000 +0000 +++ x264-0.158.2988+git-20191101.7817004/common/arm/predict.h 2019-11-09 05:16:29.000000000 +0000 @@ -1,7 +1,7 @@ /***************************************************************************** * predict.h: arm intra prediction ***************************************************************************** - * Copyright (C) 2009-2017 x264 project + * Copyright (C) 2009-2019 x264 project * * Authors: David Conrad * @@ -26,45 +26,80 @@ #ifndef X264_ARM_PREDICT_H #define X264_ARM_PREDICT_H +#define x264_predict_4x4_dc_armv6 x264_template(predict_4x4_dc_armv6) void x264_predict_4x4_dc_armv6( uint8_t *src ); +#define x264_predict_4x4_dc_top_neon x264_template(predict_4x4_dc_top_neon) void x264_predict_4x4_dc_top_neon( uint8_t *src ); +#define x264_predict_4x4_v_armv6 x264_template(predict_4x4_v_armv6) void 
x264_predict_4x4_v_armv6( uint8_t *src ); +#define x264_predict_4x4_h_armv6 x264_template(predict_4x4_h_armv6) void x264_predict_4x4_h_armv6( uint8_t *src ); +#define x264_predict_4x4_ddr_armv6 x264_template(predict_4x4_ddr_armv6) void x264_predict_4x4_ddr_armv6( uint8_t *src ); +#define x264_predict_4x4_ddl_neon x264_template(predict_4x4_ddl_neon) void x264_predict_4x4_ddl_neon( uint8_t *src ); +#define x264_predict_8x8c_dc_neon x264_template(predict_8x8c_dc_neon) void x264_predict_8x8c_dc_neon( uint8_t *src ); +#define x264_predict_8x8c_dc_top_neon x264_template(predict_8x8c_dc_top_neon) void x264_predict_8x8c_dc_top_neon( uint8_t *src ); +#define x264_predict_8x8c_dc_left_neon x264_template(predict_8x8c_dc_left_neon) void x264_predict_8x8c_dc_left_neon( uint8_t *src ); +#define x264_predict_8x8c_h_neon x264_template(predict_8x8c_h_neon) void x264_predict_8x8c_h_neon( uint8_t *src ); +#define x264_predict_8x8c_v_neon x264_template(predict_8x8c_v_neon) void x264_predict_8x8c_v_neon( uint8_t *src ); +#define x264_predict_8x8c_p_neon x264_template(predict_8x8c_p_neon) void x264_predict_8x8c_p_neon( uint8_t *src ); +#define x264_predict_8x16c_h_neon x264_template(predict_8x16c_h_neon) void x264_predict_8x16c_h_neon( uint8_t *src ); +#define x264_predict_8x16c_dc_top_neon x264_template(predict_8x16c_dc_top_neon) void x264_predict_8x16c_dc_top_neon( uint8_t *src ); +#define x264_predict_8x16c_p_neon x264_template(predict_8x16c_p_neon) void x264_predict_8x16c_p_neon( uint8_t *src ); +#define x264_predict_8x8_dc_neon x264_template(predict_8x8_dc_neon) void x264_predict_8x8_dc_neon( uint8_t *src, uint8_t edge[36] ); +#define x264_predict_8x8_ddl_neon x264_template(predict_8x8_ddl_neon) void x264_predict_8x8_ddl_neon( uint8_t *src, uint8_t edge[36] ); +#define x264_predict_8x8_ddr_neon x264_template(predict_8x8_ddr_neon) void x264_predict_8x8_ddr_neon( uint8_t *src, uint8_t edge[36] ); +#define x264_predict_8x8_vl_neon x264_template(predict_8x8_vl_neon) void 
x264_predict_8x8_vl_neon( uint8_t *src, uint8_t edge[36] ); +#define x264_predict_8x8_vr_neon x264_template(predict_8x8_vr_neon) void x264_predict_8x8_vr_neon( uint8_t *src, uint8_t edge[36] ); +#define x264_predict_8x8_v_neon x264_template(predict_8x8_v_neon) void x264_predict_8x8_v_neon( uint8_t *src, uint8_t edge[36] ); +#define x264_predict_8x8_h_neon x264_template(predict_8x8_h_neon) void x264_predict_8x8_h_neon( uint8_t *src, uint8_t edge[36] ); +#define x264_predict_8x8_hd_neon x264_template(predict_8x8_hd_neon) void x264_predict_8x8_hd_neon( uint8_t *src, uint8_t edge[36] ); +#define x264_predict_8x8_hu_neon x264_template(predict_8x8_hu_neon) void x264_predict_8x8_hu_neon( uint8_t *src, uint8_t edge[36] ); +#define x264_predict_16x16_dc_neon x264_template(predict_16x16_dc_neon) void x264_predict_16x16_dc_neon( uint8_t *src ); +#define x264_predict_16x16_dc_top_neon x264_template(predict_16x16_dc_top_neon) void x264_predict_16x16_dc_top_neon( uint8_t *src ); +#define x264_predict_16x16_dc_left_neon x264_template(predict_16x16_dc_left_neon) void x264_predict_16x16_dc_left_neon( uint8_t *src ); +#define x264_predict_16x16_h_neon x264_template(predict_16x16_h_neon) void x264_predict_16x16_h_neon( uint8_t *src ); +#define x264_predict_16x16_v_neon x264_template(predict_16x16_v_neon) void x264_predict_16x16_v_neon( uint8_t *src ); +#define x264_predict_16x16_p_neon x264_template(predict_16x16_p_neon) void x264_predict_16x16_p_neon( uint8_t *src ); +#define x264_predict_4x4_init_arm x264_template(predict_4x4_init_arm) void x264_predict_4x4_init_arm( int cpu, x264_predict_t pf[12] ); +#define x264_predict_8x8_init_arm x264_template(predict_8x8_init_arm) void x264_predict_8x8_init_arm( int cpu, x264_predict8x8_t pf[12], x264_predict_8x8_filter_t *predict_filter ); +#define x264_predict_8x8c_init_arm x264_template(predict_8x8c_init_arm) void x264_predict_8x8c_init_arm( int cpu, x264_predict_t pf[7] ); +#define x264_predict_8x16c_init_arm 
x264_template(predict_8x16c_init_arm) void x264_predict_8x16c_init_arm( int cpu, x264_predict_t pf[7] ); +#define x264_predict_16x16_init_arm x264_template(predict_16x16_init_arm) void x264_predict_16x16_init_arm( int cpu, x264_predict_t pf[7] ); #endif diff -Nru x264-0.152.2854+gite9a5903/common/arm/quant-a.S x264-0.158.2988+git-20191101.7817004/common/arm/quant-a.S --- x264-0.152.2854+gite9a5903/common/arm/quant-a.S 2017-12-31 12:50:51.000000000 +0000 +++ x264-0.158.2988+git-20191101.7817004/common/arm/quant-a.S 2019-11-09 05:16:29.000000000 +0000 @@ -1,7 +1,7 @@ /**************************************************************************** * quant.S: arm quantization and level-run ***************************************************************************** - * Copyright (C) 2009-2017 x264 project + * Copyright (C) 2009-2019 x264 project * * Authors: David Conrad * Janne Grunau @@ -75,7 +75,7 @@ .endm // quant_2x2_dc( int16_t dct[4], int mf, int bias ) -function x264_quant_2x2_dc_neon +function quant_2x2_dc_neon vld1.64 {d0}, [r0,:64] vabs.s16 d3, d0 vdup.16 d2, r2 @@ -91,7 +91,7 @@ endfunc // quant_4x4_dc( int16_t dct[16], int mf, int bias ) -function x264_quant_4x4_dc_neon +function quant_4x4_dc_neon vld1.64 {d28-d31}, [r0,:128] vabs.s16 q8, q14 vabs.s16 q9, q15 @@ -103,7 +103,7 @@ endfunc // quant_4x4( int16_t dct[16], uint16_t mf[16], uint16_t bias[16] ) -function x264_quant_4x4_neon +function quant_4x4_neon vld1.64 {d28-d31}, [r0,:128] vabs.s16 q8, q14 vabs.s16 q9, q15 @@ -115,7 +115,7 @@ endfunc // quant_4x4x4( int16_t dct[4][16], uint16_t mf[16], uint16_t bias[16] ) -function x264_quant_4x4x4_neon +function quant_4x4x4_neon vpush {d8-d15} vld1.64 {d28-d31}, [r0,:128] vabs.s16 q8, q14 @@ -156,7 +156,7 @@ endfunc // quant_8x8( int16_t dct[64], uint16_t mf[64], uint16_t bias[64] ) -function x264_quant_8x8_neon +function quant_8x8_neon vld1.64 {d28-d31}, [r0,:128] vabs.s16 q8, q14 vabs.s16 q9, q15 @@ -191,7 +191,7 @@ // dequant_4x4( int16_t dct[16], int 
dequant_mf[6][16], int i_qp ) .macro DEQUANT size bits -function x264_dequant_\size\()_neon +function dequant_\size\()_neon DEQUANT_START \bits+2, \bits .ifc \size, 8x8 mov r2, #4 @@ -272,7 +272,7 @@ DEQUANT 8x8, 6 // dequant_4x4_dc( int16_t dct[16], int dequant_mf[6][16], int i_qp ) -function x264_dequant_4x4_dc_neon +function dequant_4x4_dc_neon DEQUANT_START 6, 6, yes blt dequant_4x4_dc_rshift @@ -318,7 +318,7 @@ endfunc .macro decimate_score_1x size -function x264_decimate_score\size\()_neon +function decimate_score\size\()_neon vld1.16 {q0, q1}, [r0, :128] movrel r3, mask_2bit vmov.s8 q3, #0x01 @@ -347,7 +347,7 @@ lsr r1, r1, #2 .endif rbit r1, r1 - movrelx r3, X(x264_decimate_table4), r2 + movrelx r3, X264(decimate_table4), r2 1: clz r2, r1 lsl r1, r1, r2 @@ -363,7 +363,7 @@ decimate_score_1x 15 decimate_score_1x 16 -function x264_decimate_score64_neon +function decimate_score64_neon push {lr} vld1.16 {q8, q9}, [r0, :128]! vld1.16 {q10, q11}, [r0, :128]! @@ -416,7 +416,7 @@ mvn r12, r12 mov r0, #0 mov lr, #32 - movrelx r3, X(x264_decimate_table8), r2 + movrelx r3, X264(decimate_table8), r2 beq 2f 1: clz r2, r1 @@ -449,7 +449,7 @@ endfunc // int coeff_last( int16_t *l ) -function x264_coeff_last4_arm +function coeff_last4_arm ldrd r2, r3, [r0] subs r0, r3, #0 movne r0, #2 @@ -459,7 +459,7 @@ bx lr endfunc -function x264_coeff_last8_arm +function coeff_last8_arm ldrd r2, r3, [r0, #8] orrs ip, r2, r3 movne r0, #4 @@ -474,7 +474,7 @@ endfunc .macro COEFF_LAST_1x size -function x264_coeff_last\size\()_neon +function coeff_last\size\()_neon .if \size == 15 sub r0, r0, #2 .endif @@ -500,7 +500,7 @@ COEFF_LAST_1x 15 COEFF_LAST_1x 16 -function x264_coeff_last64_neon +function coeff_last64_neon vld1.64 {d16-d19}, [r0,:128]! vqmovn.u16 d16, q8 vqmovn.u16 d17, q9 @@ -545,7 +545,7 @@ bx lr endfunc -function x264_denoise_dct_neon +function denoise_dct_neon 1: subs r3, r3, #16 vld1.16 {q0, q1}, [r0] vld1.32 {q12, q13}, [r1]! 
diff -Nru x264-0.152.2854+gite9a5903/common/arm/quant.h x264-0.158.2988+git-20191101.7817004/common/arm/quant.h --- x264-0.152.2854+gite9a5903/common/arm/quant.h 2017-12-31 12:50:51.000000000 +0000 +++ x264-0.158.2988+git-20191101.7817004/common/arm/quant.h 2019-11-09 05:16:29.000000000 +0000 @@ -1,7 +1,7 @@ /***************************************************************************** * quant.h: arm quantization and level-run ***************************************************************************** - * Copyright (C) 2005-2017 x264 project + * Copyright (C) 2005-2019 x264 project * * Authors: David Conrad * @@ -26,28 +26,46 @@ #ifndef X264_ARM_QUANT_H #define X264_ARM_QUANT_H +#define x264_quant_2x2_dc_armv6 x264_template(quant_2x2_dc_armv6) int x264_quant_2x2_dc_armv6( int16_t dct[4], int mf, int bias ); +#define x264_quant_2x2_dc_neon x264_template(quant_2x2_dc_neon) int x264_quant_2x2_dc_neon( int16_t dct[4], int mf, int bias ); +#define x264_quant_4x4_dc_neon x264_template(quant_4x4_dc_neon) int x264_quant_4x4_dc_neon( int16_t dct[16], int mf, int bias ); +#define x264_quant_4x4_neon x264_template(quant_4x4_neon) int x264_quant_4x4_neon( int16_t dct[16], uint16_t mf[16], uint16_t bias[16] ); +#define x264_quant_4x4x4_neon x264_template(quant_4x4x4_neon) int x264_quant_4x4x4_neon( int16_t dct[4][16], uint16_t mf[16], uint16_t bias[16] ); +#define x264_quant_8x8_neon x264_template(quant_8x8_neon) int x264_quant_8x8_neon( int16_t dct[64], uint16_t mf[64], uint16_t bias[64] ); +#define x264_dequant_4x4_dc_neon x264_template(dequant_4x4_dc_neon) void x264_dequant_4x4_dc_neon( int16_t dct[16], int dequant_mf[6][16], int i_qp ); +#define x264_dequant_4x4_neon x264_template(dequant_4x4_neon) void x264_dequant_4x4_neon( int16_t dct[16], int dequant_mf[6][16], int i_qp ); +#define x264_dequant_8x8_neon x264_template(dequant_8x8_neon) void x264_dequant_8x8_neon( int16_t dct[64], int dequant_mf[6][64], int i_qp ); +#define x264_decimate_score15_neon 
x264_template(decimate_score15_neon) int x264_decimate_score15_neon( int16_t * ); +#define x264_decimate_score16_neon x264_template(decimate_score16_neon) int x264_decimate_score16_neon( int16_t * ); +#define x264_decimate_score64_neon x264_template(decimate_score64_neon) int x264_decimate_score64_neon( int16_t * ); +#define x264_coeff_last4_arm x264_template(coeff_last4_arm) int x264_coeff_last4_arm( int16_t * ); +#define x264_coeff_last8_arm x264_template(coeff_last8_arm) int x264_coeff_last8_arm( int16_t * ); +#define x264_coeff_last15_neon x264_template(coeff_last15_neon) int x264_coeff_last15_neon( int16_t * ); +#define x264_coeff_last16_neon x264_template(coeff_last16_neon) int x264_coeff_last16_neon( int16_t * ); +#define x264_coeff_last64_neon x264_template(coeff_last64_neon) int x264_coeff_last64_neon( int16_t * ); +#define x264_denoise_dct_neon x264_template(denoise_dct_neon) void x264_denoise_dct_neon( dctcoef *, uint32_t *, udctcoef *, int ); #endif diff -Nru x264-0.152.2854+gite9a5903/common/base.c x264-0.158.2988+git-20191101.7817004/common/base.c --- x264-0.152.2854+gite9a5903/common/base.c 1970-01-01 00:00:00.000000000 +0000 +++ x264-0.158.2988+git-20191101.7817004/common/base.c 2019-11-09 05:16:29.000000000 +0000 @@ -0,0 +1,1443 @@ +/***************************************************************************** + * base.c: misc common functions (bit depth independent) + ***************************************************************************** + * Copyright (C) 2003-2019 x264 project + * + * Authors: Loren Merritt + * Laurent Aimar + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. 
+ * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02111, USA. + * + * This program is also available under a commercial proprietary license. + * For more information, contact us at licensing@x264.com. + *****************************************************************************/ + +#include "base.h" + +#include + +#if HAVE_MALLOC_H +#include +#endif +#if HAVE_THP +#include +#endif + +/**************************************************************************** + * x264_reduce_fraction: + ****************************************************************************/ +#define REDUCE_FRACTION( name, type )\ +void name( type *n, type *d )\ +{ \ + type a = *n; \ + type b = *d; \ + type c; \ + if( !a || !b ) \ + return; \ + c = a % b; \ + while( c ) \ + { \ + a = b; \ + b = c; \ + c = a % b; \ + } \ + *n /= b; \ + *d /= b; \ +} + +REDUCE_FRACTION( x264_reduce_fraction , uint32_t ) +REDUCE_FRACTION( x264_reduce_fraction64, uint64_t ) + +/**************************************************************************** + * x264_log: + ****************************************************************************/ +void x264_log_default( void *p_unused, int i_level, const char *psz_fmt, va_list arg ) +{ + char *psz_prefix; + switch( i_level ) + { + case X264_LOG_ERROR: + psz_prefix = "error"; + break; + case X264_LOG_WARNING: + psz_prefix = "warning"; + break; + case X264_LOG_INFO: + psz_prefix = "info"; + break; + case X264_LOG_DEBUG: + psz_prefix = "debug"; + break; + default: + psz_prefix = "unknown"; + break; + } + fprintf( stderr, "x264 [%s]: ", psz_prefix ); + x264_vfprintf( 
stderr, psz_fmt, arg ); +} + +void x264_log_internal( int i_level, const char *psz_fmt, ... ) +{ + va_list arg; + va_start( arg, psz_fmt ); + x264_log_default( NULL, i_level, psz_fmt, arg ); + va_end( arg ); +} + +/**************************************************************************** + * x264_malloc: + ****************************************************************************/ +void *x264_malloc( int i_size ) +{ + uint8_t *align_buf = NULL; +#if HAVE_MALLOC_H +#if HAVE_THP +#define HUGE_PAGE_SIZE 2*1024*1024 +#define HUGE_PAGE_THRESHOLD HUGE_PAGE_SIZE*7/8 /* FIXME: Is this optimal? */ + /* Attempt to allocate huge pages to reduce TLB misses. */ + if( i_size >= HUGE_PAGE_THRESHOLD ) + { + align_buf = memalign( HUGE_PAGE_SIZE, i_size ); + if( align_buf ) + { + /* Round up to the next huge page boundary if we are close enough. */ + size_t madv_size = (i_size + HUGE_PAGE_SIZE - HUGE_PAGE_THRESHOLD) & ~(HUGE_PAGE_SIZE-1); + madvise( align_buf, madv_size, MADV_HUGEPAGE ); + } + } + else +#undef HUGE_PAGE_SIZE +#undef HUGE_PAGE_THRESHOLD +#endif + align_buf = memalign( NATIVE_ALIGN, i_size ); +#else + uint8_t *buf = malloc( i_size + (NATIVE_ALIGN-1) + sizeof(void **) ); + if( buf ) + { + align_buf = buf + (NATIVE_ALIGN-1) + sizeof(void **); + align_buf -= (intptr_t) align_buf & (NATIVE_ALIGN-1); + *( (void **) ( align_buf - sizeof(void **) ) ) = buf; + } +#endif + if( !align_buf ) + x264_log_internal( X264_LOG_ERROR, "malloc of size %d failed\n", i_size ); + return align_buf; +} + +/**************************************************************************** + * x264_free: + ****************************************************************************/ +void x264_free( void *p ) +{ + if( p ) + { +#if HAVE_MALLOC_H + free( p ); +#else + free( *( ( ( void **) p ) - 1 ) ); +#endif + } +} + +/**************************************************************************** + * x264_slurp_file: + ****************************************************************************/ 
+char *x264_slurp_file( const char *filename ) +{ + int b_error = 0; + int64_t i_size; + char *buf; + FILE *fh = x264_fopen( filename, "rb" ); + if( !fh ) + return NULL; + + b_error |= fseek( fh, 0, SEEK_END ) < 0; + b_error |= ( i_size = ftell( fh ) ) <= 0; + if( WORD_SIZE == 4 ) + b_error |= i_size > INT32_MAX; + b_error |= fseek( fh, 0, SEEK_SET ) < 0; + if( b_error ) + goto error; + + buf = x264_malloc( i_size+2 ); + if( !buf ) + goto error; + + b_error |= fread( buf, 1, i_size, fh ) != i_size; + fclose( fh ); + if( b_error ) + { + x264_free( buf ); + return NULL; + } + + if( buf[i_size-1] != '\n' ) + buf[i_size++] = '\n'; + buf[i_size] = '\0'; + + return buf; +error: + fclose( fh ); + return NULL; +} + +/**************************************************************************** + * x264_picture_init: + ****************************************************************************/ +REALIGN_STACK void x264_picture_init( x264_picture_t *pic ) +{ + memset( pic, 0, sizeof( x264_picture_t ) ); + pic->i_type = X264_TYPE_AUTO; + pic->i_qpplus1 = X264_QP_AUTO; + pic->i_pic_struct = PIC_STRUCT_AUTO; +} + +/**************************************************************************** + * x264_picture_alloc: + ****************************************************************************/ +REALIGN_STACK int x264_picture_alloc( x264_picture_t *pic, int i_csp, int i_width, int i_height ) +{ + typedef struct + { + int planes; + int width_fix8[3]; + int height_fix8[3]; + } x264_csp_tab_t; + + static const x264_csp_tab_t csp_tab[] = + { + [X264_CSP_I400] = { 1, { 256*1 }, { 256*1 } }, + [X264_CSP_I420] = { 3, { 256*1, 256/2, 256/2 }, { 256*1, 256/2, 256/2 } }, + [X264_CSP_YV12] = { 3, { 256*1, 256/2, 256/2 }, { 256*1, 256/2, 256/2 } }, + [X264_CSP_NV12] = { 2, { 256*1, 256*1 }, { 256*1, 256/2 }, }, + [X264_CSP_NV21] = { 2, { 256*1, 256*1 }, { 256*1, 256/2 }, }, + [X264_CSP_I422] = { 3, { 256*1, 256/2, 256/2 }, { 256*1, 256*1, 256*1 } }, + [X264_CSP_YV16] = { 3, { 256*1, 256/2, 
256/2 }, { 256*1, 256*1, 256*1 } }, + [X264_CSP_NV16] = { 2, { 256*1, 256*1 }, { 256*1, 256*1 }, }, + [X264_CSP_YUYV] = { 1, { 256*2 }, { 256*1 }, }, + [X264_CSP_UYVY] = { 1, { 256*2 }, { 256*1 }, }, + [X264_CSP_I444] = { 3, { 256*1, 256*1, 256*1 }, { 256*1, 256*1, 256*1 } }, + [X264_CSP_YV24] = { 3, { 256*1, 256*1, 256*1 }, { 256*1, 256*1, 256*1 } }, + [X264_CSP_BGR] = { 1, { 256*3 }, { 256*1 }, }, + [X264_CSP_BGRA] = { 1, { 256*4 }, { 256*1 }, }, + [X264_CSP_RGB] = { 1, { 256*3 }, { 256*1 }, }, + }; + + int csp = i_csp & X264_CSP_MASK; + if( csp <= X264_CSP_NONE || csp >= X264_CSP_MAX || csp == X264_CSP_V210 ) + return -1; + x264_picture_init( pic ); + pic->img.i_csp = i_csp; + pic->img.i_plane = csp_tab[csp].planes; + int depth_factor = i_csp & X264_CSP_HIGH_DEPTH ? 2 : 1; + int plane_offset[3] = {0}; + int frame_size = 0; + for( int i = 0; i < pic->img.i_plane; i++ ) + { + int stride = (((int64_t)i_width * csp_tab[csp].width_fix8[i]) >> 8) * depth_factor; + int plane_size = (((int64_t)i_height * csp_tab[csp].height_fix8[i]) >> 8) * stride; + pic->img.i_stride[i] = stride; + plane_offset[i] = frame_size; + frame_size += plane_size; + } + pic->img.plane[0] = x264_malloc( frame_size ); + if( !pic->img.plane[0] ) + return -1; + for( int i = 1; i < pic->img.i_plane; i++ ) + pic->img.plane[i] = pic->img.plane[0] + plane_offset[i]; + return 0; +} + +/**************************************************************************** + * x264_picture_clean: + ****************************************************************************/ +REALIGN_STACK void x264_picture_clean( x264_picture_t *pic ) +{ + x264_free( pic->img.plane[0] ); + + /* just to be safe */ + memset( pic, 0, sizeof( x264_picture_t ) ); +} + +/**************************************************************************** + * x264_param_default: + ****************************************************************************/ +REALIGN_STACK void x264_param_default( x264_param_t *param ) +{ + /* */ + memset( param, 
0, sizeof( x264_param_t ) ); + + /* CPU autodetect */ + param->cpu = x264_cpu_detect(); + param->i_threads = X264_THREADS_AUTO; + param->i_lookahead_threads = X264_THREADS_AUTO; + param->b_deterministic = 1; + param->i_sync_lookahead = X264_SYNC_LOOKAHEAD_AUTO; + + /* Video properties */ + param->i_csp = X264_CHROMA_FORMAT ? X264_CHROMA_FORMAT : X264_CSP_I420; + param->i_width = 0; + param->i_height = 0; + param->vui.i_sar_width = 0; + param->vui.i_sar_height= 0; + param->vui.i_overscan = 0; /* undef */ + param->vui.i_vidformat = 5; /* undef */ + param->vui.b_fullrange = -1; /* default depends on input */ + param->vui.i_colorprim = 2; /* undef */ + param->vui.i_transfer = 2; /* undef */ + param->vui.i_colmatrix = -1; /* default depends on input */ + param->vui.i_chroma_loc= 0; /* left center */ + param->i_fps_num = 25; + param->i_fps_den = 1; + param->i_level_idc = -1; + param->i_slice_max_size = 0; + param->i_slice_max_mbs = 0; + param->i_slice_count = 0; +#if HAVE_BITDEPTH8 + param->i_bitdepth = 8; +#elif HAVE_BITDEPTH10 + param->i_bitdepth = 10; +#else + param->i_bitdepth = 8; +#endif + + /* Encoder parameters */ + param->i_frame_reference = 3; + param->i_keyint_max = 250; + param->i_keyint_min = X264_KEYINT_MIN_AUTO; + param->i_bframe = 3; + param->i_scenecut_threshold = 40; + param->i_bframe_adaptive = X264_B_ADAPT_FAST; + param->i_bframe_bias = 0; + param->i_bframe_pyramid = X264_B_PYRAMID_NORMAL; + param->b_interlaced = 0; + param->b_constrained_intra = 0; + + param->b_deblocking_filter = 1; + param->i_deblocking_filter_alphac0 = 0; + param->i_deblocking_filter_beta = 0; + + param->b_cabac = 1; + param->i_cabac_init_idc = 0; + + param->rc.i_rc_method = X264_RC_CRF; + param->rc.i_bitrate = 0; + param->rc.f_rate_tolerance = 1.0; + param->rc.i_vbv_max_bitrate = 0; + param->rc.i_vbv_buffer_size = 0; + param->rc.f_vbv_buffer_init = 0.9; + param->rc.i_qp_constant = -1; + param->rc.f_rf_constant = 23; + param->rc.i_qp_min = 0; + param->rc.i_qp_max = INT_MAX; + 
param->rc.i_qp_step = 4; + param->rc.f_ip_factor = 1.4; + param->rc.f_pb_factor = 1.3; + param->rc.i_aq_mode = X264_AQ_VARIANCE; + param->rc.f_aq_strength = 1.0; + param->rc.i_lookahead = 40; + + param->rc.b_stat_write = 0; + param->rc.psz_stat_out = "x264_2pass.log"; + param->rc.b_stat_read = 0; + param->rc.psz_stat_in = "x264_2pass.log"; + param->rc.f_qcompress = 0.6; + param->rc.f_qblur = 0.5; + param->rc.f_complexity_blur = 20; + param->rc.i_zones = 0; + param->rc.b_mb_tree = 1; + + /* Log */ + param->pf_log = x264_log_default; + param->p_log_private = NULL; + param->i_log_level = X264_LOG_INFO; + + /* */ + param->analyse.intra = X264_ANALYSE_I4x4 | X264_ANALYSE_I8x8; + param->analyse.inter = X264_ANALYSE_I4x4 | X264_ANALYSE_I8x8 + | X264_ANALYSE_PSUB16x16 | X264_ANALYSE_BSUB16x16; + param->analyse.i_direct_mv_pred = X264_DIRECT_PRED_SPATIAL; + param->analyse.i_me_method = X264_ME_HEX; + param->analyse.f_psy_rd = 1.0; + param->analyse.b_psy = 1; + param->analyse.f_psy_trellis = 0; + param->analyse.i_me_range = 16; + param->analyse.i_subpel_refine = 7; + param->analyse.b_mixed_references = 1; + param->analyse.b_chroma_me = 1; + param->analyse.i_mv_range_thread = -1; + param->analyse.i_mv_range = -1; // set from level_idc + param->analyse.i_chroma_qp_offset = 0; + param->analyse.b_fast_pskip = 1; + param->analyse.b_weighted_bipred = 1; + param->analyse.i_weighted_pred = X264_WEIGHTP_SMART; + param->analyse.b_dct_decimate = 1; + param->analyse.b_transform_8x8 = 1; + param->analyse.i_trellis = 1; + param->analyse.i_luma_deadzone[0] = 21; + param->analyse.i_luma_deadzone[1] = 11; + param->analyse.b_psnr = 0; + param->analyse.b_ssim = 0; + + param->i_cqm_preset = X264_CQM_FLAT; + memset( param->cqm_4iy, 16, sizeof( param->cqm_4iy ) ); + memset( param->cqm_4py, 16, sizeof( param->cqm_4py ) ); + memset( param->cqm_4ic, 16, sizeof( param->cqm_4ic ) ); + memset( param->cqm_4pc, 16, sizeof( param->cqm_4pc ) ); + memset( param->cqm_8iy, 16, sizeof( param->cqm_8iy ) ); + 
memset( param->cqm_8py, 16, sizeof( param->cqm_8py ) ); + memset( param->cqm_8ic, 16, sizeof( param->cqm_8ic ) ); + memset( param->cqm_8pc, 16, sizeof( param->cqm_8pc ) ); + + param->b_repeat_headers = 1; + param->b_annexb = 1; + param->b_aud = 0; + param->b_vfr_input = 1; + param->i_nal_hrd = X264_NAL_HRD_NONE; + param->b_tff = 1; + param->b_pic_struct = 0; + param->b_fake_interlaced = 0; + param->i_frame_packing = -1; + param->i_alternative_transfer = 2; /* undef */ + param->b_opencl = 0; + param->i_opencl_device = 0; + param->opencl_device_id = NULL; + param->psz_clbin_file = NULL; + param->i_avcintra_class = 0; + param->i_avcintra_flavor = X264_AVCINTRA_FLAVOR_PANASONIC; +} + +static int param_apply_preset( x264_param_t *param, const char *preset ) +{ + char *end; + int i = strtol( preset, &end, 10 ); + if( *end == 0 && i >= 0 && i < sizeof(x264_preset_names)/sizeof(*x264_preset_names)-1 ) + preset = x264_preset_names[i]; + + if( !strcasecmp( preset, "ultrafast" ) ) + { + param->i_frame_reference = 1; + param->i_scenecut_threshold = 0; + param->b_deblocking_filter = 0; + param->b_cabac = 0; + param->i_bframe = 0; + param->analyse.intra = 0; + param->analyse.inter = 0; + param->analyse.b_transform_8x8 = 0; + param->analyse.i_me_method = X264_ME_DIA; + param->analyse.i_subpel_refine = 0; + param->rc.i_aq_mode = 0; + param->analyse.b_mixed_references = 0; + param->analyse.i_trellis = 0; + param->i_bframe_adaptive = X264_B_ADAPT_NONE; + param->rc.b_mb_tree = 0; + param->analyse.i_weighted_pred = X264_WEIGHTP_NONE; + param->analyse.b_weighted_bipred = 0; + param->rc.i_lookahead = 0; + } + else if( !strcasecmp( preset, "superfast" ) ) + { + param->analyse.inter = X264_ANALYSE_I8x8|X264_ANALYSE_I4x4; + param->analyse.i_me_method = X264_ME_DIA; + param->analyse.i_subpel_refine = 1; + param->i_frame_reference = 1; + param->analyse.b_mixed_references = 0; + param->analyse.i_trellis = 0; + param->rc.b_mb_tree = 0; + param->analyse.i_weighted_pred = X264_WEIGHTP_SIMPLE; + 
param->rc.i_lookahead = 0; + } + else if( !strcasecmp( preset, "veryfast" ) ) + { + param->analyse.i_subpel_refine = 2; + param->i_frame_reference = 1; + param->analyse.b_mixed_references = 0; + param->analyse.i_trellis = 0; + param->analyse.i_weighted_pred = X264_WEIGHTP_SIMPLE; + param->rc.i_lookahead = 10; + } + else if( !strcasecmp( preset, "faster" ) ) + { + param->analyse.b_mixed_references = 0; + param->i_frame_reference = 2; + param->analyse.i_subpel_refine = 4; + param->analyse.i_weighted_pred = X264_WEIGHTP_SIMPLE; + param->rc.i_lookahead = 20; + } + else if( !strcasecmp( preset, "fast" ) ) + { + param->i_frame_reference = 2; + param->analyse.i_subpel_refine = 6; + param->analyse.i_weighted_pred = X264_WEIGHTP_SIMPLE; + param->rc.i_lookahead = 30; + } + else if( !strcasecmp( preset, "medium" ) ) + { + /* Default is medium */ + } + else if( !strcasecmp( preset, "slow" ) ) + { + param->analyse.i_subpel_refine = 8; + param->i_frame_reference = 5; + param->analyse.i_direct_mv_pred = X264_DIRECT_PRED_AUTO; + param->analyse.i_trellis = 2; + param->rc.i_lookahead = 50; + } + else if( !strcasecmp( preset, "slower" ) ) + { + param->analyse.i_me_method = X264_ME_UMH; + param->analyse.i_subpel_refine = 9; + param->i_frame_reference = 8; + param->i_bframe_adaptive = X264_B_ADAPT_TRELLIS; + param->analyse.i_direct_mv_pred = X264_DIRECT_PRED_AUTO; + param->analyse.inter |= X264_ANALYSE_PSUB8x8; + param->analyse.i_trellis = 2; + param->rc.i_lookahead = 60; + } + else if( !strcasecmp( preset, "veryslow" ) ) + { + param->analyse.i_me_method = X264_ME_UMH; + param->analyse.i_subpel_refine = 10; + param->analyse.i_me_range = 24; + param->i_frame_reference = 16; + param->i_bframe_adaptive = X264_B_ADAPT_TRELLIS; + param->analyse.i_direct_mv_pred = X264_DIRECT_PRED_AUTO; + param->analyse.inter |= X264_ANALYSE_PSUB8x8; + param->analyse.i_trellis = 2; + param->i_bframe = 8; + param->rc.i_lookahead = 60; + } + else if( !strcasecmp( preset, "placebo" ) ) + { + 
param->analyse.i_me_method = X264_ME_TESA; + param->analyse.i_subpel_refine = 11; + param->analyse.i_me_range = 24; + param->i_frame_reference = 16; + param->i_bframe_adaptive = X264_B_ADAPT_TRELLIS; + param->analyse.i_direct_mv_pred = X264_DIRECT_PRED_AUTO; + param->analyse.inter |= X264_ANALYSE_PSUB8x8; + param->analyse.b_fast_pskip = 0; + param->analyse.i_trellis = 2; + param->i_bframe = 16; + param->rc.i_lookahead = 60; + } + else + { + x264_log_internal( X264_LOG_ERROR, "invalid preset '%s'\n", preset ); + return -1; + } + return 0; +} + +static int param_apply_tune( x264_param_t *param, const char *tune ) +{ + int psy_tuning_used = 0; + for( int len; tune += strspn( tune, ",./-+" ), (len = strcspn( tune, ",./-+" )); tune += len ) + { + if( len == 4 && !strncasecmp( tune, "film", 4 ) ) + { + if( psy_tuning_used++ ) goto psy_failure; + param->i_deblocking_filter_alphac0 = -1; + param->i_deblocking_filter_beta = -1; + param->analyse.f_psy_trellis = 0.15; + } + else if( len == 9 && !strncasecmp( tune, "animation", 9 ) ) + { + if( psy_tuning_used++ ) goto psy_failure; + param->i_frame_reference = param->i_frame_reference > 1 ? 
param->i_frame_reference*2 : 1; + param->i_deblocking_filter_alphac0 = 1; + param->i_deblocking_filter_beta = 1; + param->analyse.f_psy_rd = 0.4; + param->rc.f_aq_strength = 0.6; + param->i_bframe += 2; + } + else if( len == 5 && !strncasecmp( tune, "grain", 5 ) ) + { + if( psy_tuning_used++ ) goto psy_failure; + param->i_deblocking_filter_alphac0 = -2; + param->i_deblocking_filter_beta = -2; + param->analyse.f_psy_trellis = 0.25; + param->analyse.b_dct_decimate = 0; + param->rc.f_pb_factor = 1.1; + param->rc.f_ip_factor = 1.1; + param->rc.f_aq_strength = 0.5; + param->analyse.i_luma_deadzone[0] = 6; + param->analyse.i_luma_deadzone[1] = 6; + param->rc.f_qcompress = 0.8; + } + else if( len == 10 && !strncasecmp( tune, "stillimage", 10 ) ) + { + if( psy_tuning_used++ ) goto psy_failure; + param->i_deblocking_filter_alphac0 = -3; + param->i_deblocking_filter_beta = -3; + param->analyse.f_psy_rd = 2.0; + param->analyse.f_psy_trellis = 0.7; + param->rc.f_aq_strength = 1.2; + } + else if( len == 4 && !strncasecmp( tune, "psnr", 4 ) ) + { + if( psy_tuning_used++ ) goto psy_failure; + param->rc.i_aq_mode = X264_AQ_NONE; + param->analyse.b_psy = 0; + } + else if( len == 4 && !strncasecmp( tune, "ssim", 4 ) ) + { + if( psy_tuning_used++ ) goto psy_failure; + param->rc.i_aq_mode = X264_AQ_AUTOVARIANCE; + param->analyse.b_psy = 0; + } + else if( len == 10 && !strncasecmp( tune, "fastdecode", 10 ) ) + { + param->b_deblocking_filter = 0; + param->b_cabac = 0; + param->analyse.b_weighted_bipred = 0; + param->analyse.i_weighted_pred = X264_WEIGHTP_NONE; + } + else if( len == 11 && !strncasecmp( tune, "zerolatency", 11 ) ) + { + param->rc.i_lookahead = 0; + param->i_sync_lookahead = 0; + param->i_bframe = 0; + param->b_sliced_threads = 1; + param->b_vfr_input = 0; + param->rc.b_mb_tree = 0; + } + else if( len == 6 && !strncasecmp( tune, "touhou", 6 ) ) + { + if( psy_tuning_used++ ) goto psy_failure; + param->i_frame_reference = param->i_frame_reference > 1 ? 
param->i_frame_reference*2 : 1; + param->i_deblocking_filter_alphac0 = -1; + param->i_deblocking_filter_beta = -1; + param->analyse.f_psy_trellis = 0.2; + param->rc.f_aq_strength = 1.3; + if( param->analyse.inter & X264_ANALYSE_PSUB16x16 ) + param->analyse.inter |= X264_ANALYSE_PSUB8x8; + } + else + { + x264_log_internal( X264_LOG_ERROR, "invalid tune '%.*s'\n", len, tune ); + return -1; + psy_failure: + x264_log_internal( X264_LOG_WARNING, "only 1 psy tuning can be used: ignoring tune %.*s\n", len, tune ); + } + } + return 0; +} + +REALIGN_STACK int x264_param_default_preset( x264_param_t *param, const char *preset, const char *tune ) +{ + x264_param_default( param ); + + if( preset && param_apply_preset( param, preset ) < 0 ) + return -1; + if( tune && param_apply_tune( param, tune ) < 0 ) + return -1; + return 0; +} + +REALIGN_STACK void x264_param_apply_fastfirstpass( x264_param_t *param ) +{ + /* Set faster options in case of turbo firstpass. */ + if( param->rc.b_stat_write && !param->rc.b_stat_read ) + { + param->i_frame_reference = 1; + param->analyse.b_transform_8x8 = 0; + param->analyse.inter = 0; + param->analyse.i_me_method = X264_ME_DIA; + param->analyse.i_subpel_refine = X264_MIN( 2, param->analyse.i_subpel_refine ); + param->analyse.i_trellis = 0; + param->analyse.b_fast_pskip = 1; + } +} + +static int profile_string_to_int( const char *str ) +{ + if( !strcasecmp( str, "baseline" ) ) + return PROFILE_BASELINE; + if( !strcasecmp( str, "main" ) ) + return PROFILE_MAIN; + if( !strcasecmp( str, "high" ) ) + return PROFILE_HIGH; + if( !strcasecmp( str, "high10" ) ) + return PROFILE_HIGH10; + if( !strcasecmp( str, "high422" ) ) + return PROFILE_HIGH422; + if( !strcasecmp( str, "high444" ) ) + return PROFILE_HIGH444_PREDICTIVE; + return -1; +} + +REALIGN_STACK int x264_param_apply_profile( x264_param_t *param, const char *profile ) +{ + if( !profile ) + return 0; + + const int qp_bd_offset = 6 * (param->i_bitdepth-8); + int p = profile_string_to_int( profile 
); + if( p < 0 ) + { + x264_log_internal( X264_LOG_ERROR, "invalid profile: %s\n", profile ); + return -1; + } + if( p < PROFILE_HIGH444_PREDICTIVE && ((param->rc.i_rc_method == X264_RC_CQP && param->rc.i_qp_constant <= 0) || + (param->rc.i_rc_method == X264_RC_CRF && (int)(param->rc.f_rf_constant + qp_bd_offset) <= 0)) ) + { + x264_log_internal( X264_LOG_ERROR, "%s profile doesn't support lossless\n", profile ); + return -1; + } + if( p < PROFILE_HIGH444_PREDICTIVE && (param->i_csp & X264_CSP_MASK) >= X264_CSP_I444 ) + { + x264_log_internal( X264_LOG_ERROR, "%s profile doesn't support 4:4:4\n", profile ); + return -1; + } + if( p < PROFILE_HIGH422 && (param->i_csp & X264_CSP_MASK) >= X264_CSP_I422 ) + { + x264_log_internal( X264_LOG_ERROR, "%s profile doesn't support 4:2:2\n", profile ); + return -1; + } + if( p < PROFILE_HIGH10 && param->i_bitdepth > 8 ) + { + x264_log_internal( X264_LOG_ERROR, "%s profile doesn't support a bit depth of %d\n", profile, param->i_bitdepth ); + return -1; + } + if( p < PROFILE_HIGH && (param->i_csp & X264_CSP_MASK) == X264_CSP_I400 ) + { + x264_log_internal( X264_LOG_ERROR, "%s profile doesn't support 4:0:0\n", profile ); + return -1; + } + + if( p == PROFILE_BASELINE ) + { + param->analyse.b_transform_8x8 = 0; + param->b_cabac = 0; + param->i_cqm_preset = X264_CQM_FLAT; + param->psz_cqm_file = NULL; + param->i_bframe = 0; + param->analyse.i_weighted_pred = X264_WEIGHTP_NONE; + if( param->b_interlaced ) + { + x264_log_internal( X264_LOG_ERROR, "baseline profile doesn't support interlacing\n" ); + return -1; + } + if( param->b_fake_interlaced ) + { + x264_log_internal( X264_LOG_ERROR, "baseline profile doesn't support fake interlacing\n" ); + return -1; + } + } + else if( p == PROFILE_MAIN ) + { + param->analyse.b_transform_8x8 = 0; + param->i_cqm_preset = X264_CQM_FLAT; + param->psz_cqm_file = NULL; + } + return 0; +} + +static int parse_enum( const char *arg, const char * const *names, int *dst ) +{ + for( int i = 0; names[i]; i++ 
) + if( !strcasecmp( arg, names[i] ) ) + { + *dst = i; + return 0; + } + return -1; +} + +static int parse_cqm( const char *str, uint8_t *cqm, int length ) +{ + int i = 0; + do { + int coef; + if( !sscanf( str, "%d", &coef ) || coef < 1 || coef > 255 ) + return -1; + cqm[i++] = coef; + } while( i < length && (str = strchr( str, ',' )) && str++ ); + return (i == length) ? 0 : -1; +} + +static int atobool_internal( const char *str, int *b_error ) +{ + if( !strcmp(str, "1") || + !strcasecmp(str, "true") || + !strcasecmp(str, "yes") ) + return 1; + if( !strcmp(str, "0") || + !strcasecmp(str, "false") || + !strcasecmp(str, "no") ) + return 0; + *b_error = 1; + return 0; +} + +static int atoi_internal( const char *str, int *b_error ) +{ + char *end; + int v = strtol( str, &end, 0 ); + if( end == str || *end != '\0' ) + *b_error = 1; + return v; +} + +static double atof_internal( const char *str, int *b_error ) +{ + char *end; + double v = strtod( str, &end ); + if( end == str || *end != '\0' ) + *b_error = 1; + return v; +} + +#define atobool(str) ( name_was_bool = 1, atobool_internal( str, &b_error ) ) +#undef atoi +#undef atof +#define atoi(str) atoi_internal( str, &b_error ) +#define atof(str) atof_internal( str, &b_error ) + +REALIGN_STACK int x264_param_parse( x264_param_t *p, const char *name, const char *value ) +{ + char *name_buf = NULL; + int b_error = 0; + int errortype = X264_PARAM_BAD_VALUE; + int name_was_bool; + int value_was_null = !value; + + if( !name ) + return X264_PARAM_BAD_NAME; + if( !value ) + value = "true"; + + if( value[0] == '=' ) + value++; + + if( strchr( name, '_' ) ) // s/_/-/g + { + char *c; + name_buf = strdup(name); + if( !name_buf ) + return X264_PARAM_BAD_NAME; + while( (c = strchr( name_buf, '_' )) ) + *c = '-'; + name = name_buf; + } + + if( !strncmp( name, "no", 2 ) ) + { + name += 2; + if( name[0] == '-' ) + name++; + value = atobool(value) ? 
"false" : "true"; + } + name_was_bool = 0; + +#define OPT(STR) else if( !strcmp( name, STR ) ) +#define OPT2(STR0, STR1) else if( !strcmp( name, STR0 ) || !strcmp( name, STR1 ) ) + if( 0 ); + OPT("asm") + { + p->cpu = isdigit(value[0]) ? atoi(value) : + !strcasecmp(value, "auto") || atobool(value) ? x264_cpu_detect() : 0; + if( b_error ) + { + char *buf = strdup( value ); + if( buf ) + { + char *tok, UNUSED *saveptr=NULL, *init; + b_error = 0; + p->cpu = 0; + for( init=buf; (tok=strtok_r(init, ",", &saveptr)); init=NULL ) + { + int i = 0; + while( x264_cpu_names[i].flags && strcasecmp(tok, x264_cpu_names[i].name) ) + i++; + p->cpu |= x264_cpu_names[i].flags; + if( !x264_cpu_names[i].flags ) + b_error = 1; + } + free( buf ); + if( (p->cpu&X264_CPU_SSSE3) && !(p->cpu&X264_CPU_SSE2_IS_SLOW) ) + p->cpu |= X264_CPU_SSE2_IS_FAST; + } + } + } + OPT("threads") + { + if( !strcasecmp(value, "auto") ) + p->i_threads = X264_THREADS_AUTO; + else + p->i_threads = atoi(value); + } + OPT("lookahead-threads") + { + if( !strcasecmp(value, "auto") ) + p->i_lookahead_threads = X264_THREADS_AUTO; + else + p->i_lookahead_threads = atoi(value); + } + OPT("sliced-threads") + p->b_sliced_threads = atobool(value); + OPT("sync-lookahead") + { + if( !strcasecmp(value, "auto") ) + p->i_sync_lookahead = X264_SYNC_LOOKAHEAD_AUTO; + else + p->i_sync_lookahead = atoi(value); + } + OPT2("deterministic", "n-deterministic") + p->b_deterministic = atobool(value); + OPT("cpu-independent") + p->b_cpu_independent = atobool(value); + OPT2("level", "level-idc") + { + if( !strcmp(value, "1b") ) + p->i_level_idc = 9; + else if( atof(value) < 7 ) + p->i_level_idc = (int)(10*atof(value)+.5); + else + p->i_level_idc = atoi(value); + } + OPT("bluray-compat") + p->b_bluray_compat = atobool(value); + OPT("avcintra-class") + p->i_avcintra_class = atoi(value); + OPT("avcintra-flavor") + b_error |= parse_enum( value, x264_avcintra_flavor_names, &p->i_avcintra_flavor ); + OPT("sar") + { + b_error = ( 2 != sscanf( 
value, "%d:%d", &p->vui.i_sar_width, &p->vui.i_sar_height ) && + 2 != sscanf( value, "%d/%d", &p->vui.i_sar_width, &p->vui.i_sar_height ) ); + } + OPT("overscan") + b_error |= parse_enum( value, x264_overscan_names, &p->vui.i_overscan ); + OPT("videoformat") + b_error |= parse_enum( value, x264_vidformat_names, &p->vui.i_vidformat ); + OPT("fullrange") + b_error |= parse_enum( value, x264_fullrange_names, &p->vui.b_fullrange ); + OPT("colorprim") + b_error |= parse_enum( value, x264_colorprim_names, &p->vui.i_colorprim ); + OPT("transfer") + b_error |= parse_enum( value, x264_transfer_names, &p->vui.i_transfer ); + OPT("colormatrix") + b_error |= parse_enum( value, x264_colmatrix_names, &p->vui.i_colmatrix ); + OPT("chromaloc") + { + p->vui.i_chroma_loc = atoi(value); + b_error = ( p->vui.i_chroma_loc < 0 || p->vui.i_chroma_loc > 5 ); + } + OPT("alternative-transfer") + b_error |= parse_enum( value, x264_transfer_names, &p->i_alternative_transfer ); + OPT("fps") + { + if( sscanf( value, "%u/%u", &p->i_fps_num, &p->i_fps_den ) != 2 ) + { + double fps = atof(value); + if( fps > 0.0 && fps <= INT_MAX/1000.0 ) + { + p->i_fps_num = (int)(fps * 1000.0 + .5); + p->i_fps_den = 1000; + } + else + { + p->i_fps_num = atoi(value); + p->i_fps_den = 1; + } + } + } + OPT2("ref", "frameref") + p->i_frame_reference = atoi(value); + OPT("dpb-size") + p->i_dpb_size = atoi(value); + OPT("keyint") + { + if( strstr( value, "infinite" ) ) + p->i_keyint_max = X264_KEYINT_MAX_INFINITE; + else + p->i_keyint_max = atoi(value); + } + OPT2("min-keyint", "keyint-min") + { + p->i_keyint_min = atoi(value); + if( p->i_keyint_max < p->i_keyint_min ) + p->i_keyint_max = p->i_keyint_min; + } + OPT("scenecut") + { + p->i_scenecut_threshold = atobool(value); + if( b_error || p->i_scenecut_threshold ) + { + b_error = 0; + p->i_scenecut_threshold = atoi(value); + } + } + OPT("intra-refresh") + p->b_intra_refresh = atobool(value); + OPT("bframes") + p->i_bframe = atoi(value); + OPT("b-adapt") + { + 
p->i_bframe_adaptive = atobool(value); + if( b_error ) + { + b_error = 0; + p->i_bframe_adaptive = atoi(value); + } + } + OPT("b-bias") + p->i_bframe_bias = atoi(value); + OPT("b-pyramid") + { + b_error |= parse_enum( value, x264_b_pyramid_names, &p->i_bframe_pyramid ); + if( b_error ) + { + b_error = 0; + p->i_bframe_pyramid = atoi(value); + } + } + OPT("open-gop") + p->b_open_gop = atobool(value); + OPT("nf") + p->b_deblocking_filter = !atobool(value); + OPT2("filter", "deblock") + { + if( 2 == sscanf( value, "%d:%d", &p->i_deblocking_filter_alphac0, &p->i_deblocking_filter_beta ) || + 2 == sscanf( value, "%d,%d", &p->i_deblocking_filter_alphac0, &p->i_deblocking_filter_beta ) ) + { + p->b_deblocking_filter = 1; + } + else if( sscanf( value, "%d", &p->i_deblocking_filter_alphac0 ) ) + { + p->b_deblocking_filter = 1; + p->i_deblocking_filter_beta = p->i_deblocking_filter_alphac0; + } + else + p->b_deblocking_filter = atobool(value); + } + OPT("slice-max-size") + p->i_slice_max_size = atoi(value); + OPT("slice-max-mbs") + p->i_slice_max_mbs = atoi(value); + OPT("slice-min-mbs") + p->i_slice_min_mbs = atoi(value); + OPT("slices") + p->i_slice_count = atoi(value); + OPT("slices-max") + p->i_slice_count_max = atoi(value); + OPT("cabac") + p->b_cabac = atobool(value); + OPT("cabac-idc") + p->i_cabac_init_idc = atoi(value); + OPT("interlaced") + p->b_interlaced = atobool(value); + OPT("tff") + p->b_interlaced = p->b_tff = atobool(value); + OPT("bff") + { + p->b_interlaced = atobool(value); + p->b_tff = !p->b_interlaced; + } + OPT("constrained-intra") + p->b_constrained_intra = atobool(value); + OPT("cqm") + { + if( strstr( value, "flat" ) ) + p->i_cqm_preset = X264_CQM_FLAT; + else if( strstr( value, "jvt" ) ) + p->i_cqm_preset = X264_CQM_JVT; + else + p->psz_cqm_file = strdup(value); + } + OPT("cqmfile") + p->psz_cqm_file = strdup(value); + OPT("cqm4") + { + p->i_cqm_preset = X264_CQM_CUSTOM; + b_error |= parse_cqm( value, p->cqm_4iy, 16 ); + b_error |= parse_cqm( 
value, p->cqm_4py, 16 ); + b_error |= parse_cqm( value, p->cqm_4ic, 16 ); + b_error |= parse_cqm( value, p->cqm_4pc, 16 ); + } + OPT("cqm8") + { + p->i_cqm_preset = X264_CQM_CUSTOM; + b_error |= parse_cqm( value, p->cqm_8iy, 64 ); + b_error |= parse_cqm( value, p->cqm_8py, 64 ); + b_error |= parse_cqm( value, p->cqm_8ic, 64 ); + b_error |= parse_cqm( value, p->cqm_8pc, 64 ); + } + OPT("cqm4i") + { + p->i_cqm_preset = X264_CQM_CUSTOM; + b_error |= parse_cqm( value, p->cqm_4iy, 16 ); + b_error |= parse_cqm( value, p->cqm_4ic, 16 ); + } + OPT("cqm4p") + { + p->i_cqm_preset = X264_CQM_CUSTOM; + b_error |= parse_cqm( value, p->cqm_4py, 16 ); + b_error |= parse_cqm( value, p->cqm_4pc, 16 ); + } + OPT("cqm4iy") + { + p->i_cqm_preset = X264_CQM_CUSTOM; + b_error |= parse_cqm( value, p->cqm_4iy, 16 ); + } + OPT("cqm4ic") + { + p->i_cqm_preset = X264_CQM_CUSTOM; + b_error |= parse_cqm( value, p->cqm_4ic, 16 ); + } + OPT("cqm4py") + { + p->i_cqm_preset = X264_CQM_CUSTOM; + b_error |= parse_cqm( value, p->cqm_4py, 16 ); + } + OPT("cqm4pc") + { + p->i_cqm_preset = X264_CQM_CUSTOM; + b_error |= parse_cqm( value, p->cqm_4pc, 16 ); + } + OPT("cqm8i") + { + p->i_cqm_preset = X264_CQM_CUSTOM; + b_error |= parse_cqm( value, p->cqm_8iy, 64 ); + b_error |= parse_cqm( value, p->cqm_8ic, 64 ); + } + OPT("cqm8p") + { + p->i_cqm_preset = X264_CQM_CUSTOM; + b_error |= parse_cqm( value, p->cqm_8py, 64 ); + b_error |= parse_cqm( value, p->cqm_8pc, 64 ); + } + OPT("log") + p->i_log_level = atoi(value); + OPT("dump-yuv") + p->psz_dump_yuv = strdup(value); + OPT2("analyse", "partitions") + { + p->analyse.inter = 0; + if( strstr( value, "none" ) ) p->analyse.inter = 0; + if( strstr( value, "all" ) ) p->analyse.inter = ~0; + + if( strstr( value, "i4x4" ) ) p->analyse.inter |= X264_ANALYSE_I4x4; + if( strstr( value, "i8x8" ) ) p->analyse.inter |= X264_ANALYSE_I8x8; + if( strstr( value, "p8x8" ) ) p->analyse.inter |= X264_ANALYSE_PSUB16x16; + if( strstr( value, "p4x4" ) ) p->analyse.inter |= 
X264_ANALYSE_PSUB8x8; + if( strstr( value, "b8x8" ) ) p->analyse.inter |= X264_ANALYSE_BSUB16x16; + } + OPT("8x8dct") + p->analyse.b_transform_8x8 = atobool(value); + OPT2("weightb", "weight-b") + p->analyse.b_weighted_bipred = atobool(value); + OPT("weightp") + p->analyse.i_weighted_pred = atoi(value); + OPT2("direct", "direct-pred") + b_error |= parse_enum( value, x264_direct_pred_names, &p->analyse.i_direct_mv_pred ); + OPT("chroma-qp-offset") + p->analyse.i_chroma_qp_offset = atoi(value); + OPT("me") + b_error |= parse_enum( value, x264_motion_est_names, &p->analyse.i_me_method ); + OPT2("merange", "me-range") + p->analyse.i_me_range = atoi(value); + OPT2("mvrange", "mv-range") + p->analyse.i_mv_range = atoi(value); + OPT2("mvrange-thread", "mv-range-thread") + p->analyse.i_mv_range_thread = atoi(value); + OPT2("subme", "subq") + p->analyse.i_subpel_refine = atoi(value); + OPT("psy-rd") + { + if( 2 == sscanf( value, "%f:%f", &p->analyse.f_psy_rd, &p->analyse.f_psy_trellis ) || + 2 == sscanf( value, "%f,%f", &p->analyse.f_psy_rd, &p->analyse.f_psy_trellis ) || + 2 == sscanf( value, "%f|%f", &p->analyse.f_psy_rd, &p->analyse.f_psy_trellis )) + { } + else if( sscanf( value, "%f", &p->analyse.f_psy_rd ) ) + { + p->analyse.f_psy_trellis = 0; + } + else + { + p->analyse.f_psy_rd = 0; + p->analyse.f_psy_trellis = 0; + } + } + OPT("psy") + p->analyse.b_psy = atobool(value); + OPT("chroma-me") + p->analyse.b_chroma_me = atobool(value); + OPT("mixed-refs") + p->analyse.b_mixed_references = atobool(value); + OPT("trellis") + p->analyse.i_trellis = atoi(value); + OPT("fast-pskip") + p->analyse.b_fast_pskip = atobool(value); + OPT("dct-decimate") + p->analyse.b_dct_decimate = atobool(value); + OPT("deadzone-inter") + p->analyse.i_luma_deadzone[0] = atoi(value); + OPT("deadzone-intra") + p->analyse.i_luma_deadzone[1] = atoi(value); + OPT("nr") + p->analyse.i_noise_reduction = atoi(value); + OPT("bitrate") + { + p->rc.i_bitrate = atoi(value); + p->rc.i_rc_method = 
X264_RC_ABR; + } + OPT2("qp", "qp_constant") + { + p->rc.i_qp_constant = atoi(value); + p->rc.i_rc_method = X264_RC_CQP; + } + OPT("crf") + { + p->rc.f_rf_constant = atof(value); + p->rc.i_rc_method = X264_RC_CRF; + } + OPT("crf-max") + p->rc.f_rf_constant_max = atof(value); + OPT("rc-lookahead") + p->rc.i_lookahead = atoi(value); + OPT2("qpmin", "qp-min") + p->rc.i_qp_min = atoi(value); + OPT2("qpmax", "qp-max") + p->rc.i_qp_max = atoi(value); + OPT2("qpstep", "qp-step") + p->rc.i_qp_step = atoi(value); + OPT("ratetol") + p->rc.f_rate_tolerance = !strncmp("inf", value, 3) ? 1e9 : atof(value); + OPT("vbv-maxrate") + p->rc.i_vbv_max_bitrate = atoi(value); + OPT("vbv-bufsize") + p->rc.i_vbv_buffer_size = atoi(value); + OPT("vbv-init") + p->rc.f_vbv_buffer_init = atof(value); + OPT2("ipratio", "ip-factor") + p->rc.f_ip_factor = atof(value); + OPT2("pbratio", "pb-factor") + p->rc.f_pb_factor = atof(value); + OPT("aq-mode") + p->rc.i_aq_mode = atoi(value); + OPT("aq-strength") + p->rc.f_aq_strength = atof(value); + OPT("pass") + { + int pass = x264_clip3( atoi(value), 0, 3 ); + p->rc.b_stat_write = pass & 1; + p->rc.b_stat_read = pass & 2; + } + OPT("stats") + { + p->rc.psz_stat_in = strdup(value); + p->rc.psz_stat_out = strdup(value); + } + OPT("qcomp") + p->rc.f_qcompress = atof(value); + OPT("mbtree") + p->rc.b_mb_tree = atobool(value); + OPT("qblur") + p->rc.f_qblur = atof(value); + OPT2("cplxblur", "cplx-blur") + p->rc.f_complexity_blur = atof(value); + OPT("zones") + p->rc.psz_zones = strdup(value); + OPT("crop-rect") + b_error |= sscanf( value, "%u,%u,%u,%u", &p->crop_rect.i_left, &p->crop_rect.i_top, + &p->crop_rect.i_right, &p->crop_rect.i_bottom ) != 4; + OPT("psnr") + p->analyse.b_psnr = atobool(value); + OPT("ssim") + p->analyse.b_ssim = atobool(value); + OPT("aud") + p->b_aud = atobool(value); + OPT("sps-id") + p->i_sps_id = atoi(value); + OPT("global-header") + p->b_repeat_headers = !atobool(value); + OPT("repeat-headers") + p->b_repeat_headers = 
atobool(value); + OPT("annexb") + p->b_annexb = atobool(value); + OPT("force-cfr") + p->b_vfr_input = !atobool(value); + OPT("nal-hrd") + b_error |= parse_enum( value, x264_nal_hrd_names, &p->i_nal_hrd ); + OPT("filler") + p->rc.b_filler = atobool(value); + OPT("pic-struct") + p->b_pic_struct = atobool(value); + OPT("fake-interlaced") + p->b_fake_interlaced = atobool(value); + OPT("frame-packing") + p->i_frame_packing = atoi(value); + OPT("stitchable") + p->b_stitchable = atobool(value); + OPT("opencl") + p->b_opencl = atobool( value ); + OPT("opencl-clbin") + p->psz_clbin_file = strdup( value ); + OPT("opencl-device") + p->i_opencl_device = atoi( value ); + else + { + b_error = 1; + errortype = X264_PARAM_BAD_NAME; + } +#undef OPT +#undef OPT2 +#undef atobool +#undef atoi +#undef atof + + if( name_buf ) + free( name_buf ); + + b_error |= value_was_null && !name_was_bool; + return b_error ? errortype : 0; +} + +/**************************************************************************** + * x264_param2string: + ****************************************************************************/ +char *x264_param2string( x264_param_t *p, int b_res ) +{ + int len = 1000; + char *buf, *s; + if( p->rc.psz_zones ) + len += strlen(p->rc.psz_zones); + buf = s = x264_malloc( len ); + if( !buf ) + return NULL; + + if( b_res ) + { + s += sprintf( s, "%dx%d ", p->i_width, p->i_height ); + s += sprintf( s, "fps=%u/%u ", p->i_fps_num, p->i_fps_den ); + s += sprintf( s, "timebase=%u/%u ", p->i_timebase_num, p->i_timebase_den ); + s += sprintf( s, "bitdepth=%d ", p->i_bitdepth ); + } + + if( p->b_opencl ) + s += sprintf( s, "opencl=%d ", p->b_opencl ); + s += sprintf( s, "cabac=%d", p->b_cabac ); + s += sprintf( s, " ref=%d", p->i_frame_reference ); + s += sprintf( s, " deblock=%d:%d:%d", p->b_deblocking_filter, + p->i_deblocking_filter_alphac0, p->i_deblocking_filter_beta ); + s += sprintf( s, " analyse=%#x:%#x", p->analyse.intra, p->analyse.inter ); + s += sprintf( s, " me=%s", 
x264_motion_est_names[ p->analyse.i_me_method ] ); + s += sprintf( s, " subme=%d", p->analyse.i_subpel_refine ); + s += sprintf( s, " psy=%d", p->analyse.b_psy ); + if( p->analyse.b_psy ) + s += sprintf( s, " psy_rd=%.2f:%.2f", p->analyse.f_psy_rd, p->analyse.f_psy_trellis ); + s += sprintf( s, " mixed_ref=%d", p->analyse.b_mixed_references ); + s += sprintf( s, " me_range=%d", p->analyse.i_me_range ); + s += sprintf( s, " chroma_me=%d", p->analyse.b_chroma_me ); + s += sprintf( s, " trellis=%d", p->analyse.i_trellis ); + s += sprintf( s, " 8x8dct=%d", p->analyse.b_transform_8x8 ); + s += sprintf( s, " cqm=%d", p->i_cqm_preset ); + s += sprintf( s, " deadzone=%d,%d", p->analyse.i_luma_deadzone[0], p->analyse.i_luma_deadzone[1] ); + s += sprintf( s, " fast_pskip=%d", p->analyse.b_fast_pskip ); + s += sprintf( s, " chroma_qp_offset=%d", p->analyse.i_chroma_qp_offset ); + s += sprintf( s, " threads=%d", p->i_threads ); + s += sprintf( s, " lookahead_threads=%d", p->i_lookahead_threads ); + s += sprintf( s, " sliced_threads=%d", p->b_sliced_threads ); + if( p->i_slice_count ) + s += sprintf( s, " slices=%d", p->i_slice_count ); + if( p->i_slice_count_max ) + s += sprintf( s, " slices_max=%d", p->i_slice_count_max ); + if( p->i_slice_max_size ) + s += sprintf( s, " slice_max_size=%d", p->i_slice_max_size ); + if( p->i_slice_max_mbs ) + s += sprintf( s, " slice_max_mbs=%d", p->i_slice_max_mbs ); + if( p->i_slice_min_mbs ) + s += sprintf( s, " slice_min_mbs=%d", p->i_slice_min_mbs ); + s += sprintf( s, " nr=%d", p->analyse.i_noise_reduction ); + s += sprintf( s, " decimate=%d", p->analyse.b_dct_decimate ); + s += sprintf( s, " interlaced=%s", p->b_interlaced ? p->b_tff ? "tff" : "bff" : p->b_fake_interlaced ? 
"fake" : "0" ); + s += sprintf( s, " bluray_compat=%d", p->b_bluray_compat ); + if( p->b_stitchable ) + s += sprintf( s, " stitchable=%d", p->b_stitchable ); + + s += sprintf( s, " constrained_intra=%d", p->b_constrained_intra ); + + s += sprintf( s, " bframes=%d", p->i_bframe ); + if( p->i_bframe ) + { + s += sprintf( s, " b_pyramid=%d b_adapt=%d b_bias=%d direct=%d weightb=%d open_gop=%d", + p->i_bframe_pyramid, p->i_bframe_adaptive, p->i_bframe_bias, + p->analyse.i_direct_mv_pred, p->analyse.b_weighted_bipred, p->b_open_gop ); + } + s += sprintf( s, " weightp=%d", p->analyse.i_weighted_pred > 0 ? p->analyse.i_weighted_pred : 0 ); + + if( p->i_keyint_max == X264_KEYINT_MAX_INFINITE ) + s += sprintf( s, " keyint=infinite" ); + else + s += sprintf( s, " keyint=%d", p->i_keyint_max ); + s += sprintf( s, " keyint_min=%d scenecut=%d intra_refresh=%d", + p->i_keyint_min, p->i_scenecut_threshold, p->b_intra_refresh ); + + if( p->rc.b_mb_tree || p->rc.i_vbv_buffer_size ) + s += sprintf( s, " rc_lookahead=%d", p->rc.i_lookahead ); + + s += sprintf( s, " rc=%s mbtree=%d", p->rc.i_rc_method == X264_RC_ABR ? + ( p->rc.b_stat_read ? "2pass" : p->rc.i_vbv_max_bitrate == p->rc.i_bitrate ? "cbr" : "abr" ) + : p->rc.i_rc_method == X264_RC_CRF ? 
"crf" : "cqp", p->rc.b_mb_tree ); + if( p->rc.i_rc_method == X264_RC_ABR || p->rc.i_rc_method == X264_RC_CRF ) + { + if( p->rc.i_rc_method == X264_RC_CRF ) + s += sprintf( s, " crf=%.1f", p->rc.f_rf_constant ); + else + s += sprintf( s, " bitrate=%d ratetol=%.1f", + p->rc.i_bitrate, p->rc.f_rate_tolerance ); + s += sprintf( s, " qcomp=%.2f qpmin=%d qpmax=%d qpstep=%d", + p->rc.f_qcompress, p->rc.i_qp_min, p->rc.i_qp_max, p->rc.i_qp_step ); + if( p->rc.b_stat_read ) + s += sprintf( s, " cplxblur=%.1f qblur=%.1f", + p->rc.f_complexity_blur, p->rc.f_qblur ); + if( p->rc.i_vbv_buffer_size ) + { + s += sprintf( s, " vbv_maxrate=%d vbv_bufsize=%d", + p->rc.i_vbv_max_bitrate, p->rc.i_vbv_buffer_size ); + if( p->rc.i_rc_method == X264_RC_CRF ) + s += sprintf( s, " crf_max=%.1f", p->rc.f_rf_constant_max ); + } + } + else if( p->rc.i_rc_method == X264_RC_CQP ) + s += sprintf( s, " qp=%d", p->rc.i_qp_constant ); + + if( p->rc.i_vbv_buffer_size ) + s += sprintf( s, " nal_hrd=%s filler=%d", x264_nal_hrd_names[p->i_nal_hrd], p->rc.b_filler ); + if( p->crop_rect.i_left | p->crop_rect.i_top | p->crop_rect.i_right | p->crop_rect.i_bottom ) + s += sprintf( s, " crop_rect=%u,%u,%u,%u", p->crop_rect.i_left, p->crop_rect.i_top, + p->crop_rect.i_right, p->crop_rect.i_bottom ); + if( p->i_frame_packing >= 0 ) + s += sprintf( s, " frame-packing=%d", p->i_frame_packing ); + + if( !(p->rc.i_rc_method == X264_RC_CQP && p->rc.i_qp_constant == 0) ) + { + s += sprintf( s, " ip_ratio=%.2f", p->rc.f_ip_factor ); + if( p->i_bframe && !p->rc.b_mb_tree ) + s += sprintf( s, " pb_ratio=%.2f", p->rc.f_pb_factor ); + s += sprintf( s, " aq=%d", p->rc.i_aq_mode ); + if( p->rc.i_aq_mode ) + s += sprintf( s, ":%.2f", p->rc.f_aq_strength ); + if( p->rc.psz_zones ) + s += sprintf( s, " zones=%s", p->rc.psz_zones ); + else if( p->rc.i_zones ) + s += sprintf( s, " zones" ); + } + + return buf; +} diff -Nru x264-0.152.2854+gite9a5903/common/base.h x264-0.158.2988+git-20191101.7817004/common/base.h --- 
x264-0.152.2854+gite9a5903/common/base.h 1970-01-01 00:00:00.000000000 +0000 +++ x264-0.158.2988+git-20191101.7817004/common/base.h 2019-11-09 05:16:29.000000000 +0000 @@ -0,0 +1,316 @@ +/***************************************************************************** + * base.h: misc common functions (bit depth independent) + ***************************************************************************** + * Copyright (C) 2003-2019 x264 project + * + * Authors: Laurent Aimar + * Loren Merritt + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02111, USA. + * + * This program is also available under a commercial proprietary license. + * For more information, contact us at licensing@x264.com. + *****************************************************************************/ + +#ifndef X264_BASE_H +#define X264_BASE_H + +/**************************************************************************** + * Macros (can be used in osdep.h) + ****************************************************************************/ +#define X264_MIN(a,b) ( (a)<(b) ? (a) : (b) ) +#define X264_MAX(a,b) ( (a)>(b) ? 
(a) : (b) ) +#define X264_MIN3(a,b,c) X264_MIN((a),X264_MIN((b),(c))) +#define X264_MAX3(a,b,c) X264_MAX((a),X264_MAX((b),(c))) +#define X264_MIN4(a,b,c,d) X264_MIN((a),X264_MIN3((b),(c),(d))) +#define X264_MAX4(a,b,c,d) X264_MAX((a),X264_MAX3((b),(c),(d))) + +/**************************************************************************** + * System includes + ****************************************************************************/ +#include "osdep.h" +#include +#include +#include +#include +#include +#include + +/**************************************************************************** + * Macros + ****************************************************************************/ +#define XCHG(type,a,b) do { type t = a; a = b; b = t; } while( 0 ) +#define FIX8(f) ((int)(f*(1<<8)+.5)) +#define ARRAY_ELEMS(a) ((sizeof(a))/(sizeof(a[0]))) +#define ALIGN(x,a) (((x)+((a)-1))&~((a)-1)) +#define IS_DISPOSABLE(type) ( type == X264_TYPE_B ) + +/* Unions for type-punning. + * Mn: load or store n bits, aligned, native-endian + * CPn: copy n bits, aligned, native-endian + * we don't use memcpy for CPn because memcpy's args aren't assumed to be aligned */ +typedef union { uint16_t i; uint8_t c[2]; } MAY_ALIAS x264_union16_t; +typedef union { uint32_t i; uint16_t b[2]; uint8_t c[4]; } MAY_ALIAS x264_union32_t; +typedef union { uint64_t i; uint32_t a[2]; uint16_t b[4]; uint8_t c[8]; } MAY_ALIAS x264_union64_t; +typedef struct { uint64_t i[2]; } x264_uint128_t; +typedef union { x264_uint128_t i; uint64_t a[2]; uint32_t b[4]; uint16_t c[8]; uint8_t d[16]; } MAY_ALIAS x264_union128_t; +#define M16(src) (((x264_union16_t*)(src))->i) +#define M32(src) (((x264_union32_t*)(src))->i) +#define M64(src) (((x264_union64_t*)(src))->i) +#define M128(src) (((x264_union128_t*)(src))->i) +#define M128_ZERO ((x264_uint128_t){{0,0}}) +#define CP16(dst,src) M16(dst) = M16(src) +#define CP32(dst,src) M32(dst) = M32(src) +#define CP64(dst,src) M64(dst) = M64(src) +#define CP128(dst,src) M128(dst) = 
M128(src)
+
+/****************************************************************************
+ * Constants
+ ****************************************************************************/
+/* NOTE(review): the numeric values presumably mirror the H.264 profile_idc
+ * codes - confirm against the specification. */
+enum profile_e
+{
+    PROFILE_BASELINE = 66,
+    PROFILE_MAIN = 77,
+    PROFILE_HIGH = 100,
+    PROFILE_HIGH10 = 110,
+    PROFILE_HIGH422 = 122,
+    PROFILE_HIGH444_PREDICTIVE = 244,
+};
+
+enum chroma_format_e
+{
+    CHROMA_400 = 0,
+    CHROMA_420 = 1,
+    CHROMA_422 = 2,
+    CHROMA_444 = 3,
+};
+
+enum slice_type_e
+{
+    SLICE_TYPE_P = 0,
+    SLICE_TYPE_B = 1,
+    SLICE_TYPE_I = 2,
+};
+
+/* Indexed by slice_type_e: 0 -> 'P', 1 -> 'B', 2 -> 'I'.
+ * Three chars from a brace initializer, so the array is not NUL-terminated. */
+static const char slice_type_to_char[] = { 'P', 'B', 'I' };
+
+/* NOTE(review): presumably the SEI payloadType codes from the H.264 spec - confirm. */
+enum sei_payload_type_e
+{
+    SEI_BUFFERING_PERIOD = 0,
+    SEI_PIC_TIMING = 1,
+    SEI_PAN_SCAN_RECT = 2,
+    SEI_FILLER = 3,
+    SEI_USER_DATA_REGISTERED = 4,
+    SEI_USER_DATA_UNREGISTERED = 5,
+    SEI_RECOVERY_POINT = 6,
+    SEI_DEC_REF_PIC_MARKING = 7,
+    SEI_FRAME_PACKING = 45,
+    SEI_ALTERNATIVE_TRANSFER = 147,
+};
+
+#define X264_BFRAME_MAX 16
+#define X264_REF_MAX 16
+#define X264_THREAD_MAX 128
+#define X264_LOOKAHEAD_THREAD_MAX 16
+#define X264_LOOKAHEAD_MAX 250
+
+// number of pixels (per thread) in progress at any given time.
+// 16 for the macroblock in progress + 3 for deblocking + 3 for motion compensation filter + 2 for extra safety
+#define X264_THREAD_HEIGHT 24
+
+/* WEIGHTP_FAKE is set when mb_tree & psy are enabled, but normal weightp is disabled
+ * (such as in baseline). It checks for fades in lookahead and adjusts qp accordingly
+ * to increase quality. Defined as (-1) so that if(i_weighted_pred > 0) is true only when
+ * real weights are being used.
*/
+
+#define X264_WEIGHTP_FAKE (-1)
+
+/* One plane of the scan8 grid is 5 rows of 8 columns; the full grid stacks
+ * three such planes. X264_SCAN8_0 (column 4, row 1) is the value of the first
+ * x264_scan8 entry. */
+#define X264_SCAN8_LUMA_SIZE (5*8)
+#define X264_SCAN8_SIZE (X264_SCAN8_LUMA_SIZE*3)
+#define X264_SCAN8_0 (4+1*8)
+
+/* Scan8 organization:
+ *    0 1 2 3 4 5 6 7
+ * 0  DY    y y y y y
+ * 1        y Y Y Y Y
+ * 2        y Y Y Y Y
+ * 3        y Y Y Y Y
+ * 4        y Y Y Y Y
+ * 5  DU    u u u u u
+ * 6        u U U U U
+ * 7        u U U U U
+ * 8        u U U U U
+ * 9        u U U U U
+ * 10 DV    v v v v v
+ * 11       v V V V V
+ * 12       v V V V V
+ * 13       v V V V V
+ * 14       v V V V V
+ * DY/DU/DV are for luma/chroma DC.
+ */
+
+/* Indices into x264_scan8 of the DC slots: entry 48 is DY, entry 49 is DU
+ * (and entry 50, i.e. CHROMA_DC+1, is DV). */
+#define LUMA_DC 48
+#define CHROMA_DC 49
+
+/* Each entry is column + row*8 in the grid drawn above. Entries 0-15 land on
+ * the Y cells, 16-31 on the U cells, 32-47 on the V cells, and the last three
+ * on the DY/DU/DV cells in column 0. */
+static const uint8_t x264_scan8[16*3 + 3] =
+{
+    4+ 1*8, 5+ 1*8, 4+ 2*8, 5+ 2*8,
+    6+ 1*8, 7+ 1*8, 6+ 2*8, 7+ 2*8,
+    4+ 3*8, 5+ 3*8, 4+ 4*8, 5+ 4*8,
+    6+ 3*8, 7+ 3*8, 6+ 4*8, 7+ 4*8,
+    4+ 6*8, 5+ 6*8, 4+ 7*8, 5+ 7*8,
+    6+ 6*8, 7+ 6*8, 6+ 7*8, 7+ 7*8,
+    4+ 8*8, 5+ 8*8, 4+ 9*8, 5+ 9*8,
+    6+ 8*8, 7+ 8*8, 6+ 9*8, 7+ 9*8,
+    4+11*8, 5+11*8, 4+12*8, 5+12*8,
+    6+11*8, 7+11*8, 6+12*8, 7+12*8,
+    4+13*8, 5+13*8, 4+14*8, 5+14*8,
+    6+13*8, 7+13*8, 6+14*8, 7+14*8,
+    0+ 0*8, 0+ 5*8, 0+10*8
+};
+
+/****************************************************************************
+ * Includes
+ ****************************************************************************/
+#include "cpu.h"
+#include "tables.h"
+
+/****************************************************************************
+ * Inline functions
+ ****************************************************************************/
+/* Clamp v to [i_min, i_max]. The lower bound is tested first, so i_min wins
+ * whenever v < i_min, even if the bounds are passed in inverted order. */
+static ALWAYS_INLINE int x264_clip3( int v, int i_min, int i_max )
+{
+    return ( (v < i_min) ? i_min : (v > i_max) ? i_max : v );
+}
+
+/* Same clamp as x264_clip3, on doubles. */
+static ALWAYS_INLINE double x264_clip3f( double v, double f_min, double f_max )
+{
+    return ( (v < f_min) ? f_min : (v > f_max) ? f_max : v );
+}
+
+/* Not a general-purpose function; multiplies input by -1/6 to convert
+ * qp to qscale.
*/
+static ALWAYS_INLINE int x264_exp2fix8( float x )
+{
+    /* Scale x by -64/6 and bias by 512; the extra .5 rounds on the int conversion. */
+    int i = x*(-64.f/6.f) + 512.5f;
+    /* Saturate outside the 0..1023 range covered below. */
+    if( i < 0 ) return 0;
+    if( i > 1023 ) return 0xffff;
+    /* Low 6 bits of i select the LUT entry, the remaining bits the left shift. */
+    return (x264_exp2_lut[i&63]+256) << (i>>6) >> 8;
+}
+
+/* Table-driven log2 approximation: x is shifted left by its leading-zero
+ * count, bits 24..30 of the result index x264_log2_lut, and a per-lz term from
+ * x264_log2_lz_lut is added.
+ * NOTE(review): behaviour for x == 0 depends on x264_clz, defined elsewhere - confirm. */
+static ALWAYS_INLINE float x264_log2( uint32_t x )
+{
+    int lz = x264_clz( x );
+    return x264_log2_lut[(x<<lz>>24)&0x7f] + x264_log2_lz_lut[lz];
+}
+
+/* Branchless median of three. (d)&((d)>>31) evaluates to d when d is negative
+ * and to 0 otherwise, provided int is 32 bits wide and >> on a negative int is
+ * an arithmetic shift; the differences must not overflow. */
+static ALWAYS_INLINE int x264_median( int a, int b, int c )
+{
+    int t = (a-b)&((a-b)>>31);
+    a -= t;                     /* a = max of the original a, b */
+    b += t;                     /* b = min of the original a, b */
+    b -= (b-c)&((b-c)>>31);     /* b = max( b, c ) */
+    b += (a-b)&((a-b)>>31);     /* b = min( a, b ) */
+    return b;
+}
+
+/* Component-wise median of three 2-element vectors, written to dst[0] and dst[1]. */
+static ALWAYS_INLINE void x264_median_mv( int16_t *dst, int16_t *a, int16_t *b, int16_t *c )
+{
+    dst[0] = x264_median( a[0], b[0], c[0] );
+    dst[1] = x264_median( a[1], b[1], c[1] );
+}
+
+/* Sum, over each pair of consecutive entries of mvc, of the absolute
+ * differences of both components. Returns 0 when i_mvc <= 1. */
+static ALWAYS_INLINE int x264_predictor_difference( int16_t (*mvc)[2], intptr_t i_mvc )
+{
+    int sum = 0;
+    for( int i = 0; i < i_mvc-1; i++ )
+    {
+        sum += abs( mvc[i][0] - mvc[i+1][0] )
+             + abs( mvc[i][1] - mvc[i+1][1] );
+    }
+    return sum;
+}
+
+/* Adds the left and top values per component, maps each sum to 0, 1 or 2
+ * (thresholds > 2 and > 32) and packs component 0 into the low byte and
+ * component 1 into the high byte of the result. */
+static ALWAYS_INLINE uint16_t x264_cabac_mvd_sum( uint8_t *mvdleft, uint8_t *mvdtop )
+{
+    int amvd0 = mvdleft[0] + mvdtop[0];
+    int amvd1 = mvdleft[1] + mvdtop[1];
+    amvd0 = (amvd0 > 2) + (amvd0 > 32);
+    amvd1 = (amvd1 > 2) + (amvd1 > 32);
+    return amvd0 + (amvd1<<8);
+}
+
+/****************************************************************************
+ * General functions
+ ****************************************************************************/
+X264_API void x264_reduce_fraction( uint32_t *n, uint32_t *d );
+X264_API void x264_reduce_fraction64( uint64_t *n, uint64_t *d );
+
+X264_API void x264_log_default( void *p_unused, int i_level, const char *psz_fmt, va_list arg );
+X264_API void x264_log_internal( int i_level, const char *psz_fmt, ...
);
+
+/* x264_malloc : will do or emulate a memalign
+ * you have to use x264_free for buffers allocated with x264_malloc */
+X264_API void *x264_malloc( int );
+X264_API void x264_free( void * );
+
+/* x264_slurp_file: malloc space for the whole file and read it */
+X264_API char *x264_slurp_file( const char *filename );
+
+/* x264_param2string: return a (malloced) string containing most of
+ * the encoding options */
+X264_API char *x264_param2string( x264_param_t *p, int b_res );
+
+/****************************************************************************
+ * Macros
+ ****************************************************************************/
+/* Assigns x264_malloc( size ) to var and jumps to a label named `fail` when
+ * the result is NULL; the enclosing function must therefore define that label. */
+#define CHECKED_MALLOC( var, size )\
+do {\
+    var = x264_malloc( size );\
+    if( !var )\
+        goto fail;\
+} while( 0 )
+/* CHECKED_MALLOC followed by a memset to zero. `size` is expanded twice, so it
+ * must be free of side effects. */
+#define CHECKED_MALLOCZERO( var, size )\
+do {\
+    CHECKED_MALLOC( var, size );\
+    memset( var, 0, size );\
+} while( 0 )
+
+/* Macros for merging multiple allocations into a single large malloc, for improved
+ * use with huge pages.
*/
+
+/* Needs to be enough to contain any set of buffers that use combined allocations */
+#define PREALLOC_BUF_SIZE 1024
+
+/* Declares the bookkeeping locals used by PREALLOC / PREALLOC_END: the number
+ * of recorded pointers, the running byte total, and the list of addresses of
+ * the pointer variables to patch. */
+#define PREALLOC_INIT\
+    int prealloc_idx = 0;\
+    size_t prealloc_size = 0;\
+    uint8_t **preallocs[PREALLOC_BUF_SIZE];
+
+/* Stores the current running offset in var (as a pointer value), records the
+ * address of var, and advances the total by size rounded up to NATIVE_ALIGN.
+ * NOTE(review): prealloc_idx is not checked against PREALLOC_BUF_SIZE here. */
+#define PREALLOC( var, size )\
+do {\
+    var = (void*)prealloc_size;\
+    preallocs[prealloc_idx++] = (uint8_t**)&var;\
+    prealloc_size += ALIGN(size, NATIVE_ALIGN);\
+} while( 0 )
+
+/* Allocates the accumulated total into ptr (jumping to `fail` on error via
+ * CHECKED_MALLOC), then adds that base address to every recorded variable,
+ * turning the stored offsets into real pointers. */
+#define PREALLOC_END( ptr )\
+do {\
+    CHECKED_MALLOC( ptr, prealloc_size );\
+    while( prealloc_idx-- )\
+        *preallocs[prealloc_idx] += (intptr_t)ptr;\
+} while( 0 )
+
+#endif
diff -Nru x264-0.152.2854+gite9a5903/common/bitstream.c x264-0.158.2988+git-20191101.7817004/common/bitstream.c --- x264-0.152.2854+gite9a5903/common/bitstream.c 2017-12-31 12:50:51.000000000 +0000 +++ x264-0.158.2988+git-20191101.7817004/common/bitstream.c 2019-11-09 05:16:29.000000000 +0000 @@ -1,7 +1,7 @@ /***************************************************************************** * bitstream.c: bitstream writing ***************************************************************************** - * Copyright (C) 2003-2017 x264 project + * Copyright (C) 2003-2019 x264 project * * Authors: Laurent Aimar * Fiona Glaser @@ -26,7 +26,7 @@ #include "common.h" -static uint8_t *x264_nal_escape_c( uint8_t *dst, uint8_t *src, uint8_t *end ) +static uint8_t *nal_escape_c( uint8_t *dst, uint8_t *src, uint8_t *end ) { if( src < end ) *dst++ = *src++; if( src < end ) *dst++ = *src++; @@ -39,25 +39,15 @@ return dst; } -uint8_t *x264_nal_escape_mmx2( uint8_t *dst, uint8_t *src, uint8_t *end ); -uint8_t *x264_nal_escape_sse2( uint8_t *dst, uint8_t *src, uint8_t *end ); -uint8_t *x264_nal_escape_avx2( uint8_t *dst, uint8_t *src, uint8_t *end ); -void x264_cabac_block_residual_rd_internal_sse2 ( dctcoef *l, int b_interlaced, intptr_t ctx_block_cat, x264_cabac_t *cb ); -void x264_cabac_block_residual_rd_internal_lzcnt ( dctcoef *l, int b_interlaced, intptr_t ctx_block_cat, x264_cabac_t *cb ); 
-void x264_cabac_block_residual_rd_internal_ssse3 ( dctcoef *l, int b_interlaced, intptr_t ctx_block_cat, x264_cabac_t *cb ); -void x264_cabac_block_residual_rd_internal_ssse3_lzcnt( dctcoef *l, int b_interlaced, intptr_t ctx_block_cat, x264_cabac_t *cb ); -void x264_cabac_block_residual_rd_internal_avx512 ( dctcoef *l, int b_interlaced, intptr_t ctx_block_cat, x264_cabac_t *cb ); -void x264_cabac_block_residual_8x8_rd_internal_sse2 ( dctcoef *l, int b_interlaced, intptr_t ctx_block_cat, x264_cabac_t *cb ); -void x264_cabac_block_residual_8x8_rd_internal_lzcnt ( dctcoef *l, int b_interlaced, intptr_t ctx_block_cat, x264_cabac_t *cb ); -void x264_cabac_block_residual_8x8_rd_internal_ssse3 ( dctcoef *l, int b_interlaced, intptr_t ctx_block_cat, x264_cabac_t *cb ); -void x264_cabac_block_residual_8x8_rd_internal_ssse3_lzcnt( dctcoef *l, int b_interlaced, intptr_t ctx_block_cat, x264_cabac_t *cb ); -void x264_cabac_block_residual_8x8_rd_internal_avx512 ( dctcoef *l, int b_interlaced, intptr_t ctx_block_cat, x264_cabac_t *cb ); -void x264_cabac_block_residual_internal_sse2 ( dctcoef *l, int b_interlaced, intptr_t ctx_block_cat, x264_cabac_t *cb ); -void x264_cabac_block_residual_internal_lzcnt ( dctcoef *l, int b_interlaced, intptr_t ctx_block_cat, x264_cabac_t *cb ); -void x264_cabac_block_residual_internal_avx2 ( dctcoef *l, int b_interlaced, intptr_t ctx_block_cat, x264_cabac_t *cb ); -void x264_cabac_block_residual_internal_avx512( dctcoef *l, int b_interlaced, intptr_t ctx_block_cat, x264_cabac_t *cb ); - -uint8_t *x264_nal_escape_neon( uint8_t *dst, uint8_t *src, uint8_t *end ); +#if HAVE_MMX +#include "x86/bitstream.h" +#endif +#if HAVE_ARMV6 +#include "arm/bitstream.h" +#endif +#if HAVE_AARCH64 +#include "aarch64/bitstream.h" +#endif /**************************************************************************** * x264_nal_encode: @@ -117,7 +107,7 @@ { memset( pf, 0, sizeof(*pf) ); - pf->nal_escape = x264_nal_escape_c; + pf->nal_escape = nal_escape_c; #if HAVE_MMX 
#if ARCH_X86_64 && !defined( __MACH__ ) pf->cabac_block_residual_internal = x264_cabac_block_residual_internal_sse2; @@ -169,7 +159,7 @@ if( cpu&X264_CPU_NEON ) pf->nal_escape = x264_nal_escape_neon; #endif -#if ARCH_AARCH64 +#if HAVE_AARCH64 if( cpu&X264_CPU_NEON ) pf->nal_escape = x264_nal_escape_neon; #endif diff -Nru x264-0.152.2854+gite9a5903/common/bitstream.h x264-0.158.2988+git-20191101.7817004/common/bitstream.h --- x264-0.152.2854+gite9a5903/common/bitstream.h 2017-12-31 12:50:51.000000000 +0000 +++ x264-0.158.2988+git-20191101.7817004/common/bitstream.h 2019-11-09 05:16:29.000000000 +0000 @@ -1,7 +1,7 @@ /***************************************************************************** * bitstream.h: bitstream writing ***************************************************************************** - * Copyright (C) 2003-2017 x264 project + * Copyright (C) 2003-2019 x264 project * * Authors: Loren Merritt * Fiona Glaser @@ -30,12 +30,6 @@ typedef struct { - uint8_t i_bits; - uint8_t i_size; -} vlc_t; - -typedef struct -{ uint16_t i_bits; uint8_t i_size; /* Next level table to use */ @@ -60,12 +54,6 @@ ALIGNED_16( dctcoef level[18] ); } x264_run_level_t; -extern const vlc_t x264_coeff0_token[6]; -extern const vlc_t x264_coeff_token[6][16][4]; -extern const vlc_t x264_total_zeros[15][16]; -extern const vlc_t x264_total_zeros_2x2_dc[3][4]; -extern const vlc_t x264_total_zeros_2x4_dc[7][8]; - typedef struct { uint8_t *(*nal_escape)( uint8_t *dst, uint8_t *src, uint8_t *end ); @@ -77,6 +65,7 @@ intptr_t ctx_block_cat, x264_cabac_t *cb ); } x264_bitstream_function_t; +#define x264_bitstream_init x264_template(bitstream_init) void x264_bitstream_init( int cpu, x264_bitstream_function_t *pf ); /* A larger level table size theoretically could help a bit at extremely @@ -85,11 +74,13 @@ * This size appears to be optimal for QP18 encoding on a Nehalem CPU. * FIXME: Do further testing? 
*/ #define LEVEL_TABLE_SIZE 128 +#define x264_level_token x264_template(level_token) extern vlc_large_t x264_level_token[7][LEVEL_TABLE_SIZE]; /* The longest possible set of zero run codes sums to 25 bits. This leaves * plenty of room for both the code (25 bits) and size (5 bits) in a uint32_t. */ +#define x264_run_before x264_template(run_before) extern uint32_t x264_run_before[1<<16]; static inline void bs_init( bs_t *s, void *p_data, int i_data ) @@ -98,8 +89,13 @@ s->p = s->p_start = (uint8_t*)p_data - offset; s->p_end = (uint8_t*)p_data + i_data; s->i_left = (WORD_SIZE - offset)*8; - s->cur_bits = endian_fix32( M32(s->p) ); - s->cur_bits >>= (4-offset)*8; + if( offset ) + { + s->cur_bits = endian_fix32( M32(s->p) ); + s->cur_bits >>= (4-offset)*8; + } + else + s->cur_bits = 0; } static inline int bs_pos( bs_t *s ) { @@ -197,6 +193,7 @@ { if( s->i_left&7 ) bs_write( s, s->i_left&7, 1 << ( (s->i_left&7) - 1 ) ); + bs_flush( s ); } /* golomb functions */ diff -Nru x264-0.152.2854+gite9a5903/common/cabac.c x264-0.158.2988+git-20191101.7817004/common/cabac.c --- x264-0.152.2854+gite9a5903/common/cabac.c 2017-12-31 12:50:51.000000000 +0000 +++ x264-0.158.2988+git-20191101.7817004/common/cabac.c 2019-11-09 05:16:29.000000000 +0000 @@ -1,7 +1,7 @@ /***************************************************************************** * cabac.c: arithmetic coder ***************************************************************************** - * Copyright (C) 2003-2017 x264 project + * Copyright (C) 2003-2019 x264 project * * Authors: Laurent Aimar * Loren Merritt @@ -27,1305 +27,7 @@ #include "common.h" - -static const int8_t x264_cabac_context_init_I[1024][2] = -{ - /* 0 - 10 */ - { 20, -15 }, { 2, 54 }, { 3, 74 }, { 20, -15 }, - { 2, 54 }, { 3, 74 }, { -28,127 }, { -23, 104 }, - { -6, 53 }, { -1, 54 }, { 7, 51 }, - - /* 11 - 23 unused for I */ - { 0, 0 }, { 0, 0 }, { 0, 0 }, { 0, 0 }, - { 0, 0 }, { 0, 0 }, { 0, 0 }, { 0, 0 }, - { 0, 0 }, { 0, 0 }, { 0, 0 }, { 0, 0 }, - { 0, 0 
}, - - /* 24- 39 */ - { 0, 0 }, { 0, 0 }, { 0, 0 }, { 0, 0 }, - { 0, 0 }, { 0, 0 }, { 0, 0 }, { 0, 0 }, - { 0, 0 }, { 0, 0 }, { 0, 0 }, { 0, 0 }, - { 0, 0 }, { 0, 0 }, { 0, 0 }, { 0, 0 }, - - /* 40 - 53 */ - { 0, 0 }, { 0, 0 }, { 0, 0 }, { 0, 0 }, - { 0, 0 }, { 0, 0 }, { 0, 0 }, { 0, 0 }, - { 0, 0 }, { 0, 0 }, { 0, 0 }, { 0, 0 }, - { 0, 0 }, { 0, 0 }, - - /* 54 - 59 */ - { 0, 0 }, { 0, 0 }, { 0, 0 }, { 0, 0 }, - { 0, 0 }, { 0, 0 }, - - /* 60 - 69 */ - { 0, 41 }, { 0, 63 }, { 0, 63 }, { 0, 63 }, - { -9, 83 }, { 4, 86 }, { 0, 97 }, { -7, 72 }, - { 13, 41 }, { 3, 62 }, - - /* 70 -> 87 */ - { 0, 11 }, { 1, 55 }, { 0, 69 }, { -17, 127 }, - { -13, 102 },{ 0, 82 }, { -7, 74 }, { -21, 107 }, - { -27, 127 },{ -31, 127 },{ -24, 127 }, { -18, 95 }, - { -27, 127 },{ -21, 114 },{ -30, 127 }, { -17, 123 }, - { -12, 115 },{ -16, 122 }, - - /* 88 -> 104 */ - { -11, 115 },{ -12, 63 }, { -2, 68 }, { -15, 84 }, - { -13, 104 },{ -3, 70 }, { -8, 93 }, { -10, 90 }, - { -30, 127 },{ -1, 74 }, { -6, 97 }, { -7, 91 }, - { -20, 127 },{ -4, 56 }, { -5, 82 }, { -7, 76 }, - { -22, 125 }, - - /* 105 -> 135 */ - { -7, 93 }, { -11, 87 }, { -3, 77 }, { -5, 71 }, - { -4, 63 }, { -4, 68 }, { -12, 84 }, { -7, 62 }, - { -7, 65 }, { 8, 61 }, { 5, 56 }, { -2, 66 }, - { 1, 64 }, { 0, 61 }, { -2, 78 }, { 1, 50 }, - { 7, 52 }, { 10, 35 }, { 0, 44 }, { 11, 38 }, - { 1, 45 }, { 0, 46 }, { 5, 44 }, { 31, 17 }, - { 1, 51 }, { 7, 50 }, { 28, 19 }, { 16, 33 }, - { 14, 62 }, { -13, 108 },{ -15, 100 }, - - /* 136 -> 165 */ - { -13, 101 },{ -13, 91 }, { -12, 94 }, { -10, 88 }, - { -16, 84 }, { -10, 86 }, { -7, 83 }, { -13, 87 }, - { -19, 94 }, { 1, 70 }, { 0, 72 }, { -5, 74 }, - { 18, 59 }, { -8, 102 }, { -15, 100 }, { 0, 95 }, - { -4, 75 }, { 2, 72 }, { -11, 75 }, { -3, 71 }, - { 15, 46 }, { -13, 69 }, { 0, 62 }, { 0, 65 }, - { 21, 37 }, { -15, 72 }, { 9, 57 }, { 16, 54 }, - { 0, 62 }, { 12, 72 }, - - /* 166 -> 196 */ - { 24, 0 }, { 15, 9 }, { 8, 25 }, { 13, 18 }, - { 15, 9 }, { 13, 19 }, { 10, 37 }, { 12, 18 }, - 
{ 6, 29 }, { 20, 33 }, { 15, 30 }, { 4, 45 }, - { 1, 58 }, { 0, 62 }, { 7, 61 }, { 12, 38 }, - { 11, 45 }, { 15, 39 }, { 11, 42 }, { 13, 44 }, - { 16, 45 }, { 12, 41 }, { 10, 49 }, { 30, 34 }, - { 18, 42 }, { 10, 55 }, { 17, 51 }, { 17, 46 }, - { 0, 89 }, { 26, -19 }, { 22, -17 }, - - /* 197 -> 226 */ - { 26, -17 }, { 30, -25 }, { 28, -20 }, { 33, -23 }, - { 37, -27 }, { 33, -23 }, { 40, -28 }, { 38, -17 }, - { 33, -11 }, { 40, -15 }, { 41, -6 }, { 38, 1 }, - { 41, 17 }, { 30, -6 }, { 27, 3 }, { 26, 22 }, - { 37, -16 }, { 35, -4 }, { 38, -8 }, { 38, -3 }, - { 37, 3 }, { 38, 5 }, { 42, 0 }, { 35, 16 }, - { 39, 22 }, { 14, 48 }, { 27, 37 }, { 21, 60 }, - { 12, 68 }, { 2, 97 }, - - /* 227 -> 251 */ - { -3, 71 }, { -6, 42 }, { -5, 50 }, { -3, 54 }, - { -2, 62 }, { 0, 58 }, { 1, 63 }, { -2, 72 }, - { -1, 74 }, { -9, 91 }, { -5, 67 }, { -5, 27 }, - { -3, 39 }, { -2, 44 }, { 0, 46 }, { -16, 64 }, - { -8, 68 }, { -10, 78 }, { -6, 77 }, { -10, 86 }, - { -12, 92 }, { -15, 55 }, { -10, 60 }, { -6, 62 }, - { -4, 65 }, - - /* 252 -> 275 */ - { -12, 73 }, { -8, 76 }, { -7, 80 }, { -9, 88 }, - { -17, 110 },{ -11, 97 }, { -20, 84 }, { -11, 79 }, - { -6, 73 }, { -4, 74 }, { -13, 86 }, { -13, 96 }, - { -11, 97 }, { -19, 117 },{ -8, 78 }, { -5, 33 }, - { -4, 48 }, { -2, 53 }, { -3, 62 }, { -13, 71 }, - { -10, 79 }, { -12, 86 }, { -13, 90 }, { -14, 97 }, - - /* 276 a bit special (not used, x264_cabac_encode_bypass is used instead) */ - { 0, 0 }, - - /* 277 -> 307 */ - { -6, 93 }, { -6, 84 }, { -8, 79 }, { 0, 66 }, - { -1, 71 }, { 0, 62 }, { -2, 60 }, { -2, 59 }, - { -5, 75 }, { -3, 62 }, { -4, 58 }, { -9, 66 }, - { -1, 79 }, { 0, 71 }, { 3, 68 }, { 10, 44 }, - { -7, 62 }, { 15, 36 }, { 14, 40 }, { 16, 27 }, - { 12, 29 }, { 1, 44 }, { 20, 36 }, { 18, 32 }, - { 5, 42 }, { 1, 48 }, { 10, 62 }, { 17, 46 }, - { 9, 64 }, { -12, 104 },{ -11, 97 }, - - /* 308 -> 337 */ - { -16, 96 }, { -7, 88 }, { -8, 85 }, { -7, 85 }, - { -9, 85 }, { -13, 88 }, { 4, 66 }, { -3, 77 }, - { -3, 76 }, { -6, 76 
}, { 10, 58 }, { -1, 76 }, - { -1, 83 }, { -7, 99 }, { -14, 95 }, { 2, 95 }, - { 0, 76 }, { -5, 74 }, { 0, 70 }, { -11, 75 }, - { 1, 68 }, { 0, 65 }, { -14, 73 }, { 3, 62 }, - { 4, 62 }, { -1, 68 }, { -13, 75 }, { 11, 55 }, - { 5, 64 }, { 12, 70 }, - - /* 338 -> 368 */ - { 15, 6 }, { 6, 19 }, { 7, 16 }, { 12, 14 }, - { 18, 13 }, { 13, 11 }, { 13, 15 }, { 15, 16 }, - { 12, 23 }, { 13, 23 }, { 15, 20 }, { 14, 26 }, - { 14, 44 }, { 17, 40 }, { 17, 47 }, { 24, 17 }, - { 21, 21 }, { 25, 22 }, { 31, 27 }, { 22, 29 }, - { 19, 35 }, { 14, 50 }, { 10, 57 }, { 7, 63 }, - { -2, 77 }, { -4, 82 }, { -3, 94 }, { 9, 69 }, - { -12, 109 },{ 36, -35 }, { 36, -34 }, - - /* 369 -> 398 */ - { 32, -26 }, { 37, -30 }, { 44, -32 }, { 34, -18 }, - { 34, -15 }, { 40, -15 }, { 33, -7 }, { 35, -5 }, - { 33, 0 }, { 38, 2 }, { 33, 13 }, { 23, 35 }, - { 13, 58 }, { 29, -3 }, { 26, 0 }, { 22, 30 }, - { 31, -7 }, { 35, -15 }, { 34, -3 }, { 34, 3 }, - { 36, -1 }, { 34, 5 }, { 32, 11 }, { 35, 5 }, - { 34, 12 }, { 39, 11 }, { 30, 29 }, { 34, 26 }, - { 29, 39 }, { 19, 66 }, - - /* 399 -> 435 */ - { 31, 21 }, { 31, 31 }, { 25, 50 }, - { -17, 120 }, { -20, 112 }, { -18, 114 }, { -11, 85 }, - { -15, 92 }, { -14, 89 }, { -26, 71 }, { -15, 81 }, - { -14, 80 }, { 0, 68 }, { -14, 70 }, { -24, 56 }, - { -23, 68 }, { -24, 50 }, { -11, 74 }, { 23, -13 }, - { 26, -13 }, { 40, -15 }, { 49, -14 }, { 44, 3 }, - { 45, 6 }, { 44, 34 }, { 33, 54 }, { 19, 82 }, - { -3, 75 }, { -1, 23 }, { 1, 34 }, { 1, 43 }, - { 0, 54 }, { -2, 55 }, { 0, 61 }, { 1, 64 }, - { 0, 68 }, { -9, 92 }, - - /* 436 -> 459 */ - { -14, 106 }, { -13, 97 }, { -15, 90 }, { -12, 90 }, - { -18, 88 }, { -10, 73 }, { -9, 79 }, { -14, 86 }, - { -10, 73 }, { -10, 70 }, { -10, 69 }, { -5, 66 }, - { -9, 64 }, { -5, 58 }, { 2, 59 }, { 21, -10 }, - { 24, -11 }, { 28, -8 }, { 28, -1 }, { 29, 3 }, - { 29, 9 }, { 35, 20 }, { 29, 36 }, { 14, 67 }, - - /* 460 -> 1024 */ - { -17, 123 }, { -12, 115 }, { -16, 122 }, { -11, 115 }, - { -12, 63 }, { -2, 68 }, { -15, 84 
}, { -13, 104 }, - { -3, 70 }, { -8, 93 }, { -10, 90 }, { -30, 127 }, - { -17, 123 }, { -12, 115 }, { -16, 122 }, { -11, 115 }, - { -12, 63 }, { -2, 68 }, { -15, 84 }, { -13, 104 }, - { -3, 70 }, { -8, 93 }, { -10, 90 }, { -30, 127 }, - { -7, 93 }, { -11, 87 }, { -3, 77 }, { -5, 71 }, - { -4, 63 }, { -4, 68 }, { -12, 84 }, { -7, 62 }, - { -7, 65 }, { 8, 61 }, { 5, 56 }, { -2, 66 }, - { 1, 64 }, { 0, 61 }, { -2, 78 }, { 1, 50 }, - { 7, 52 }, { 10, 35 }, { 0, 44 }, { 11, 38 }, - { 1, 45 }, { 0, 46 }, { 5, 44 }, { 31, 17 }, - { 1, 51 }, { 7, 50 }, { 28, 19 }, { 16, 33 }, - { 14, 62 }, { -13, 108 }, { -15, 100 }, { -13, 101 }, - { -13, 91 }, { -12, 94 }, { -10, 88 }, { -16, 84 }, - { -10, 86 }, { -7, 83 }, { -13, 87 }, { -19, 94 }, - { 1, 70 }, { 0, 72 }, { -5, 74 }, { 18, 59 }, - { -7, 93 }, { -11, 87 }, { -3, 77 }, { -5, 71 }, - { -4, 63 }, { -4, 68 }, { -12, 84 }, { -7, 62 }, - { -7, 65 }, { 8, 61 }, { 5, 56 }, { -2, 66 }, - { 1, 64 }, { 0, 61 }, { -2, 78 }, { 1, 50 }, - { 7, 52 }, { 10, 35 }, { 0, 44 }, { 11, 38 }, - { 1, 45 }, { 0, 46 }, { 5, 44 }, { 31, 17 }, - { 1, 51 }, { 7, 50 }, { 28, 19 }, { 16, 33 }, - { 14, 62 }, { -13, 108 }, { -15, 100 }, { -13, 101 }, - { -13, 91 }, { -12, 94 }, { -10, 88 }, { -16, 84 }, - { -10, 86 }, { -7, 83 }, { -13, 87 }, { -19, 94 }, - { 1, 70 }, { 0, 72 }, { -5, 74 }, { 18, 59 }, - { 24, 0 }, { 15, 9 }, { 8, 25 }, { 13, 18 }, - { 15, 9 }, { 13, 19 }, { 10, 37 }, { 12, 18 }, - { 6, 29 }, { 20, 33 }, { 15, 30 }, { 4, 45 }, - { 1, 58 }, { 0, 62 }, { 7, 61 }, { 12, 38 }, - { 11, 45 }, { 15, 39 }, { 11, 42 }, { 13, 44 }, - { 16, 45 }, { 12, 41 }, { 10, 49 }, { 30, 34 }, - { 18, 42 }, { 10, 55 }, { 17, 51 }, { 17, 46 }, - { 0, 89 }, { 26, -19 }, { 22, -17 }, { 26, -17 }, - { 30, -25 }, { 28, -20 }, { 33, -23 }, { 37, -27 }, - { 33, -23 }, { 40, -28 }, { 38, -17 }, { 33, -11 }, - { 40, -15 }, { 41, -6 }, { 38, 1 }, { 41, 17 }, - { 24, 0 }, { 15, 9 }, { 8, 25 }, { 13, 18 }, - { 15, 9 }, { 13, 19 }, { 10, 37 }, { 12, 18 }, - { 6, 29 }, { 
20, 33 }, { 15, 30 }, { 4, 45 }, - { 1, 58 }, { 0, 62 }, { 7, 61 }, { 12, 38 }, - { 11, 45 }, { 15, 39 }, { 11, 42 }, { 13, 44 }, - { 16, 45 }, { 12, 41 }, { 10, 49 }, { 30, 34 }, - { 18, 42 }, { 10, 55 }, { 17, 51 }, { 17, 46 }, - { 0, 89 }, { 26, -19 }, { 22, -17 }, { 26, -17 }, - { 30, -25 }, { 28, -20 }, { 33, -23 }, { 37, -27 }, - { 33, -23 }, { 40, -28 }, { 38, -17 }, { 33, -11 }, - { 40, -15 }, { 41, -6 }, { 38, 1 }, { 41, 17 }, - { -17, 120 }, { -20, 112 }, { -18, 114 }, { -11, 85 }, - { -15, 92 }, { -14, 89 }, { -26, 71 }, { -15, 81 }, - { -14, 80 }, { 0, 68 }, { -14, 70 }, { -24, 56 }, - { -23, 68 }, { -24, 50 }, { -11, 74 }, { -14, 106 }, - { -13, 97 }, { -15, 90 }, { -12, 90 }, { -18, 88 }, - { -10, 73 }, { -9, 79 }, { -14, 86 }, { -10, 73 }, - { -10, 70 }, { -10, 69 }, { -5, 66 }, { -9, 64 }, - { -5, 58 }, { 2, 59 }, { 23, -13 }, { 26, -13 }, - { 40, -15 }, { 49, -14 }, { 44, 3 }, { 45, 6 }, - { 44, 34 }, { 33, 54 }, { 19, 82 }, { 21, -10 }, - { 24, -11 }, { 28, -8 }, { 28, -1 }, { 29, 3 }, - { 29, 9 }, { 35, 20 }, { 29, 36 }, { 14, 67 }, - { -3, 75 }, { -1, 23 }, { 1, 34 }, { 1, 43 }, - { 0, 54 }, { -2, 55 }, { 0, 61 }, { 1, 64 }, - { 0, 68 }, { -9, 92 }, { -17, 120 }, { -20, 112 }, - { -18, 114 }, { -11, 85 }, { -15, 92 }, { -14, 89 }, - { -26, 71 }, { -15, 81 }, { -14, 80 }, { 0, 68 }, - { -14, 70 }, { -24, 56 }, { -23, 68 }, { -24, 50 }, - { -11, 74 }, { -14, 106 }, { -13, 97 }, { -15, 90 }, - { -12, 90 }, { -18, 88 }, { -10, 73 }, { -9, 79 }, - { -14, 86 }, { -10, 73 }, { -10, 70 }, { -10, 69 }, - { -5, 66 }, { -9, 64 }, { -5, 58 }, { 2, 59 }, - { 23, -13 }, { 26, -13 }, { 40, -15 }, { 49, -14 }, - { 44, 3 }, { 45, 6 }, { 44, 34 }, { 33, 54 }, - { 19, 82 }, { 21, -10 }, { 24, -11 }, { 28, -8 }, - { 28, -1 }, { 29, 3 }, { 29, 9 }, { 35, 20 }, - { 29, 36 }, { 14, 67 }, { -3, 75 }, { -1, 23 }, - { 1, 34 }, { 1, 43 }, { 0, 54 }, { -2, 55 }, - { 0, 61 }, { 1, 64 }, { 0, 68 }, { -9, 92 }, - { -6, 93 }, { -6, 84 }, { -8, 79 }, { 0, 66 }, - { -1, 71 }, { 
0, 62 }, { -2, 60 }, { -2, 59 }, - { -5, 75 }, { -3, 62 }, { -4, 58 }, { -9, 66 }, - { -1, 79 }, { 0, 71 }, { 3, 68 }, { 10, 44 }, - { -7, 62 }, { 15, 36 }, { 14, 40 }, { 16, 27 }, - { 12, 29 }, { 1, 44 }, { 20, 36 }, { 18, 32 }, - { 5, 42 }, { 1, 48 }, { 10, 62 }, { 17, 46 }, - { 9, 64 }, { -12, 104 }, { -11, 97 }, { -16, 96 }, - { -7, 88 }, { -8, 85 }, { -7, 85 }, { -9, 85 }, - { -13, 88 }, { 4, 66 }, { -3, 77 }, { -3, 76 }, - { -6, 76 }, { 10, 58 }, { -1, 76 }, { -1, 83 }, - { -6, 93 }, { -6, 84 }, { -8, 79 }, { 0, 66 }, - { -1, 71 }, { 0, 62 }, { -2, 60 }, { -2, 59 }, - { -5, 75 }, { -3, 62 }, { -4, 58 }, { -9, 66 }, - { -1, 79 }, { 0, 71 }, { 3, 68 }, { 10, 44 }, - { -7, 62 }, { 15, 36 }, { 14, 40 }, { 16, 27 }, - { 12, 29 }, { 1, 44 }, { 20, 36 }, { 18, 32 }, - { 5, 42 }, { 1, 48 }, { 10, 62 }, { 17, 46 }, - { 9, 64 }, { -12, 104 }, { -11, 97 }, { -16, 96 }, - { -7, 88 }, { -8, 85 }, { -7, 85 }, { -9, 85 }, - { -13, 88 }, { 4, 66 }, { -3, 77 }, { -3, 76 }, - { -6, 76 }, { 10, 58 }, { -1, 76 }, { -1, 83 }, - { 15, 6 }, { 6, 19 }, { 7, 16 }, { 12, 14 }, - { 18, 13 }, { 13, 11 }, { 13, 15 }, { 15, 16 }, - { 12, 23 }, { 13, 23 }, { 15, 20 }, { 14, 26 }, - { 14, 44 }, { 17, 40 }, { 17, 47 }, { 24, 17 }, - { 21, 21 }, { 25, 22 }, { 31, 27 }, { 22, 29 }, - { 19, 35 }, { 14, 50 }, { 10, 57 }, { 7, 63 }, - { -2, 77 }, { -4, 82 }, { -3, 94 }, { 9, 69 }, - { -12, 109 }, { 36, -35 }, { 36, -34 }, { 32, -26 }, - { 37, -30 }, { 44, -32 }, { 34, -18 }, { 34, -15 }, - { 40, -15 }, { 33, -7 }, { 35, -5 }, { 33, 0 }, - { 38, 2 }, { 33, 13 }, { 23, 35 }, { 13, 58 }, - { 15, 6 }, { 6, 19 }, { 7, 16 }, { 12, 14 }, - { 18, 13 }, { 13, 11 }, { 13, 15 }, { 15, 16 }, - { 12, 23 }, { 13, 23 }, { 15, 20 }, { 14, 26 }, - { 14, 44 }, { 17, 40 }, { 17, 47 }, { 24, 17 }, - { 21, 21 }, { 25, 22 }, { 31, 27 }, { 22, 29 }, - { 19, 35 }, { 14, 50 }, { 10, 57 }, { 7, 63 }, - { -2, 77 }, { -4, 82 }, { -3, 94 }, { 9, 69 }, - { -12, 109 }, { 36, -35 }, { 36, -34 }, { 32, -26 }, - { 37, -30 }, { 
44, -32 }, { 34, -18 }, { 34, -15 }, - { 40, -15 }, { 33, -7 }, { 35, -5 }, { 33, 0 }, - { 38, 2 }, { 33, 13 }, { 23, 35 }, { 13, 58 }, - { -3, 71 }, { -6, 42 }, { -5, 50 }, { -3, 54 }, - { -2, 62 }, { 0, 58 }, { 1, 63 }, { -2, 72 }, - { -1, 74 }, { -9, 91 }, { -5, 67 }, { -5, 27 }, - { -3, 39 }, { -2, 44 }, { 0, 46 }, { -16, 64 }, - { -8, 68 }, { -10, 78 }, { -6, 77 }, { -10, 86 }, - { -12, 92 }, { -15, 55 }, { -10, 60 }, { -6, 62 }, - { -4, 65 }, { -12, 73 }, { -8, 76 }, { -7, 80 }, - { -9, 88 }, { -17, 110 }, { -3, 71 }, { -6, 42 }, - { -5, 50 }, { -3, 54 }, { -2, 62 }, { 0, 58 }, - { 1, 63 }, { -2, 72 }, { -1, 74 }, { -9, 91 }, - { -5, 67 }, { -5, 27 }, { -3, 39 }, { -2, 44 }, - { 0, 46 }, { -16, 64 }, { -8, 68 }, { -10, 78 }, - { -6, 77 }, { -10, 86 }, { -12, 92 }, { -15, 55 }, - { -10, 60 }, { -6, 62 }, { -4, 65 }, { -12, 73 }, - { -8, 76 }, { -7, 80 }, { -9, 88 }, { -17, 110 }, - { -3, 70 }, { -8, 93 }, { -10, 90 }, { -30, 127 }, - { -3, 70 }, { -8, 93 }, { -10, 90 }, { -30, 127 }, - { -3, 70 }, { -8, 93 }, { -10, 90 }, { -30, 127 } -}; - -static const int8_t x264_cabac_context_init_PB[3][1024][2] = -{ - /* i_cabac_init_idc == 0 */ - { - /* 0 - 10 */ - { 20, -15 }, { 2, 54 }, { 3, 74 }, { 20, -15 }, - { 2, 54 }, { 3, 74 }, { -28, 127 }, { -23, 104 }, - { -6, 53 }, { -1, 54 }, { 7, 51 }, - - /* 11 - 23 */ - { 23, 33 }, { 23, 2 }, { 21, 0 }, { 1, 9 }, - { 0, 49 }, { -37, 118 }, { 5, 57 }, { -13, 78 }, - { -11, 65 }, { 1, 62 }, { 12, 49 }, { -4, 73 }, - { 17, 50 }, - - /* 24 - 39 */ - { 18, 64 }, { 9, 43 }, { 29, 0 }, { 26, 67 }, - { 16, 90 }, { 9, 104 }, { -46, 127 }, { -20, 104 }, - { 1, 67 }, { -13, 78 }, { -11, 65 }, { 1, 62 }, - { -6, 86 }, { -17, 95 }, { -6, 61 }, { 9, 45 }, - - /* 40 - 53 */ - { -3, 69 }, { -6, 81 }, { -11, 96 }, { 6, 55 }, - { 7, 67 }, { -5, 86 }, { 2, 88 }, { 0, 58 }, - { -3, 76 }, { -10, 94 }, { 5, 54 }, { 4, 69 }, - { -3, 81 }, { 0, 88 }, - - /* 54 - 59 */ - { -7, 67 }, { -5, 74 }, { -4, 74 }, { -5, 80 }, - { -7, 72 }, { 1, 58 }, - - 
/* 60 - 69 */ - { 0, 41 }, { 0, 63 }, { 0, 63 }, { 0, 63 }, - { -9, 83 }, { 4, 86 }, { 0, 97 }, { -7, 72 }, - { 13, 41 }, { 3, 62 }, - - /* 70 - 87 */ - { 0, 45 }, { -4, 78 }, { -3, 96 }, { -27, 126 }, - { -28, 98 }, { -25, 101 }, { -23, 67 }, { -28, 82 }, - { -20, 94 }, { -16, 83 }, { -22, 110 }, { -21, 91 }, - { -18, 102 }, { -13, 93 }, { -29, 127 }, { -7, 92 }, - { -5, 89 }, { -7, 96 }, { -13, 108 }, { -3, 46 }, - { -1, 65 }, { -1, 57 }, { -9, 93 }, { -3, 74 }, - { -9, 92 }, { -8, 87 }, { -23, 126 }, { 5, 54 }, - { 6, 60 }, { 6, 59 }, { 6, 69 }, { -1, 48 }, - { 0, 68 }, { -4, 69 }, { -8, 88 }, - - /* 105 -> 165 */ - { -2, 85 }, { -6, 78 }, { -1, 75 }, { -7, 77 }, - { 2, 54 }, { 5, 50 }, { -3, 68 }, { 1, 50 }, - { 6, 42 }, { -4, 81 }, { 1, 63 }, { -4, 70 }, - { 0, 67 }, { 2, 57 }, { -2, 76 }, { 11, 35 }, - { 4, 64 }, { 1, 61 }, { 11, 35 }, { 18, 25 }, - { 12, 24 }, { 13, 29 }, { 13, 36 }, { -10, 93 }, - { -7, 73 }, { -2, 73 }, { 13, 46 }, { 9, 49 }, - { -7, 100 }, { 9, 53 }, { 2, 53 }, { 5, 53 }, - { -2, 61 }, { 0, 56 }, { 0, 56 }, { -13, 63 }, - { -5, 60 }, { -1, 62 }, { 4, 57 }, { -6, 69 }, - { 4, 57 }, { 14, 39 }, { 4, 51 }, { 13, 68 }, - { 3, 64 }, { 1, 61 }, { 9, 63 }, { 7, 50 }, - { 16, 39 }, { 5, 44 }, { 4, 52 }, { 11, 48 }, - { -5, 60 }, { -1, 59 }, { 0, 59 }, { 22, 33 }, - { 5, 44 }, { 14, 43 }, { -1, 78 }, { 0, 60 }, - { 9, 69 }, - - /* 166 - 226 */ - { 11, 28 }, { 2, 40 }, { 3, 44 }, { 0, 49 }, - { 0, 46 }, { 2, 44 }, { 2, 51 }, { 0, 47 }, - { 4, 39 }, { 2, 62 }, { 6, 46 }, { 0, 54 }, - { 3, 54 }, { 2, 58 }, { 4, 63 }, { 6, 51 }, - { 6, 57 }, { 7, 53 }, { 6, 52 }, { 6, 55 }, - { 11, 45 }, { 14, 36 }, { 8, 53 }, { -1, 82 }, - { 7, 55 }, { -3, 78 }, { 15, 46 }, { 22, 31 }, - { -1, 84 }, { 25, 7 }, { 30, -7 }, { 28, 3 }, - { 28, 4 }, { 32, 0 }, { 34, -1 }, { 30, 6 }, - { 30, 6 }, { 32, 9 }, { 31, 19 }, { 26, 27 }, - { 26, 30 }, { 37, 20 }, { 28, 34 }, { 17, 70 }, - { 1, 67 }, { 5, 59 }, { 9, 67 }, { 16, 30 }, - { 18, 32 }, { 18, 35 }, { 22, 29 }, { 24, 31 
}, - { 23, 38 }, { 18, 43 }, { 20, 41 }, { 11, 63 }, - { 9, 59 }, { 9, 64 }, { -1, 94 }, { -2, 89 }, - { -9, 108 }, - - /* 227 - 275 */ - { -6, 76 }, { -2, 44 }, { 0, 45 }, { 0, 52 }, - { -3, 64 }, { -2, 59 }, { -4, 70 }, { -4, 75 }, - { -8, 82 }, { -17, 102 }, { -9, 77 }, { 3, 24 }, - { 0, 42 }, { 0, 48 }, { 0, 55 }, { -6, 59 }, - { -7, 71 }, { -12, 83 }, { -11, 87 }, { -30, 119 }, - { 1, 58 }, { -3, 29 }, { -1, 36 }, { 1, 38 }, - { 2, 43 }, { -6, 55 }, { 0, 58 }, { 0, 64 }, - { -3, 74 }, { -10, 90 }, { 0, 70 }, { -4, 29 }, - { 5, 31 }, { 7, 42 }, { 1, 59 }, { -2, 58 }, - { -3, 72 }, { -3, 81 }, { -11, 97 }, { 0, 58 }, - { 8, 5 }, { 10, 14 }, { 14, 18 }, { 13, 27 }, - { 2, 40 }, { 0, 58 }, { -3, 70 }, { -6, 79 }, - { -8, 85 }, - - /* 276 a bit special (not used, x264_cabac_encode_bypass is used instead) */ - { 0, 0 }, - - /* 277 - 337 */ - { -13, 106 }, { -16, 106 }, { -10, 87 }, { -21, 114 }, - { -18, 110 }, { -14, 98 }, { -22, 110 }, { -21, 106 }, - { -18, 103 }, { -21, 107 }, { -23, 108 }, { -26, 112 }, - { -10, 96 }, { -12, 95 }, { -5, 91 }, { -9, 93 }, - { -22, 94 }, { -5, 86 }, { 9, 67 }, { -4, 80 }, - { -10, 85 }, { -1, 70 }, { 7, 60 }, { 9, 58 }, - { 5, 61 }, { 12, 50 }, { 15, 50 }, { 18, 49 }, - { 17, 54 }, { 10, 41 }, { 7, 46 }, { -1, 51 }, - { 7, 49 }, { 8, 52 }, { 9, 41 }, { 6, 47 }, - { 2, 55 }, { 13, 41 }, { 10, 44 }, { 6, 50 }, - { 5, 53 }, { 13, 49 }, { 4, 63 }, { 6, 64 }, - { -2, 69 }, { -2, 59 }, { 6, 70 }, { 10, 44 }, - { 9, 31 }, { 12, 43 }, { 3, 53 }, { 14, 34 }, - { 10, 38 }, { -3, 52 }, { 13, 40 }, { 17, 32 }, - { 7, 44 }, { 7, 38 }, { 13, 50 }, { 10, 57 }, - { 26, 43 }, - - /* 338 - 398 */ - { 14, 11 }, { 11, 14 }, { 9, 11 }, { 18, 11 }, - { 21, 9 }, { 23, -2 }, { 32, -15 }, { 32, -15 }, - { 34, -21 }, { 39, -23 }, { 42, -33 }, { 41, -31 }, - { 46, -28 }, { 38, -12 }, { 21, 29 }, { 45, -24 }, - { 53, -45 }, { 48, -26 }, { 65, -43 }, { 43, -19 }, - { 39, -10 }, { 30, 9 }, { 18, 26 }, { 20, 27 }, - { 0, 57 }, { -14, 82 }, { -5, 75 }, { -19, 
97 }, - { -35, 125 }, { 27, 0 }, { 28, 0 }, { 31, -4 }, - { 27, 6 }, { 34, 8 }, { 30, 10 }, { 24, 22 }, - { 33, 19 }, { 22, 32 }, { 26, 31 }, { 21, 41 }, - { 26, 44 }, { 23, 47 }, { 16, 65 }, { 14, 71 }, - { 8, 60 }, { 6, 63 }, { 17, 65 }, { 21, 24 }, - { 23, 20 }, { 26, 23 }, { 27, 32 }, { 28, 23 }, - { 28, 24 }, { 23, 40 }, { 24, 32 }, { 28, 29 }, - { 23, 42 }, { 19, 57 }, { 22, 53 }, { 22, 61 }, - { 11, 86 }, - - /* 399 -> 435 */ - { 12, 40 }, { 11, 51 }, { 14, 59 }, - { -4, 79 }, { -7, 71 }, { -5, 69 }, { -9, 70 }, - { -8, 66 }, { -10, 68 }, { -19, 73 }, { -12, 69 }, - { -16, 70 }, { -15, 67 }, { -20, 62 }, { -19, 70 }, - { -16, 66 }, { -22, 65 }, { -20, 63 }, { 9, -2 }, - { 26, -9 }, { 33, -9 }, { 39, -7 }, { 41, -2 }, - { 45, 3 }, { 49, 9 }, { 45, 27 }, { 36, 59 }, - { -6, 66 }, { -7, 35 }, { -7, 42 }, { -8, 45 }, - { -5, 48 }, { -12, 56 }, { -6, 60 }, { -5, 62 }, - { -8, 66 }, { -8, 76 }, - - /* 436 -> 459 */ - { -5, 85 }, { -6, 81 }, { -10, 77 }, { -7, 81 }, - { -17, 80 }, { -18, 73 }, { -4, 74 }, { -10, 83 }, - { -9, 71 }, { -9, 67 }, { -1, 61 }, { -8, 66 }, - { -14, 66 }, { 0, 59 }, { 2, 59 }, { 21, -13 }, - { 33, -14 }, { 39, -7 }, { 46, -2 }, { 51, 2 }, - { 60, 6 }, { 61, 17 }, { 55, 34 }, { 42, 62 }, - - /* 460 - 1024 */ - { -7, 92 }, { -5, 89 }, { -7, 96 }, { -13, 108 }, - { -3, 46 }, { -1, 65 }, { -1, 57 }, { -9, 93 }, - { -3, 74 }, { -9, 92 }, { -8, 87 }, { -23, 126 }, - { -7, 92 }, { -5, 89 }, { -7, 96 }, { -13, 108 }, - { -3, 46 }, { -1, 65 }, { -1, 57 }, { -9, 93 }, - { -3, 74 }, { -9, 92 }, { -8, 87 }, { -23, 126 }, - { -2, 85 }, { -6, 78 }, { -1, 75 }, { -7, 77 }, - { 2, 54 }, { 5, 50 }, { -3, 68 }, { 1, 50 }, - { 6, 42 }, { -4, 81 }, { 1, 63 }, { -4, 70 }, - { 0, 67 }, { 2, 57 }, { -2, 76 }, { 11, 35 }, - { 4, 64 }, { 1, 61 }, { 11, 35 }, { 18, 25 }, - { 12, 24 }, { 13, 29 }, { 13, 36 }, { -10, 93 }, - { -7, 73 }, { -2, 73 }, { 13, 46 }, { 9, 49 }, - { -7, 100 }, { 9, 53 }, { 2, 53 }, { 5, 53 }, - { -2, 61 }, { 0, 56 }, { 0, 56 }, { -13, 63 }, 
- { -5, 60 }, { -1, 62 }, { 4, 57 }, { -6, 69 }, - { 4, 57 }, { 14, 39 }, { 4, 51 }, { 13, 68 }, - { -2, 85 }, { -6, 78 }, { -1, 75 }, { -7, 77 }, - { 2, 54 }, { 5, 50 }, { -3, 68 }, { 1, 50 }, - { 6, 42 }, { -4, 81 }, { 1, 63 }, { -4, 70 }, - { 0, 67 }, { 2, 57 }, { -2, 76 }, { 11, 35 }, - { 4, 64 }, { 1, 61 }, { 11, 35 }, { 18, 25 }, - { 12, 24 }, { 13, 29 }, { 13, 36 }, { -10, 93 }, - { -7, 73 }, { -2, 73 }, { 13, 46 }, { 9, 49 }, - { -7, 100 }, { 9, 53 }, { 2, 53 }, { 5, 53 }, - { -2, 61 }, { 0, 56 }, { 0, 56 }, { -13, 63 }, - { -5, 60 }, { -1, 62 }, { 4, 57 }, { -6, 69 }, - { 4, 57 }, { 14, 39 }, { 4, 51 }, { 13, 68 }, - { 11, 28 }, { 2, 40 }, { 3, 44 }, { 0, 49 }, - { 0, 46 }, { 2, 44 }, { 2, 51 }, { 0, 47 }, - { 4, 39 }, { 2, 62 }, { 6, 46 }, { 0, 54 }, - { 3, 54 }, { 2, 58 }, { 4, 63 }, { 6, 51 }, - { 6, 57 }, { 7, 53 }, { 6, 52 }, { 6, 55 }, - { 11, 45 }, { 14, 36 }, { 8, 53 }, { -1, 82 }, - { 7, 55 }, { -3, 78 }, { 15, 46 }, { 22, 31 }, - { -1, 84 }, { 25, 7 }, { 30, -7 }, { 28, 3 }, - { 28, 4 }, { 32, 0 }, { 34, -1 }, { 30, 6 }, - { 30, 6 }, { 32, 9 }, { 31, 19 }, { 26, 27 }, - { 26, 30 }, { 37, 20 }, { 28, 34 }, { 17, 70 }, - { 11, 28 }, { 2, 40 }, { 3, 44 }, { 0, 49 }, - { 0, 46 }, { 2, 44 }, { 2, 51 }, { 0, 47 }, - { 4, 39 }, { 2, 62 }, { 6, 46 }, { 0, 54 }, - { 3, 54 }, { 2, 58 }, { 4, 63 }, { 6, 51 }, - { 6, 57 }, { 7, 53 }, { 6, 52 }, { 6, 55 }, - { 11, 45 }, { 14, 36 }, { 8, 53 }, { -1, 82 }, - { 7, 55 }, { -3, 78 }, { 15, 46 }, { 22, 31 }, - { -1, 84 }, { 25, 7 }, { 30, -7 }, { 28, 3 }, - { 28, 4 }, { 32, 0 }, { 34, -1 }, { 30, 6 }, - { 30, 6 }, { 32, 9 }, { 31, 19 }, { 26, 27 }, - { 26, 30 }, { 37, 20 }, { 28, 34 }, { 17, 70 }, - { -4, 79 }, { -7, 71 }, { -5, 69 }, { -9, 70 }, - { -8, 66 }, { -10, 68 }, { -19, 73 }, { -12, 69 }, - { -16, 70 }, { -15, 67 }, { -20, 62 }, { -19, 70 }, - { -16, 66 }, { -22, 65 }, { -20, 63 }, { -5, 85 }, - { -6, 81 }, { -10, 77 }, { -7, 81 }, { -17, 80 }, - { -18, 73 }, { -4, 74 }, { -10, 83 }, { -9, 71 }, - { -9, 
67 }, { -1, 61 }, { -8, 66 }, { -14, 66 }, - { 0, 59 }, { 2, 59 }, { 9, -2 }, { 26, -9 }, - { 33, -9 }, { 39, -7 }, { 41, -2 }, { 45, 3 }, - { 49, 9 }, { 45, 27 }, { 36, 59 }, { 21, -13 }, - { 33, -14 }, { 39, -7 }, { 46, -2 }, { 51, 2 }, - { 60, 6 }, { 61, 17 }, { 55, 34 }, { 42, 62 }, - { -6, 66 }, { -7, 35 }, { -7, 42 }, { -8, 45 }, - { -5, 48 }, { -12, 56 }, { -6, 60 }, { -5, 62 }, - { -8, 66 }, { -8, 76 }, { -4, 79 }, { -7, 71 }, - { -5, 69 }, { -9, 70 }, { -8, 66 }, { -10, 68 }, - { -19, 73 }, { -12, 69 }, { -16, 70 }, { -15, 67 }, - { -20, 62 }, { -19, 70 }, { -16, 66 }, { -22, 65 }, - { -20, 63 }, { -5, 85 }, { -6, 81 }, { -10, 77 }, - { -7, 81 }, { -17, 80 }, { -18, 73 }, { -4, 74 }, - { -10, 83 }, { -9, 71 }, { -9, 67 }, { -1, 61 }, - { -8, 66 }, { -14, 66 }, { 0, 59 }, { 2, 59 }, - { 9, -2 }, { 26, -9 }, { 33, -9 }, { 39, -7 }, - { 41, -2 }, { 45, 3 }, { 49, 9 }, { 45, 27 }, - { 36, 59 }, { 21, -13 }, { 33, -14 }, { 39, -7 }, - { 46, -2 }, { 51, 2 }, { 60, 6 }, { 61, 17 }, - { 55, 34 }, { 42, 62 }, { -6, 66 }, { -7, 35 }, - { -7, 42 }, { -8, 45 }, { -5, 48 }, { -12, 56 }, - { -6, 60 }, { -5, 62 }, { -8, 66 }, { -8, 76 }, - { -13, 106 }, { -16, 106 }, { -10, 87 }, { -21, 114 }, - { -18, 110 }, { -14, 98 }, { -22, 110 }, { -21, 106 }, - { -18, 103 }, { -21, 107 }, { -23, 108 }, { -26, 112 }, - { -10, 96 }, { -12, 95 }, { -5, 91 }, { -9, 93 }, - { -22, 94 }, { -5, 86 }, { 9, 67 }, { -4, 80 }, - { -10, 85 }, { -1, 70 }, { 7, 60 }, { 9, 58 }, - { 5, 61 }, { 12, 50 }, { 15, 50 }, { 18, 49 }, - { 17, 54 }, { 10, 41 }, { 7, 46 }, { -1, 51 }, - { 7, 49 }, { 8, 52 }, { 9, 41 }, { 6, 47 }, - { 2, 55 }, { 13, 41 }, { 10, 44 }, { 6, 50 }, - { 5, 53 }, { 13, 49 }, { 4, 63 }, { 6, 64 }, - { -13, 106 }, { -16, 106 }, { -10, 87 }, { -21, 114 }, - { -18, 110 }, { -14, 98 }, { -22, 110 }, { -21, 106 }, - { -18, 103 }, { -21, 107 }, { -23, 108 }, { -26, 112 }, - { -10, 96 }, { -12, 95 }, { -5, 91 }, { -9, 93 }, - { -22, 94 }, { -5, 86 }, { 9, 67 }, { -4, 80 }, - { -10, 85 
}, { -1, 70 }, { 7, 60 }, { 9, 58 }, - { 5, 61 }, { 12, 50 }, { 15, 50 }, { 18, 49 }, - { 17, 54 }, { 10, 41 }, { 7, 46 }, { -1, 51 }, - { 7, 49 }, { 8, 52 }, { 9, 41 }, { 6, 47 }, - { 2, 55 }, { 13, 41 }, { 10, 44 }, { 6, 50 }, - { 5, 53 }, { 13, 49 }, { 4, 63 }, { 6, 64 }, - { 14, 11 }, { 11, 14 }, { 9, 11 }, { 18, 11 }, - { 21, 9 }, { 23, -2 }, { 32, -15 }, { 32, -15 }, - { 34, -21 }, { 39, -23 }, { 42, -33 }, { 41, -31 }, - { 46, -28 }, { 38, -12 }, { 21, 29 }, { 45, -24 }, - { 53, -45 }, { 48, -26 }, { 65, -43 }, { 43, -19 }, - { 39, -10 }, { 30, 9 }, { 18, 26 }, { 20, 27 }, - { 0, 57 }, { -14, 82 }, { -5, 75 }, { -19, 97 }, - { -35, 125 }, { 27, 0 }, { 28, 0 }, { 31, -4 }, - { 27, 6 }, { 34, 8 }, { 30, 10 }, { 24, 22 }, - { 33, 19 }, { 22, 32 }, { 26, 31 }, { 21, 41 }, - { 26, 44 }, { 23, 47 }, { 16, 65 }, { 14, 71 }, - { 14, 11 }, { 11, 14 }, { 9, 11 }, { 18, 11 }, - { 21, 9 }, { 23, -2 }, { 32, -15 }, { 32, -15 }, - { 34, -21 }, { 39, -23 }, { 42, -33 }, { 41, -31 }, - { 46, -28 }, { 38, -12 }, { 21, 29 }, { 45, -24 }, - { 53, -45 }, { 48, -26 }, { 65, -43 }, { 43, -19 }, - { 39, -10 }, { 30, 9 }, { 18, 26 }, { 20, 27 }, - { 0, 57 }, { -14, 82 }, { -5, 75 }, { -19, 97 }, - { -35, 125 }, { 27, 0 }, { 28, 0 }, { 31, -4 }, - { 27, 6 }, { 34, 8 }, { 30, 10 }, { 24, 22 }, - { 33, 19 }, { 22, 32 }, { 26, 31 }, { 21, 41 }, - { 26, 44 }, { 23, 47 }, { 16, 65 }, { 14, 71 }, - { -6, 76 }, { -2, 44 }, { 0, 45 }, { 0, 52 }, - { -3, 64 }, { -2, 59 }, { -4, 70 }, { -4, 75 }, - { -8, 82 }, { -17, 102 }, { -9, 77 }, { 3, 24 }, - { 0, 42 }, { 0, 48 }, { 0, 55 }, { -6, 59 }, - { -7, 71 }, { -12, 83 }, { -11, 87 }, { -30, 119 }, - { 1, 58 }, { -3, 29 }, { -1, 36 }, { 1, 38 }, - { 2, 43 }, { -6, 55 }, { 0, 58 }, { 0, 64 }, - { -3, 74 }, { -10, 90 }, { -6, 76 }, { -2, 44 }, - { 0, 45 }, { 0, 52 }, { -3, 64 }, { -2, 59 }, - { -4, 70 }, { -4, 75 }, { -8, 82 }, { -17, 102 }, - { -9, 77 }, { 3, 24 }, { 0, 42 }, { 0, 48 }, - { 0, 55 }, { -6, 59 }, { -7, 71 }, { -12, 83 }, - { -11, 
87 }, { -30, 119 }, { 1, 58 }, { -3, 29 }, - { -1, 36 }, { 1, 38 }, { 2, 43 }, { -6, 55 }, - { 0, 58 }, { 0, 64 }, { -3, 74 }, { -10, 90 }, - { -3, 74 }, { -9, 92 }, { -8, 87 }, { -23, 126 }, - { -3, 74 }, { -9, 92 }, { -8, 87 }, { -23, 126 }, - { -3, 74 }, { -9, 92 }, { -8, 87 }, { -23, 126 } - }, - - /* i_cabac_init_idc == 1 */ - { - /* 0 - 10 */ - { 20, -15 }, { 2, 54 }, { 3, 74 }, { 20, -15 }, - { 2, 54 }, { 3, 74 }, { -28, 127 }, { -23, 104 }, - { -6, 53 }, { -1, 54 }, { 7, 51 }, - - /* 11 - 23 */ - { 22, 25 }, { 34, 0 }, { 16, 0 }, { -2, 9 }, - { 4, 41 }, { -29, 118 }, { 2, 65 }, { -6, 71 }, - { -13, 79 }, { 5, 52 }, { 9, 50 }, { -3, 70 }, - { 10, 54 }, - - /* 24 - 39 */ - { 26, 34 }, { 19, 22 }, { 40, 0 }, { 57, 2 }, - { 41, 36 }, { 26, 69 }, { -45, 127 }, { -15, 101 }, - { -4, 76 }, { -6, 71 }, { -13, 79 }, { 5, 52 }, - { 6, 69 }, { -13, 90 }, { 0, 52 }, { 8, 43 }, - - /* 40 - 53 */ - { -2, 69 },{ -5, 82 },{ -10, 96 },{ 2, 59 }, - { 2, 75 },{ -3, 87 },{ -3, 100 },{ 1, 56 }, - { -3, 74 },{ -6, 85 },{ 0, 59 },{ -3, 81 }, - { -7, 86 },{ -5, 95 }, - - /* 54 - 59 */ - { -1, 66 },{ -1, 77 },{ 1, 70 },{ -2, 86 }, - { -5, 72 },{ 0, 61 }, - - /* 60 - 69 */ - { 0, 41 }, { 0, 63 }, { 0, 63 }, { 0, 63 }, - { -9, 83 }, { 4, 86 }, { 0, 97 }, { -7, 72 }, - { 13, 41 }, { 3, 62 }, - - /* 70 - 104 */ - { 13, 15 }, { 7, 51 }, { 2, 80 }, { -39, 127 }, - { -18, 91 }, { -17, 96 }, { -26, 81 }, { -35, 98 }, - { -24, 102 }, { -23, 97 }, { -27, 119 }, { -24, 99 }, - { -21, 110 }, { -18, 102 }, { -36, 127 }, { 0, 80 }, - { -5, 89 }, { -7, 94 }, { -4, 92 }, { 0, 39 }, - { 0, 65 }, { -15, 84 }, { -35, 127 }, { -2, 73 }, - { -12, 104 }, { -9, 91 }, { -31, 127 }, { 3, 55 }, - { 7, 56 }, { 7, 55 }, { 8, 61 }, { -3, 53 }, - { 0, 68 }, { -7, 74 }, { -9, 88 }, - - /* 105 -> 165 */ - { -13, 103 }, { -13, 91 }, { -9, 89 }, { -14, 92 }, - { -8, 76 }, { -12, 87 }, { -23, 110 }, { -24, 105 }, - { -10, 78 }, { -20, 112 }, { -17, 99 }, { -78, 127 }, - { -70, 127 }, { -50, 127 }, { -46, 127 }, { 
-4, 66 }, - { -5, 78 }, { -4, 71 }, { -8, 72 }, { 2, 59 }, - { -1, 55 }, { -7, 70 }, { -6, 75 }, { -8, 89 }, - { -34, 119 }, { -3, 75 }, { 32, 20 }, { 30, 22 }, - { -44, 127 }, { 0, 54 }, { -5, 61 }, { 0, 58 }, - { -1, 60 }, { -3, 61 }, { -8, 67 }, { -25, 84 }, - { -14, 74 }, { -5, 65 }, { 5, 52 }, { 2, 57 }, - { 0, 61 }, { -9, 69 }, { -11, 70 }, { 18, 55 }, - { -4, 71 }, { 0, 58 }, { 7, 61 }, { 9, 41 }, - { 18, 25 }, { 9, 32 }, { 5, 43 }, { 9, 47 }, - { 0, 44 }, { 0, 51 }, { 2, 46 }, { 19, 38 }, - { -4, 66 }, { 15, 38 }, { 12, 42 }, { 9, 34 }, - { 0, 89 }, - - /* 166 - 226 */ - { 4, 45 }, { 10, 28 }, { 10, 31 }, { 33, -11 }, - { 52, -43 }, { 18, 15 }, { 28, 0 }, { 35, -22 }, - { 38, -25 }, { 34, 0 }, { 39, -18 }, { 32, -12 }, - { 102, -94 }, { 0, 0 }, { 56, -15 }, { 33, -4 }, - { 29, 10 }, { 37, -5 }, { 51, -29 }, { 39, -9 }, - { 52, -34 }, { 69, -58 }, { 67, -63 }, { 44, -5 }, - { 32, 7 }, { 55, -29 }, { 32, 1 }, { 0, 0 }, - { 27, 36 }, { 33, -25 }, { 34, -30 }, { 36, -28 }, - { 38, -28 }, { 38, -27 }, { 34, -18 }, { 35, -16 }, - { 34, -14 }, { 32, -8 }, { 37, -6 }, { 35, 0 }, - { 30, 10 }, { 28, 18 }, { 26, 25 }, { 29, 41 }, - { 0, 75 }, { 2, 72 }, { 8, 77 }, { 14, 35 }, - { 18, 31 }, { 17, 35 }, { 21, 30 }, { 17, 45 }, - { 20, 42 }, { 18, 45 }, { 27, 26 }, { 16, 54 }, - { 7, 66 }, { 16, 56 }, { 11, 73 }, { 10, 67 }, - { -10, 116 }, - - /* 227 - 275 */ - { -23, 112 }, { -15, 71 }, { -7, 61 }, { 0, 53 }, - { -5, 66 }, { -11, 77 }, { -9, 80 }, { -9, 84 }, - { -10, 87 }, { -34, 127 }, { -21, 101 }, { -3, 39 }, - { -5, 53 }, { -7, 61 }, { -11, 75 }, { -15, 77 }, - { -17, 91 }, { -25, 107 }, { -25, 111 }, { -28, 122 }, - { -11, 76 }, { -10, 44 }, { -10, 52 }, { -10, 57 }, - { -9, 58 }, { -16, 72 }, { -7, 69 }, { -4, 69 }, - { -5, 74 }, { -9, 86 }, { 2, 66 }, { -9, 34 }, - { 1, 32 }, { 11, 31 }, { 5, 52 }, { -2, 55 }, - { -2, 67 }, { 0, 73 }, { -8, 89 }, { 3, 52 }, - { 7, 4 }, { 10, 8 }, { 17, 8 }, { 16, 19 }, - { 3, 37 }, { -1, 61 }, { -5, 73 }, { -1, 70 }, - { -4, 
78 }, - - /* 276 a bit special (not used, x264_cabac_encode_bypass is used instead) */ - { 0, 0 }, - - /* 277 - 337 */ - { -21, 126 }, { -23, 124 }, { -20, 110 }, { -26, 126 }, - { -25, 124 }, { -17, 105 }, { -27, 121 }, { -27, 117 }, - { -17, 102 }, { -26, 117 }, { -27, 116 }, { -33, 122 }, - { -10, 95 }, { -14, 100 }, { -8, 95 }, { -17, 111 }, - { -28, 114 }, { -6, 89 }, { -2, 80 }, { -4, 82 }, - { -9, 85 }, { -8, 81 }, { -1, 72 }, { 5, 64 }, - { 1, 67 }, { 9, 56 }, { 0, 69 }, { 1, 69 }, - { 7, 69 }, { -7, 69 }, { -6, 67 }, { -16, 77 }, - { -2, 64 }, { 2, 61 }, { -6, 67 }, { -3, 64 }, - { 2, 57 }, { -3, 65 }, { -3, 66 }, { 0, 62 }, - { 9, 51 }, { -1, 66 }, { -2, 71 }, { -2, 75 }, - { -1, 70 }, { -9, 72 }, { 14, 60 }, { 16, 37 }, - { 0, 47 }, { 18, 35 }, { 11, 37 }, { 12, 41 }, - { 10, 41 }, { 2, 48 }, { 12, 41 }, { 13, 41 }, - { 0, 59 }, { 3, 50 }, { 19, 40 }, { 3, 66 }, - { 18, 50 }, - - /* 338 - 398 */ - { 19, -6 }, { 18, -6 }, { 14, 0 }, { 26, -12 }, - { 31, -16 }, { 33, -25 }, { 33, -22 }, { 37, -28 }, - { 39, -30 }, { 42, -30 }, { 47, -42 }, { 45, -36 }, - { 49, -34 }, { 41, -17 }, { 32, 9 }, { 69, -71 }, - { 63, -63 }, { 66, -64 }, { 77, -74 }, { 54, -39 }, - { 52, -35 }, { 41, -10 }, { 36, 0 }, { 40, -1 }, - { 30, 14 }, { 28, 26 }, { 23, 37 }, { 12, 55 }, - { 11, 65 }, { 37, -33 }, { 39, -36 }, { 40, -37 }, - { 38, -30 }, { 46, -33 }, { 42, -30 }, { 40, -24 }, - { 49, -29 }, { 38, -12 }, { 40, -10 }, { 38, -3 }, - { 46, -5 }, { 31, 20 }, { 29, 30 }, { 25, 44 }, - { 12, 48 }, { 11, 49 }, { 26, 45 }, { 22, 22 }, - { 23, 22 }, { 27, 21 }, { 33, 20 }, { 26, 28 }, - { 30, 24 }, { 27, 34 }, { 18, 42 }, { 25, 39 }, - { 18, 50 }, { 12, 70 }, { 21, 54 }, { 14, 71 }, - { 11, 83 }, - - /* 399 -> 435 */ - { 25, 32 }, { 21, 49 }, { 21, 54 }, - { -5, 85 }, { -6, 81 }, { -10, 77 }, { -7, 81 }, - { -17, 80 }, { -18, 73 }, { -4, 74 }, { -10, 83 }, - { -9, 71 }, { -9, 67 }, { -1, 61 }, { -8, 66 }, - { -14, 66 }, { 0, 59 }, { 2, 59 }, { 17, -10 }, - { 32, -13 }, { 42, -9 }, 
{ 49, -5 }, { 53, 0 }, - { 64, 3 }, { 68, 10 }, { 66, 27 }, { 47, 57 }, - { -5, 71 }, { 0, 24 }, { -1, 36 }, { -2, 42 }, - { -2, 52 }, { -9, 57 }, { -6, 63 }, { -4, 65 }, - { -4, 67 }, { -7, 82 }, - - /* 436 -> 459 */ - { -3, 81 }, { -3, 76 }, { -7, 72 }, { -6, 78 }, - { -12, 72 }, { -14, 68 }, { -3, 70 }, { -6, 76 }, - { -5, 66 }, { -5, 62 }, { 0, 57 }, { -4, 61 }, - { -9, 60 }, { 1, 54 }, { 2, 58 }, { 17, -10 }, - { 32, -13 }, { 42, -9 }, { 49, -5 }, { 53, 0 }, - { 64, 3 }, { 68, 10 }, { 66, 27 }, { 47, 57 }, - - /* 460 - 1024 */ - { 0, 80 }, { -5, 89 }, { -7, 94 }, { -4, 92 }, - { 0, 39 }, { 0, 65 }, { -15, 84 }, { -35, 127 }, - { -2, 73 }, { -12, 104 }, { -9, 91 }, { -31, 127 }, - { 0, 80 }, { -5, 89 }, { -7, 94 }, { -4, 92 }, - { 0, 39 }, { 0, 65 }, { -15, 84 }, { -35, 127 }, - { -2, 73 }, { -12, 104 }, { -9, 91 }, { -31, 127 }, - { -13, 103 }, { -13, 91 }, { -9, 89 }, { -14, 92 }, - { -8, 76 }, { -12, 87 }, { -23, 110 }, { -24, 105 }, - { -10, 78 }, { -20, 112 }, { -17, 99 }, { -78, 127 }, - { -70, 127 }, { -50, 127 }, { -46, 127 }, { -4, 66 }, - { -5, 78 }, { -4, 71 }, { -8, 72 }, { 2, 59 }, - { -1, 55 }, { -7, 70 }, { -6, 75 }, { -8, 89 }, - { -34, 119 }, { -3, 75 }, { 32, 20 }, { 30, 22 }, - { -44, 127 }, { 0, 54 }, { -5, 61 }, { 0, 58 }, - { -1, 60 }, { -3, 61 }, { -8, 67 }, { -25, 84 }, - { -14, 74 }, { -5, 65 }, { 5, 52 }, { 2, 57 }, - { 0, 61 }, { -9, 69 }, { -11, 70 }, { 18, 55 }, - { -13, 103 }, { -13, 91 }, { -9, 89 }, { -14, 92 }, - { -8, 76 }, { -12, 87 }, { -23, 110 }, { -24, 105 }, - { -10, 78 }, { -20, 112 }, { -17, 99 }, { -78, 127 }, - { -70, 127 }, { -50, 127 }, { -46, 127 }, { -4, 66 }, - { -5, 78 }, { -4, 71 }, { -8, 72 }, { 2, 59 }, - { -1, 55 }, { -7, 70 }, { -6, 75 }, { -8, 89 }, - { -34, 119 }, { -3, 75 }, { 32, 20 }, { 30, 22 }, - { -44, 127 }, { 0, 54 }, { -5, 61 }, { 0, 58 }, - { -1, 60 }, { -3, 61 }, { -8, 67 }, { -25, 84 }, - { -14, 74 }, { -5, 65 }, { 5, 52 }, { 2, 57 }, - { 0, 61 }, { -9, 69 }, { -11, 70 }, { 18, 55 }, - { 4, 45 
}, { 10, 28 }, { 10, 31 }, { 33, -11 }, - { 52, -43 }, { 18, 15 }, { 28, 0 }, { 35, -22 }, - { 38, -25 }, { 34, 0 }, { 39, -18 }, { 32, -12 }, - { 102, -94 }, { 0, 0 }, { 56, -15 }, { 33, -4 }, - { 29, 10 }, { 37, -5 }, { 51, -29 }, { 39, -9 }, - { 52, -34 }, { 69, -58 }, { 67, -63 }, { 44, -5 }, - { 32, 7 }, { 55, -29 }, { 32, 1 }, { 0, 0 }, - { 27, 36 }, { 33, -25 }, { 34, -30 }, { 36, -28 }, - { 38, -28 }, { 38, -27 }, { 34, -18 }, { 35, -16 }, - { 34, -14 }, { 32, -8 }, { 37, -6 }, { 35, 0 }, - { 30, 10 }, { 28, 18 }, { 26, 25 }, { 29, 41 }, - { 4, 45 }, { 10, 28 }, { 10, 31 }, { 33, -11 }, - { 52, -43 }, { 18, 15 }, { 28, 0 }, { 35, -22 }, - { 38, -25 }, { 34, 0 }, { 39, -18 }, { 32, -12 }, - { 102, -94 }, { 0, 0 }, { 56, -15 }, { 33, -4 }, - { 29, 10 }, { 37, -5 }, { 51, -29 }, { 39, -9 }, - { 52, -34 }, { 69, -58 }, { 67, -63 }, { 44, -5 }, - { 32, 7 }, { 55, -29 }, { 32, 1 }, { 0, 0 }, - { 27, 36 }, { 33, -25 }, { 34, -30 }, { 36, -28 }, - { 38, -28 }, { 38, -27 }, { 34, -18 }, { 35, -16 }, - { 34, -14 }, { 32, -8 }, { 37, -6 }, { 35, 0 }, - { 30, 10 }, { 28, 18 }, { 26, 25 }, { 29, 41 }, - { -5, 85 }, { -6, 81 }, { -10, 77 }, { -7, 81 }, - { -17, 80 }, { -18, 73 }, { -4, 74 }, { -10, 83 }, - { -9, 71 }, { -9, 67 }, { -1, 61 }, { -8, 66 }, - { -14, 66 }, { 0, 59 }, { 2, 59 }, { -3, 81 }, - { -3, 76 }, { -7, 72 }, { -6, 78 }, { -12, 72 }, - { -14, 68 }, { -3, 70 }, { -6, 76 }, { -5, 66 }, - { -5, 62 }, { 0, 57 }, { -4, 61 }, { -9, 60 }, - { 1, 54 }, { 2, 58 }, { 17, -10 }, { 32, -13 }, - { 42, -9 }, { 49, -5 }, { 53, 0 }, { 64, 3 }, - { 68, 10 }, { 66, 27 }, { 47, 57 }, { 17, -10 }, - { 32, -13 }, { 42, -9 }, { 49, -5 }, { 53, 0 }, - { 64, 3 }, { 68, 10 }, { 66, 27 }, { 47, 57 }, - { -5, 71 }, { 0, 24 }, { -1, 36 }, { -2, 42 }, - { -2, 52 }, { -9, 57 }, { -6, 63 }, { -4, 65 }, - { -4, 67 }, { -7, 82 }, { -5, 85 }, { -6, 81 }, - { -10, 77 }, { -7, 81 }, { -17, 80 }, { -18, 73 }, - { -4, 74 }, { -10, 83 }, { -9, 71 }, { -9, 67 }, - { -1, 61 }, { -8, 66 }, { 
-14, 66 }, { 0, 59 }, - { 2, 59 }, { -3, 81 }, { -3, 76 }, { -7, 72 }, - { -6, 78 }, { -12, 72 }, { -14, 68 }, { -3, 70 }, - { -6, 76 }, { -5, 66 }, { -5, 62 }, { 0, 57 }, - { -4, 61 }, { -9, 60 }, { 1, 54 }, { 2, 58 }, - { 17, -10 }, { 32, -13 }, { 42, -9 }, { 49, -5 }, - { 53, 0 }, { 64, 3 }, { 68, 10 }, { 66, 27 }, - { 47, 57 }, { 17, -10 }, { 32, -13 }, { 42, -9 }, - { 49, -5 }, { 53, 0 }, { 64, 3 }, { 68, 10 }, - { 66, 27 }, { 47, 57 }, { -5, 71 }, { 0, 24 }, - { -1, 36 }, { -2, 42 }, { -2, 52 }, { -9, 57 }, - { -6, 63 }, { -4, 65 }, { -4, 67 }, { -7, 82 }, - { -21, 126 }, { -23, 124 }, { -20, 110 }, { -26, 126 }, - { -25, 124 }, { -17, 105 }, { -27, 121 }, { -27, 117 }, - { -17, 102 }, { -26, 117 }, { -27, 116 }, { -33, 122 }, - { -10, 95 }, { -14, 100 }, { -8, 95 }, { -17, 111 }, - { -28, 114 }, { -6, 89 }, { -2, 80 }, { -4, 82 }, - { -9, 85 }, { -8, 81 }, { -1, 72 }, { 5, 64 }, - { 1, 67 }, { 9, 56 }, { 0, 69 }, { 1, 69 }, - { 7, 69 }, { -7, 69 }, { -6, 67 }, { -16, 77 }, - { -2, 64 }, { 2, 61 }, { -6, 67 }, { -3, 64 }, - { 2, 57 }, { -3, 65 }, { -3, 66 }, { 0, 62 }, - { 9, 51 }, { -1, 66 }, { -2, 71 }, { -2, 75 }, - { -21, 126 }, { -23, 124 }, { -20, 110 }, { -26, 126 }, - { -25, 124 }, { -17, 105 }, { -27, 121 }, { -27, 117 }, - { -17, 102 }, { -26, 117 }, { -27, 116 }, { -33, 122 }, - { -10, 95 }, { -14, 100 }, { -8, 95 }, { -17, 111 }, - { -28, 114 }, { -6, 89 }, { -2, 80 }, { -4, 82 }, - { -9, 85 }, { -8, 81 }, { -1, 72 }, { 5, 64 }, - { 1, 67 }, { 9, 56 }, { 0, 69 }, { 1, 69 }, - { 7, 69 }, { -7, 69 }, { -6, 67 }, { -16, 77 }, - { -2, 64 }, { 2, 61 }, { -6, 67 }, { -3, 64 }, - { 2, 57 }, { -3, 65 }, { -3, 66 }, { 0, 62 }, - { 9, 51 }, { -1, 66 }, { -2, 71 }, { -2, 75 }, - { 19, -6 }, { 18, -6 }, { 14, 0 }, { 26, -12 }, - { 31, -16 }, { 33, -25 }, { 33, -22 }, { 37, -28 }, - { 39, -30 }, { 42, -30 }, { 47, -42 }, { 45, -36 }, - { 49, -34 }, { 41, -17 }, { 32, 9 }, { 69, -71 }, - { 63, -63 }, { 66, -64 }, { 77, -74 }, { 54, -39 }, - { 52, -35 }, { 41, 
-10 }, { 36, 0 }, { 40, -1 }, - { 30, 14 }, { 28, 26 }, { 23, 37 }, { 12, 55 }, - { 11, 65 }, { 37, -33 }, { 39, -36 }, { 40, -37 }, - { 38, -30 }, { 46, -33 }, { 42, -30 }, { 40, -24 }, - { 49, -29 }, { 38, -12 }, { 40, -10 }, { 38, -3 }, - { 46, -5 }, { 31, 20 }, { 29, 30 }, { 25, 44 }, - { 19, -6 }, { 18, -6 }, { 14, 0 }, { 26, -12 }, - { 31, -16 }, { 33, -25 }, { 33, -22 }, { 37, -28 }, - { 39, -30 }, { 42, -30 }, { 47, -42 }, { 45, -36 }, - { 49, -34 }, { 41, -17 }, { 32, 9 }, { 69, -71 }, - { 63, -63 }, { 66, -64 }, { 77, -74 }, { 54, -39 }, - { 52, -35 }, { 41, -10 }, { 36, 0 }, { 40, -1 }, - { 30, 14 }, { 28, 26 }, { 23, 37 }, { 12, 55 }, - { 11, 65 }, { 37, -33 }, { 39, -36 }, { 40, -37 }, - { 38, -30 }, { 46, -33 }, { 42, -30 }, { 40, -24 }, - { 49, -29 }, { 38, -12 }, { 40, -10 }, { 38, -3 }, - { 46, -5 }, { 31, 20 }, { 29, 30 }, { 25, 44 }, - { -23, 112 }, { -15, 71 }, { -7, 61 }, { 0, 53 }, - { -5, 66 }, { -11, 77 }, { -9, 80 }, { -9, 84 }, - { -10, 87 }, { -34, 127 }, { -21, 101 }, { -3, 39 }, - { -5, 53 }, { -7, 61 }, { -11, 75 }, { -15, 77 }, - { -17, 91 }, { -25, 107 }, { -25, 111 }, { -28, 122 }, - { -11, 76 }, { -10, 44 }, { -10, 52 }, { -10, 57 }, - { -9, 58 }, { -16, 72 }, { -7, 69 }, { -4, 69 }, - { -5, 74 }, { -9, 86 }, { -23, 112 }, { -15, 71 }, - { -7, 61 }, { 0, 53 }, { -5, 66 }, { -11, 77 }, - { -9, 80 }, { -9, 84 }, { -10, 87 }, { -34, 127 }, - { -21, 101 }, { -3, 39 }, { -5, 53 }, { -7, 61 }, - { -11, 75 }, { -15, 77 }, { -17, 91 }, { -25, 107 }, - { -25, 111 }, { -28, 122 }, { -11, 76 }, { -10, 44 }, - { -10, 52 }, { -10, 57 }, { -9, 58 }, { -16, 72 }, - { -7, 69 }, { -4, 69 }, { -5, 74 }, { -9, 86 }, - { -2, 73 }, { -12, 104 }, { -9, 91 }, { -31, 127 }, - { -2, 73 }, { -12, 104 }, { -9, 91 }, { -31, 127 }, - { -2, 73 }, { -12, 104 }, { -9, 91 }, { -31, 127 } - }, - - /* i_cabac_init_idc == 2 */ - { - /* 0 - 10 */ - { 20, -15 }, { 2, 54 }, { 3, 74 }, { 20, -15 }, - { 2, 54 }, { 3, 74 }, { -28, 127 }, { -23, 104 }, - { -6, 53 }, { -1, 
54 }, { 7, 51 }, - - /* 11 - 23 */ - { 29, 16 }, { 25, 0 }, { 14, 0 }, { -10, 51 }, - { -3, 62 }, { -27, 99 }, { 26, 16 }, { -4, 85 }, - { -24, 102 }, { 5, 57 }, { 6, 57 }, { -17, 73 }, - { 14, 57 }, - - /* 24 - 39 */ - { 20, 40 }, { 20, 10 }, { 29, 0 }, { 54, 0 }, - { 37, 42 }, { 12, 97 }, { -32, 127 }, { -22, 117 }, - { -2, 74 }, { -4, 85 }, { -24, 102 }, { 5, 57 }, - { -6, 93 }, { -14, 88 }, { -6, 44 }, { 4, 55 }, - - /* 40 - 53 */ - { -11, 89 },{ -15, 103 },{ -21, 116 },{ 19, 57 }, - { 20, 58 },{ 4, 84 },{ 6, 96 },{ 1, 63 }, - { -5, 85 },{ -13, 106 },{ 5, 63 },{ 6, 75 }, - { -3, 90 },{ -1, 101 }, - - /* 54 - 59 */ - { 3, 55 },{ -4, 79 },{ -2, 75 },{ -12, 97 }, - { -7, 50 },{ 1, 60 }, - - /* 60 - 69 */ - { 0, 41 }, { 0, 63 }, { 0, 63 }, { 0, 63 }, - { -9, 83 }, { 4, 86 }, { 0, 97 }, { -7, 72 }, - { 13, 41 }, { 3, 62 }, - - /* 70 - 104 */ - { 7, 34 }, { -9, 88 }, { -20, 127 }, { -36, 127 }, - { -17, 91 }, { -14, 95 }, { -25, 84 }, { -25, 86 }, - { -12, 89 }, { -17, 91 }, { -31, 127 }, { -14, 76 }, - { -18, 103 }, { -13, 90 }, { -37, 127 }, { 11, 80 }, - { 5, 76 }, { 2, 84 }, { 5, 78 }, { -6, 55 }, - { 4, 61 }, { -14, 83 }, { -37, 127 }, { -5, 79 }, - { -11, 104 }, { -11, 91 }, { -30, 127 }, { 0, 65 }, - { -2, 79 }, { 0, 72 }, { -4, 92 }, { -6, 56 }, - { 3, 68 }, { -8, 71 }, { -13, 98 }, - - /* 105 -> 165 */ - { -4, 86 }, { -12, 88 }, { -5, 82 }, { -3, 72 }, - { -4, 67 }, { -8, 72 }, { -16, 89 }, { -9, 69 }, - { -1, 59 }, { 5, 66 }, { 4, 57 }, { -4, 71 }, - { -2, 71 }, { 2, 58 }, { -1, 74 }, { -4, 44 }, - { -1, 69 }, { 0, 62 }, { -7, 51 }, { -4, 47 }, - { -6, 42 }, { -3, 41 }, { -6, 53 }, { 8, 76 }, - { -9, 78 }, { -11, 83 }, { 9, 52 }, { 0, 67 }, - { -5, 90 }, { 1, 67 }, { -15, 72 }, { -5, 75 }, - { -8, 80 }, { -21, 83 }, { -21, 64 }, { -13, 31 }, - { -25, 64 }, { -29, 94 }, { 9, 75 }, { 17, 63 }, - { -8, 74 }, { -5, 35 }, { -2, 27 }, { 13, 91 }, - { 3, 65 }, { -7, 69 }, { 8, 77 }, { -10, 66 }, - { 3, 62 }, { -3, 68 }, { -20, 81 }, { 0, 30 }, - { 1, 7 }, { -3, 23 
}, { -21, 74 }, { 16, 66 }, - { -23, 124 }, { 17, 37 }, { 44, -18 }, { 50, -34 }, - { -22, 127 }, - - /* 166 - 226 */ - { 4, 39 }, { 0, 42 }, { 7, 34 }, { 11, 29 }, - { 8, 31 }, { 6, 37 }, { 7, 42 }, { 3, 40 }, - { 8, 33 }, { 13, 43 }, { 13, 36 }, { 4, 47 }, - { 3, 55 }, { 2, 58 }, { 6, 60 }, { 8, 44 }, - { 11, 44 }, { 14, 42 }, { 7, 48 }, { 4, 56 }, - { 4, 52 }, { 13, 37 }, { 9, 49 }, { 19, 58 }, - { 10, 48 }, { 12, 45 }, { 0, 69 }, { 20, 33 }, - { 8, 63 }, { 35, -18 }, { 33, -25 }, { 28, -3 }, - { 24, 10 }, { 27, 0 }, { 34, -14 }, { 52, -44 }, - { 39, -24 }, { 19, 17 }, { 31, 25 }, { 36, 29 }, - { 24, 33 }, { 34, 15 }, { 30, 20 }, { 22, 73 }, - { 20, 34 }, { 19, 31 }, { 27, 44 }, { 19, 16 }, - { 15, 36 }, { 15, 36 }, { 21, 28 }, { 25, 21 }, - { 30, 20 }, { 31, 12 }, { 27, 16 }, { 24, 42 }, - { 0, 93 }, { 14, 56 }, { 15, 57 }, { 26, 38 }, - { -24, 127 }, - - /* 227 - 275 */ - { -24, 115 }, { -22, 82 }, { -9, 62 }, { 0, 53 }, - { 0, 59 }, { -14, 85 }, { -13, 89 }, { -13, 94 }, - { -11, 92 }, { -29, 127 }, { -21, 100 }, { -14, 57 }, - { -12, 67 }, { -11, 71 }, { -10, 77 }, { -21, 85 }, - { -16, 88 }, { -23, 104 }, { -15, 98 }, { -37, 127 }, - { -10, 82 }, { -8, 48 }, { -8, 61 }, { -8, 66 }, - { -7, 70 }, { -14, 75 }, { -10, 79 }, { -9, 83 }, - { -12, 92 }, { -18, 108 }, { -4, 79 }, { -22, 69 }, - { -16, 75 }, { -2, 58 }, { 1, 58 }, { -13, 78 }, - { -9, 83 }, { -4, 81 }, { -13, 99 }, { -13, 81 }, - { -6, 38 }, { -13, 62 }, { -6, 58 }, { -2, 59 }, - { -16, 73 }, { -10, 76 }, { -13, 86 }, { -9, 83 }, - { -10, 87 }, - - /* 276 a bit special (not used, x264_cabac_encode_bypass is used instead) */ - { 0, 0 }, - - /* 277 - 337 */ - { -22, 127 }, { -25, 127 }, { -25, 120 }, { -27, 127 }, - { -19, 114 }, { -23, 117 }, { -25, 118 }, { -26, 117 }, - { -24, 113 }, { -28, 118 }, { -31, 120 }, { -37, 124 }, - { -10, 94 }, { -15, 102 }, { -10, 99 }, { -13, 106 }, - { -50, 127 }, { -5, 92 }, { 17, 57 }, { -5, 86 }, - { -13, 94 }, { -12, 91 }, { -2, 77 }, { 0, 71 }, - { -1, 73 }, { 
4, 64 }, { -7, 81 }, { 5, 64 }, - { 15, 57 }, { 1, 67 }, { 0, 68 }, { -10, 67 }, - { 1, 68 }, { 0, 77 }, { 2, 64 }, { 0, 68 }, - { -5, 78 }, { 7, 55 }, { 5, 59 }, { 2, 65 }, - { 14, 54 }, { 15, 44 }, { 5, 60 }, { 2, 70 }, - { -2, 76 }, { -18, 86 }, { 12, 70 }, { 5, 64 }, - { -12, 70 }, { 11, 55 }, { 5, 56 }, { 0, 69 }, - { 2, 65 }, { -6, 74 }, { 5, 54 }, { 7, 54 }, - { -6, 76 }, { -11, 82 }, { -2, 77 }, { -2, 77 }, - { 25, 42 }, - - /* 338 - 398 */ - { 17, -13 }, { 16, -9 }, { 17, -12 }, { 27, -21 }, - { 37, -30 }, { 41, -40 }, { 42, -41 }, { 48, -47 }, - { 39, -32 }, { 46, -40 }, { 52, -51 }, { 46, -41 }, - { 52, -39 }, { 43, -19 }, { 32, 11 }, { 61, -55 }, - { 56, -46 }, { 62, -50 }, { 81, -67 }, { 45, -20 }, - { 35, -2 }, { 28, 15 }, { 34, 1 }, { 39, 1 }, - { 30, 17 }, { 20, 38 }, { 18, 45 }, { 15, 54 }, - { 0, 79 }, { 36, -16 }, { 37, -14 }, { 37, -17 }, - { 32, 1 }, { 34, 15 }, { 29, 15 }, { 24, 25 }, - { 34, 22 }, { 31, 16 }, { 35, 18 }, { 31, 28 }, - { 33, 41 }, { 36, 28 }, { 27, 47 }, { 21, 62 }, - { 18, 31 }, { 19, 26 }, { 36, 24 }, { 24, 23 }, - { 27, 16 }, { 24, 30 }, { 31, 29 }, { 22, 41 }, - { 22, 42 }, { 16, 60 }, { 15, 52 }, { 14, 60 }, - { 3, 78 }, { -16, 123 }, { 21, 53 }, { 22, 56 }, - { 25, 61 }, - - /* 399 -> 435 */ - { 21, 33 }, { 19, 50 }, { 17, 61 }, - { -3, 78 }, { -8, 74 }, { -9, 72 }, { -10, 72 }, - { -18, 75 }, { -12, 71 }, { -11, 63 }, { -5, 70 }, - { -17, 75 }, { -14, 72 }, { -16, 67 }, { -8, 53 }, - { -14, 59 }, { -9, 52 }, { -11, 68 }, { 9, -2 }, - { 30, -10 }, { 31, -4 }, { 33, -1 }, { 33, 7 }, - { 31, 12 }, { 37, 23 }, { 31, 38 }, { 20, 64 }, - { -9, 71 }, { -7, 37 }, { -8, 44 }, { -11, 49 }, - { -10, 56 }, { -12, 59 }, { -8, 63 }, { -9, 67 }, - { -6, 68 }, { -10, 79 }, - - /* 436 -> 459 */ - { -3, 78 }, { -8, 74 }, { -9, 72 }, { -10, 72 }, - { -18, 75 }, { -12, 71 }, { -11, 63 }, { -5, 70 }, - { -17, 75 }, { -14, 72 }, { -16, 67 }, { -8, 53 }, - { -14, 59 }, { -9, 52 }, { -11, 68 }, { 9, -2 }, - { 30, -10 }, { 31, -4 }, { 33, -1 }, 
{ 33, 7 }, - { 31, 12 }, { 37, 23 }, { 31, 38 }, { 20, 64 }, - - /* 460 - 1024 */ - { 11, 80 }, { 5, 76 }, { 2, 84 }, { 5, 78 }, - { -6, 55 }, { 4, 61 }, { -14, 83 }, { -37, 127 }, - { -5, 79 }, { -11, 104 }, { -11, 91 }, { -30, 127 }, - { 11, 80 }, { 5, 76 }, { 2, 84 }, { 5, 78 }, - { -6, 55 }, { 4, 61 }, { -14, 83 }, { -37, 127 }, - { -5, 79 }, { -11, 104 }, { -11, 91 }, { -30, 127 }, - { -4, 86 }, { -12, 88 }, { -5, 82 }, { -3, 72 }, - { -4, 67 }, { -8, 72 }, { -16, 89 }, { -9, 69 }, - { -1, 59 }, { 5, 66 }, { 4, 57 }, { -4, 71 }, - { -2, 71 }, { 2, 58 }, { -1, 74 }, { -4, 44 }, - { -1, 69 }, { 0, 62 }, { -7, 51 }, { -4, 47 }, - { -6, 42 }, { -3, 41 }, { -6, 53 }, { 8, 76 }, - { -9, 78 }, { -11, 83 }, { 9, 52 }, { 0, 67 }, - { -5, 90 }, { 1, 67 }, { -15, 72 }, { -5, 75 }, - { -8, 80 }, { -21, 83 }, { -21, 64 }, { -13, 31 }, - { -25, 64 }, { -29, 94 }, { 9, 75 }, { 17, 63 }, - { -8, 74 }, { -5, 35 }, { -2, 27 }, { 13, 91 }, - { -4, 86 }, { -12, 88 }, { -5, 82 }, { -3, 72 }, - { -4, 67 }, { -8, 72 }, { -16, 89 }, { -9, 69 }, - { -1, 59 }, { 5, 66 }, { 4, 57 }, { -4, 71 }, - { -2, 71 }, { 2, 58 }, { -1, 74 }, { -4, 44 }, - { -1, 69 }, { 0, 62 }, { -7, 51 }, { -4, 47 }, - { -6, 42 }, { -3, 41 }, { -6, 53 }, { 8, 76 }, - { -9, 78 }, { -11, 83 }, { 9, 52 }, { 0, 67 }, - { -5, 90 }, { 1, 67 }, { -15, 72 }, { -5, 75 }, - { -8, 80 }, { -21, 83 }, { -21, 64 }, { -13, 31 }, - { -25, 64 }, { -29, 94 }, { 9, 75 }, { 17, 63 }, - { -8, 74 }, { -5, 35 }, { -2, 27 }, { 13, 91 }, - { 4, 39 }, { 0, 42 }, { 7, 34 }, { 11, 29 }, - { 8, 31 }, { 6, 37 }, { 7, 42 }, { 3, 40 }, - { 8, 33 }, { 13, 43 }, { 13, 36 }, { 4, 47 }, - { 3, 55 }, { 2, 58 }, { 6, 60 }, { 8, 44 }, - { 11, 44 }, { 14, 42 }, { 7, 48 }, { 4, 56 }, - { 4, 52 }, { 13, 37 }, { 9, 49 }, { 19, 58 }, - { 10, 48 }, { 12, 45 }, { 0, 69 }, { 20, 33 }, - { 8, 63 }, { 35, -18 }, { 33, -25 }, { 28, -3 }, - { 24, 10 }, { 27, 0 }, { 34, -14 }, { 52, -44 }, - { 39, -24 }, { 19, 17 }, { 31, 25 }, { 36, 29 }, - { 24, 33 }, { 34, 15 
}, { 30, 20 }, { 22, 73 }, - { 4, 39 }, { 0, 42 }, { 7, 34 }, { 11, 29 }, - { 8, 31 }, { 6, 37 }, { 7, 42 }, { 3, 40 }, - { 8, 33 }, { 13, 43 }, { 13, 36 }, { 4, 47 }, - { 3, 55 }, { 2, 58 }, { 6, 60 }, { 8, 44 }, - { 11, 44 }, { 14, 42 }, { 7, 48 }, { 4, 56 }, - { 4, 52 }, { 13, 37 }, { 9, 49 }, { 19, 58 }, - { 10, 48 }, { 12, 45 }, { 0, 69 }, { 20, 33 }, - { 8, 63 }, { 35, -18 }, { 33, -25 }, { 28, -3 }, - { 24, 10 }, { 27, 0 }, { 34, -14 }, { 52, -44 }, - { 39, -24 }, { 19, 17 }, { 31, 25 }, { 36, 29 }, - { 24, 33 }, { 34, 15 }, { 30, 20 }, { 22, 73 }, - { -3, 78 }, { -8, 74 }, { -9, 72 }, { -10, 72 }, - { -18, 75 }, { -12, 71 }, { -11, 63 }, { -5, 70 }, - { -17, 75 }, { -14, 72 }, { -16, 67 }, { -8, 53 }, - { -14, 59 }, { -9, 52 }, { -11, 68 }, { -3, 78 }, - { -8, 74 }, { -9, 72 }, { -10, 72 }, { -18, 75 }, - { -12, 71 }, { -11, 63 }, { -5, 70 }, { -17, 75 }, - { -14, 72 }, { -16, 67 }, { -8, 53 }, { -14, 59 }, - { -9, 52 }, { -11, 68 }, { 9, -2 }, { 30, -10 }, - { 31, -4 }, { 33, -1 }, { 33, 7 }, { 31, 12 }, - { 37, 23 }, { 31, 38 }, { 20, 64 }, { 9, -2 }, - { 30, -10 }, { 31, -4 }, { 33, -1 }, { 33, 7 }, - { 31, 12 }, { 37, 23 }, { 31, 38 }, { 20, 64 }, - { -9, 71 }, { -7, 37 }, { -8, 44 }, { -11, 49 }, - { -10, 56 }, { -12, 59 }, { -8, 63 }, { -9, 67 }, - { -6, 68 }, { -10, 79 }, { -3, 78 }, { -8, 74 }, - { -9, 72 }, { -10, 72 }, { -18, 75 }, { -12, 71 }, - { -11, 63 }, { -5, 70 }, { -17, 75 }, { -14, 72 }, - { -16, 67 }, { -8, 53 }, { -14, 59 }, { -9, 52 }, - { -11, 68 }, { -3, 78 }, { -8, 74 }, { -9, 72 }, - { -10, 72 }, { -18, 75 }, { -12, 71 }, { -11, 63 }, - { -5, 70 }, { -17, 75 }, { -14, 72 }, { -16, 67 }, - { -8, 53 }, { -14, 59 }, { -9, 52 }, { -11, 68 }, - { 9, -2 }, { 30, -10 }, { 31, -4 }, { 33, -1 }, - { 33, 7 }, { 31, 12 }, { 37, 23 }, { 31, 38 }, - { 20, 64 }, { 9, -2 }, { 30, -10 }, { 31, -4 }, - { 33, -1 }, { 33, 7 }, { 31, 12 }, { 37, 23 }, - { 31, 38 }, { 20, 64 }, { -9, 71 }, { -7, 37 }, - { -8, 44 }, { -11, 49 }, { -10, 56 }, { -12, 59 
}, - { -8, 63 }, { -9, 67 }, { -6, 68 }, { -10, 79 }, - { -22, 127 }, { -25, 127 }, { -25, 120 }, { -27, 127 }, - { -19, 114 }, { -23, 117 }, { -25, 118 }, { -26, 117 }, - { -24, 113 }, { -28, 118 }, { -31, 120 }, { -37, 124 }, - { -10, 94 }, { -15, 102 }, { -10, 99 }, { -13, 106 }, - { -50, 127 }, { -5, 92 }, { 17, 57 }, { -5, 86 }, - { -13, 94 }, { -12, 91 }, { -2, 77 }, { 0, 71 }, - { -1, 73 }, { 4, 64 }, { -7, 81 }, { 5, 64 }, - { 15, 57 }, { 1, 67 }, { 0, 68 }, { -10, 67 }, - { 1, 68 }, { 0, 77 }, { 2, 64 }, { 0, 68 }, - { -5, 78 }, { 7, 55 }, { 5, 59 }, { 2, 65 }, - { 14, 54 }, { 15, 44 }, { 5, 60 }, { 2, 70 }, - { -22, 127 }, { -25, 127 }, { -25, 120 }, { -27, 127 }, - { -19, 114 }, { -23, 117 }, { -25, 118 }, { -26, 117 }, - { -24, 113 }, { -28, 118 }, { -31, 120 }, { -37, 124 }, - { -10, 94 }, { -15, 102 }, { -10, 99 }, { -13, 106 }, - { -50, 127 }, { -5, 92 }, { 17, 57 }, { -5, 86 }, - { -13, 94 }, { -12, 91 }, { -2, 77 }, { 0, 71 }, - { -1, 73 }, { 4, 64 }, { -7, 81 }, { 5, 64 }, - { 15, 57 }, { 1, 67 }, { 0, 68 }, { -10, 67 }, - { 1, 68 }, { 0, 77 }, { 2, 64 }, { 0, 68 }, - { -5, 78 }, { 7, 55 }, { 5, 59 }, { 2, 65 }, - { 14, 54 }, { 15, 44 }, { 5, 60 }, { 2, 70 }, - { 17, -13 }, { 16, -9 }, { 17, -12 }, { 27, -21 }, - { 37, -30 }, { 41, -40 }, { 42, -41 }, { 48, -47 }, - { 39, -32 }, { 46, -40 }, { 52, -51 }, { 46, -41 }, - { 52, -39 }, { 43, -19 }, { 32, 11 }, { 61, -55 }, - { 56, -46 }, { 62, -50 }, { 81, -67 }, { 45, -20 }, - { 35, -2 }, { 28, 15 }, { 34, 1 }, { 39, 1 }, - { 30, 17 }, { 20, 38 }, { 18, 45 }, { 15, 54 }, - { 0, 79 }, { 36, -16 }, { 37, -14 }, { 37, -17 }, - { 32, 1 }, { 34, 15 }, { 29, 15 }, { 24, 25 }, - { 34, 22 }, { 31, 16 }, { 35, 18 }, { 31, 28 }, - { 33, 41 }, { 36, 28 }, { 27, 47 }, { 21, 62 }, - { 17, -13 }, { 16, -9 }, { 17, -12 }, { 27, -21 }, - { 37, -30 }, { 41, -40 }, { 42, -41 }, { 48, -47 }, - { 39, -32 }, { 46, -40 }, { 52, -51 }, { 46, -41 }, - { 52, -39 }, { 43, -19 }, { 32, 11 }, { 61, -55 }, - { 56, -46 }, { 62, 
-50 }, { 81, -67 }, { 45, -20 }, - { 35, -2 }, { 28, 15 }, { 34, 1 }, { 39, 1 }, - { 30, 17 }, { 20, 38 }, { 18, 45 }, { 15, 54 }, - { 0, 79 }, { 36, -16 }, { 37, -14 }, { 37, -17 }, - { 32, 1 }, { 34, 15 }, { 29, 15 }, { 24, 25 }, - { 34, 22 }, { 31, 16 }, { 35, 18 }, { 31, 28 }, - { 33, 41 }, { 36, 28 }, { 27, 47 }, { 21, 62 }, - { -24, 115 }, { -22, 82 }, { -9, 62 }, { 0, 53 }, - { 0, 59 }, { -14, 85 }, { -13, 89 }, { -13, 94 }, - { -11, 92 }, { -29, 127 }, { -21, 100 }, { -14, 57 }, - { -12, 67 }, { -11, 71 }, { -10, 77 }, { -21, 85 }, - { -16, 88 }, { -23, 104 }, { -15, 98 }, { -37, 127 }, - { -10, 82 }, { -8, 48 }, { -8, 61 }, { -8, 66 }, - { -7, 70 }, { -14, 75 }, { -10, 79 }, { -9, 83 }, - { -12, 92 }, { -18, 108 }, { -24, 115 }, { -22, 82 }, - { -9, 62 }, { 0, 53 }, { 0, 59 }, { -14, 85 }, - { -13, 89 }, { -13, 94 }, { -11, 92 }, { -29, 127 }, - { -21, 100 }, { -14, 57 }, { -12, 67 }, { -11, 71 }, - { -10, 77 }, { -21, 85 }, { -16, 88 }, { -23, 104 }, - { -15, 98 }, { -37, 127 }, { -10, 82 }, { -8, 48 }, - { -8, 61 }, { -8, 66 }, { -7, 70 }, { -14, 75 }, - { -10, 79 }, { -9, 83 }, { -12, 92 }, { -18, 108 }, - { -5, 79 }, { -11, 104 }, { -11, 91 }, { -30, 127 }, - { -5, 79 }, { -11, 104 }, { -11, 91 }, { -30, 127 }, - { -5, 79 }, { -11, 104 }, { -11, 91 }, { -30, 127 } - } -}; - -const uint8_t x264_cabac_range_lps[64][4] = -{ - { 2, 2, 2, 2}, { 6, 7, 8, 9}, { 6, 7, 9, 10}, { 6, 8, 9, 11}, - { 7, 8, 10, 11}, { 7, 9, 10, 12}, { 7, 9, 11, 12}, { 8, 9, 11, 13}, - { 8, 10, 12, 14}, { 9, 11, 12, 14}, { 9, 11, 13, 15}, { 10, 12, 14, 16}, - { 10, 12, 15, 17}, { 11, 13, 15, 18}, { 11, 14, 16, 19}, { 12, 14, 17, 20}, - { 12, 15, 18, 21}, { 13, 16, 19, 22}, { 14, 17, 20, 23}, { 14, 18, 21, 24}, - { 15, 19, 22, 25}, { 16, 20, 23, 27}, { 17, 21, 25, 28}, { 18, 22, 26, 30}, - { 19, 23, 27, 31}, { 20, 24, 29, 33}, { 21, 26, 30, 35}, { 22, 27, 32, 37}, - { 23, 28, 33, 39}, { 24, 30, 35, 41}, { 26, 31, 37, 43}, { 27, 33, 39, 45}, - { 29, 35, 41, 48}, { 30, 37, 43, 50}, { 
32, 39, 46, 53}, { 33, 41, 48, 56}, - { 35, 43, 51, 59}, { 37, 45, 54, 62}, { 39, 48, 56, 65}, { 41, 50, 59, 69}, - { 43, 53, 63, 72}, { 46, 56, 66, 76}, { 48, 59, 69, 80}, { 51, 62, 73, 85}, - { 53, 65, 77, 89}, { 56, 69, 81, 94}, { 59, 72, 86, 99}, { 62, 76, 90, 104}, - { 66, 80, 95, 110}, { 69, 85, 100, 116}, { 73, 89, 105, 122}, { 77, 94, 111, 128}, - { 81, 99, 117, 135}, { 85, 104, 123, 142}, { 90, 110, 130, 150}, { 95, 116, 137, 158}, - {100, 122, 144, 166}, {105, 128, 152, 175}, {111, 135, 160, 185}, {116, 142, 169, 195}, - {123, 150, 178, 205}, {128, 158, 187, 216}, {128, 167, 197, 227}, {128, 176, 208, 240} -}; - -const uint8_t x264_cabac_transition[128][2] = -{ - { 0, 0}, { 1, 1}, { 2, 50}, { 51, 3}, { 2, 50}, { 51, 3}, { 4, 52}, { 53, 5}, - { 6, 52}, { 53, 7}, { 8, 52}, { 53, 9}, { 10, 54}, { 55, 11}, { 12, 54}, { 55, 13}, - { 14, 54}, { 55, 15}, { 16, 56}, { 57, 17}, { 18, 56}, { 57, 19}, { 20, 56}, { 57, 21}, - { 22, 58}, { 59, 23}, { 24, 58}, { 59, 25}, { 26, 60}, { 61, 27}, { 28, 60}, { 61, 29}, - { 30, 60}, { 61, 31}, { 32, 62}, { 63, 33}, { 34, 62}, { 63, 35}, { 36, 64}, { 65, 37}, - { 38, 66}, { 67, 39}, { 40, 66}, { 67, 41}, { 42, 66}, { 67, 43}, { 44, 68}, { 69, 45}, - { 46, 68}, { 69, 47}, { 48, 70}, { 71, 49}, { 50, 72}, { 73, 51}, { 52, 72}, { 73, 53}, - { 54, 74}, { 75, 55}, { 56, 74}, { 75, 57}, { 58, 76}, { 77, 59}, { 60, 78}, { 79, 61}, - { 62, 78}, { 79, 63}, { 64, 80}, { 81, 65}, { 66, 82}, { 83, 67}, { 68, 82}, { 83, 69}, - { 70, 84}, { 85, 71}, { 72, 84}, { 85, 73}, { 74, 88}, { 89, 75}, { 76, 88}, { 89, 77}, - { 78, 90}, { 91, 79}, { 80, 90}, { 91, 81}, { 82, 94}, { 95, 83}, { 84, 94}, { 95, 85}, - { 86, 96}, { 97, 87}, { 88, 96}, { 97, 89}, { 90, 100}, {101, 91}, { 92, 100}, {101, 93}, - { 94, 102}, {103, 95}, { 96, 104}, {105, 97}, { 98, 104}, {105, 99}, {100, 108}, {109, 101}, - {102, 108}, {109, 103}, {104, 110}, {111, 105}, {106, 112}, {113, 107}, {108, 114}, {115, 109}, - {110, 116}, {117, 111}, {112, 118}, {119, 113}, {114, 
118}, {119, 115}, {116, 122}, {123, 117}, - {118, 122}, {123, 119}, {120, 124}, {125, 121}, {122, 126}, {127, 123}, {124, 127}, {126, 125} -}; - -const uint8_t x264_cabac_renorm_shift[64] = -{ - 6,5,4,4,3,3,3,3,2,2,2,2,2,2,2,2, - 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, - 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, - 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, -}; - -/* -ln2(probability) */ -const uint16_t x264_cabac_entropy[128] = -{ - FIX8(0.0273), FIX8(5.7370), FIX8(0.0288), FIX8(5.6618), - FIX8(0.0303), FIX8(5.5866), FIX8(0.0320), FIX8(5.5114), - FIX8(0.0337), FIX8(5.4362), FIX8(0.0355), FIX8(5.3610), - FIX8(0.0375), FIX8(5.2859), FIX8(0.0395), FIX8(5.2106), - FIX8(0.0416), FIX8(5.1354), FIX8(0.0439), FIX8(5.0602), - FIX8(0.0463), FIX8(4.9851), FIX8(0.0488), FIX8(4.9099), - FIX8(0.0515), FIX8(4.8347), FIX8(0.0543), FIX8(4.7595), - FIX8(0.0572), FIX8(4.6843), FIX8(0.0604), FIX8(4.6091), - FIX8(0.0637), FIX8(4.5339), FIX8(0.0671), FIX8(4.4588), - FIX8(0.0708), FIX8(4.3836), FIX8(0.0747), FIX8(4.3083), - FIX8(0.0788), FIX8(4.2332), FIX8(0.0832), FIX8(4.1580), - FIX8(0.0878), FIX8(4.0828), FIX8(0.0926), FIX8(4.0076), - FIX8(0.0977), FIX8(3.9324), FIX8(0.1032), FIX8(3.8572), - FIX8(0.1089), FIX8(3.7820), FIX8(0.1149), FIX8(3.7068), - FIX8(0.1214), FIX8(3.6316), FIX8(0.1282), FIX8(3.5565), - FIX8(0.1353), FIX8(3.4813), FIX8(0.1429), FIX8(3.4061), - FIX8(0.1510), FIX8(3.3309), FIX8(0.1596), FIX8(3.2557), - FIX8(0.1686), FIX8(3.1805), FIX8(0.1782), FIX8(3.1053), - FIX8(0.1884), FIX8(3.0301), FIX8(0.1992), FIX8(2.9549), - FIX8(0.2107), FIX8(2.8797), FIX8(0.2229), FIX8(2.8046), - FIX8(0.2358), FIX8(2.7294), FIX8(0.2496), FIX8(2.6542), - FIX8(0.2642), FIX8(2.5790), FIX8(0.2798), FIX8(2.5038), - FIX8(0.2964), FIX8(2.4286), FIX8(0.3142), FIX8(2.3534), - FIX8(0.3331), FIX8(2.2782), FIX8(0.3532), FIX8(2.2030), - FIX8(0.3748), FIX8(2.1278), FIX8(0.3979), FIX8(2.0527), - FIX8(0.4226), FIX8(1.9775), FIX8(0.4491), FIX8(1.9023), - FIX8(0.4776), FIX8(1.8271), FIX8(0.5082), FIX8(1.7519), - FIX8(0.5412), 
FIX8(1.6767), FIX8(0.5768), FIX8(1.6015), - FIX8(0.6152), FIX8(1.5263), FIX8(0.6568), FIX8(1.4511), - FIX8(0.7020), FIX8(1.3759), FIX8(0.7513), FIX8(1.3008), - FIX8(0.8050), FIX8(1.2256), FIX8(0.8638), FIX8(1.1504), - FIX8(0.9285), FIX8(1.0752), FIX8(1.0000), FIX8(1.0000) -}; - -uint8_t x264_cabac_contexts[4][QP_MAX_SPEC+1][1024]; +static uint8_t cabac_contexts[4][QP_MAX_SPEC+1][1024]; void x264_cabac_init( x264_t *h ) { @@ -1338,17 +40,14 @@ for( int j = 0; j < ctx_count; j++ ) { int state = x264_clip3( (((*cabac_context_init)[j][0] * qp) >> 4) + (*cabac_context_init)[j][1], 1, 126 ); - x264_cabac_contexts[i][qp][j] = (X264_MIN( state, 127-state ) << 1) | (state >> 6); + cabac_contexts[i][qp][j] = (X264_MIN( state, 127-state ) << 1) | (state >> 6); } } } -/***************************************************************************** - * - *****************************************************************************/ void x264_cabac_context_init( x264_t *h, x264_cabac_t *cb, int i_slice_type, int i_qp, int i_model ) { - memcpy( cb->state, x264_cabac_contexts[i_slice_type == SLICE_TYPE_I ? 0 : i_model + 1][i_qp], CHROMA444 ? 1024 : 460 ); + memcpy( cb->state, cabac_contexts[i_slice_type == SLICE_TYPE_I ? 0 : i_model + 1][i_qp], CHROMA444 ? 
1024 : 460 ); } void x264_cabac_encode_init_core( x264_cabac_t *cb ) @@ -1367,7 +66,7 @@ cb->p_end = p_end; } -static inline void x264_cabac_putbyte( x264_cabac_t *cb ) +static inline void cabac_putbyte( x264_cabac_t *cb ) { if( cb->i_queue >= 0 ) { @@ -1399,13 +98,13 @@ } } -static inline void x264_cabac_encode_renorm( x264_cabac_t *cb ) +static inline void cabac_encode_renorm( x264_cabac_t *cb ) { int shift = x264_cabac_renorm_shift[cb->i_range>>3]; cb->i_range <<= shift; cb->i_low <<= shift; cb->i_queue += shift; - x264_cabac_putbyte( cb ); + cabac_putbyte( cb ); } /* Making custom versions of this function, even in asm, for the cases where @@ -1422,7 +121,7 @@ cb->i_range = i_range_lps; } cb->state[i_ctx] = x264_cabac_transition[i_state][b]; - x264_cabac_encode_renorm( cb ); + cabac_encode_renorm( cb ); } /* Note: b is negated for this function */ @@ -1431,7 +130,7 @@ cb->i_low <<= 1; cb->i_low += b & cb->i_range; cb->i_queue += 1; - x264_cabac_putbyte( cb ); + cabac_putbyte( cb ); } static const int bypass_lut[16] = @@ -1452,7 +151,7 @@ cb->i_low <<= i; cb->i_low += ((x>>k)&0xff) * cb->i_range; cb->i_queue += i; - x264_cabac_putbyte( cb ); + cabac_putbyte( cb ); i = 8; } while( k > 0 ); } @@ -1460,7 +159,7 @@ void x264_cabac_encode_terminal_c( x264_cabac_t *cb ) { cb->i_range -= 2; - x264_cabac_encode_renorm( cb ); + cabac_encode_renorm( cb ); } void x264_cabac_encode_flush( x264_t *h, x264_cabac_t *cb ) @@ -1469,12 +168,12 @@ cb->i_low |= 1; cb->i_low <<= 9; cb->i_queue += 9; - x264_cabac_putbyte( cb ); - x264_cabac_putbyte( cb ); + cabac_putbyte( cb ); + cabac_putbyte( cb ); cb->i_low <<= -cb->i_queue; cb->i_low |= (0x35a4e4f5 >> (h->i_frame & 31) & 1) << 10; cb->i_queue = 0; - x264_cabac_putbyte( cb ); + cabac_putbyte( cb ); while( cb->i_bytes_outstanding > 0 ) { diff -Nru x264-0.152.2854+gite9a5903/common/cabac.h x264-0.158.2988+git-20191101.7817004/common/cabac.h --- x264-0.152.2854+gite9a5903/common/cabac.h 2017-12-31 12:50:51.000000000 +0000 +++ 
x264-0.158.2988+git-20191101.7817004/common/cabac.h 2019-11-09 05:16:29.000000000 +0000 @@ -1,7 +1,7 @@ /***************************************************************************** * cabac.h: arithmetic coder ***************************************************************************** - * Copyright (C) 2003-2017 x264 project + * Copyright (C) 2003-2019 x264 project * * Authors: Loren Merritt * Laurent Aimar @@ -51,28 +51,36 @@ uint8_t padding[12]; } x264_cabac_t; -extern const uint8_t x264_cabac_transition[128][2]; -extern const uint16_t x264_cabac_entropy[128]; - /* init the contexts given i_slice_type, the quantif and the model */ +#define x264_cabac_context_init x264_template(cabac_context_init) void x264_cabac_context_init( x264_t *h, x264_cabac_t *cb, int i_slice_type, int i_qp, int i_model ); +#define x264_cabac_encode_init_core x264_template(cabac_encode_init_core) void x264_cabac_encode_init_core( x264_cabac_t *cb ); +#define x264_cabac_encode_init x264_template(cabac_encode_init) void x264_cabac_encode_init( x264_cabac_t *cb, uint8_t *p_data, uint8_t *p_end ); +#define x264_cabac_encode_decision_c x264_template(cabac_encode_decision_c) void x264_cabac_encode_decision_c( x264_cabac_t *cb, int i_ctx, int b ); +#define x264_cabac_encode_decision_asm x264_template(cabac_encode_decision_asm) void x264_cabac_encode_decision_asm( x264_cabac_t *cb, int i_ctx, int b ); +#define x264_cabac_encode_bypass_c x264_template(cabac_encode_bypass_c) void x264_cabac_encode_bypass_c( x264_cabac_t *cb, int b ); +#define x264_cabac_encode_bypass_asm x264_template(cabac_encode_bypass_asm) void x264_cabac_encode_bypass_asm( x264_cabac_t *cb, int b ); +#define x264_cabac_encode_terminal_c x264_template(cabac_encode_terminal_c) void x264_cabac_encode_terminal_c( x264_cabac_t *cb ); +#define x264_cabac_encode_terminal_asm x264_template(cabac_encode_terminal_asm) void x264_cabac_encode_terminal_asm( x264_cabac_t *cb ); +#define x264_cabac_encode_ue_bypass 
x264_template(cabac_encode_ue_bypass) void x264_cabac_encode_ue_bypass( x264_cabac_t *cb, int exp_bits, int val ); +#define x264_cabac_encode_flush x264_template(cabac_encode_flush) void x264_cabac_encode_flush( x264_t *h, x264_cabac_t *cb ); #if HAVE_MMX #define x264_cabac_encode_decision x264_cabac_encode_decision_asm #define x264_cabac_encode_bypass x264_cabac_encode_bypass_asm #define x264_cabac_encode_terminal x264_cabac_encode_terminal_asm -#elif defined(ARCH_AARCH64) +#elif HAVE_AARCH64 #define x264_cabac_encode_decision x264_cabac_encode_decision_asm #define x264_cabac_encode_bypass x264_cabac_encode_bypass_asm #define x264_cabac_encode_terminal x264_cabac_encode_terminal_asm diff -Nru x264-0.152.2854+gite9a5903/common/common.c x264-0.158.2988+git-20191101.7817004/common/common.c --- x264-0.152.2854+gite9a5903/common/common.c 2017-12-31 12:50:51.000000000 +0000 +++ x264-0.158.2988+git-20191101.7817004/common/common.c 2019-11-09 05:16:29.000000000 +0000 @@ -1,7 +1,7 @@ /***************************************************************************** * common.c: misc common functions ***************************************************************************** - * Copyright (C) 2003-2017 x264 project + * Copyright (C) 2003-2019 x264 project * * Authors: Loren Merritt * Laurent Aimar @@ -26,1049 +26,6 @@ #include "common.h" -#include - -#if HAVE_MALLOC_H -#include -#endif -#if HAVE_THP -#include -#endif - -const int x264_bit_depth = BIT_DEPTH; - -const int x264_chroma_format = X264_CHROMA_FORMAT; - -static void x264_log_default( void *, int, const char *, va_list ); - -/**************************************************************************** - * x264_param_default: - ****************************************************************************/ -void x264_param_default( x264_param_t *param ) -{ - /* */ - memset( param, 0, sizeof( x264_param_t ) ); - - /* CPU autodetect */ - param->cpu = x264_cpu_detect(); - param->i_threads = X264_THREADS_AUTO; - 
param->i_lookahead_threads = X264_THREADS_AUTO; - param->b_deterministic = 1; - param->i_sync_lookahead = X264_SYNC_LOOKAHEAD_AUTO; - - /* Video properties */ - param->i_csp = X264_CHROMA_FORMAT ? X264_CHROMA_FORMAT : X264_CSP_I420; - param->i_width = 0; - param->i_height = 0; - param->vui.i_sar_width = 0; - param->vui.i_sar_height= 0; - param->vui.i_overscan = 0; /* undef */ - param->vui.i_vidformat = 5; /* undef */ - param->vui.b_fullrange = -1; /* default depends on input */ - param->vui.i_colorprim = 2; /* undef */ - param->vui.i_transfer = 2; /* undef */ - param->vui.i_colmatrix = -1; /* default depends on input */ - param->vui.i_chroma_loc= 0; /* left center */ - param->i_fps_num = 25; - param->i_fps_den = 1; - param->i_level_idc = -1; - param->i_slice_max_size = 0; - param->i_slice_max_mbs = 0; - param->i_slice_count = 0; - - /* Encoder parameters */ - param->i_frame_reference = 3; - param->i_keyint_max = 250; - param->i_keyint_min = X264_KEYINT_MIN_AUTO; - param->i_bframe = 3; - param->i_scenecut_threshold = 40; - param->i_bframe_adaptive = X264_B_ADAPT_FAST; - param->i_bframe_bias = 0; - param->i_bframe_pyramid = X264_B_PYRAMID_NORMAL; - param->b_interlaced = 0; - param->b_constrained_intra = 0; - - param->b_deblocking_filter = 1; - param->i_deblocking_filter_alphac0 = 0; - param->i_deblocking_filter_beta = 0; - - param->b_cabac = 1; - param->i_cabac_init_idc = 0; - - param->rc.i_rc_method = X264_RC_CRF; - param->rc.i_bitrate = 0; - param->rc.f_rate_tolerance = 1.0; - param->rc.i_vbv_max_bitrate = 0; - param->rc.i_vbv_buffer_size = 0; - param->rc.f_vbv_buffer_init = 0.9; - param->rc.i_qp_constant = 23 + QP_BD_OFFSET; - param->rc.f_rf_constant = 23; - param->rc.i_qp_min = 0; - param->rc.i_qp_max = QP_MAX; - param->rc.i_qp_step = 4; - param->rc.f_ip_factor = 1.4; - param->rc.f_pb_factor = 1.3; - param->rc.i_aq_mode = X264_AQ_VARIANCE; - param->rc.f_aq_strength = 1.0; - param->rc.i_lookahead = 40; - - param->rc.b_stat_write = 0; - param->rc.psz_stat_out = 
"x264_2pass.log"; - param->rc.b_stat_read = 0; - param->rc.psz_stat_in = "x264_2pass.log"; - param->rc.f_qcompress = 0.6; - param->rc.f_qblur = 0.5; - param->rc.f_complexity_blur = 20; - param->rc.i_zones = 0; - param->rc.b_mb_tree = 1; - - /* Log */ - param->pf_log = x264_log_default; - param->p_log_private = NULL; - param->i_log_level = X264_LOG_INFO; - - /* */ - param->analyse.intra = X264_ANALYSE_I4x4 | X264_ANALYSE_I8x8; - param->analyse.inter = X264_ANALYSE_I4x4 | X264_ANALYSE_I8x8 - | X264_ANALYSE_PSUB16x16 | X264_ANALYSE_BSUB16x16; - param->analyse.i_direct_mv_pred = X264_DIRECT_PRED_SPATIAL; - param->analyse.i_me_method = X264_ME_HEX; - param->analyse.f_psy_rd = 1.0; - param->analyse.b_psy = 1; - param->analyse.f_psy_trellis = 0; - param->analyse.i_me_range = 16; - param->analyse.i_subpel_refine = 7; - param->analyse.b_mixed_references = 1; - param->analyse.b_chroma_me = 1; - param->analyse.i_mv_range_thread = -1; - param->analyse.i_mv_range = -1; // set from level_idc - param->analyse.i_chroma_qp_offset = 0; - param->analyse.b_fast_pskip = 1; - param->analyse.b_weighted_bipred = 1; - param->analyse.i_weighted_pred = X264_WEIGHTP_SMART; - param->analyse.b_dct_decimate = 1; - param->analyse.b_transform_8x8 = 1; - param->analyse.i_trellis = 1; - param->analyse.i_luma_deadzone[0] = 21; - param->analyse.i_luma_deadzone[1] = 11; - param->analyse.b_psnr = 0; - param->analyse.b_ssim = 0; - - param->i_cqm_preset = X264_CQM_FLAT; - memset( param->cqm_4iy, 16, sizeof( param->cqm_4iy ) ); - memset( param->cqm_4py, 16, sizeof( param->cqm_4py ) ); - memset( param->cqm_4ic, 16, sizeof( param->cqm_4ic ) ); - memset( param->cqm_4pc, 16, sizeof( param->cqm_4pc ) ); - memset( param->cqm_8iy, 16, sizeof( param->cqm_8iy ) ); - memset( param->cqm_8py, 16, sizeof( param->cqm_8py ) ); - memset( param->cqm_8ic, 16, sizeof( param->cqm_8ic ) ); - memset( param->cqm_8pc, 16, sizeof( param->cqm_8pc ) ); - - param->b_repeat_headers = 1; - param->b_annexb = 1; - param->b_aud = 0; - 
param->b_vfr_input = 1; - param->i_nal_hrd = X264_NAL_HRD_NONE; - param->b_tff = 1; - param->b_pic_struct = 0; - param->b_fake_interlaced = 0; - param->i_frame_packing = -1; - param->b_opencl = 0; - param->i_opencl_device = 0; - param->opencl_device_id = NULL; - param->psz_clbin_file = NULL; -} - -static int x264_param_apply_preset( x264_param_t *param, const char *preset ) -{ - char *end; - int i = strtol( preset, &end, 10 ); - if( *end == 0 && i >= 0 && i < sizeof(x264_preset_names)/sizeof(*x264_preset_names)-1 ) - preset = x264_preset_names[i]; - - if( !strcasecmp( preset, "ultrafast" ) ) - { - param->i_frame_reference = 1; - param->i_scenecut_threshold = 0; - param->b_deblocking_filter = 0; - param->b_cabac = 0; - param->i_bframe = 0; - param->analyse.intra = 0; - param->analyse.inter = 0; - param->analyse.b_transform_8x8 = 0; - param->analyse.i_me_method = X264_ME_DIA; - param->analyse.i_subpel_refine = 0; - param->rc.i_aq_mode = 0; - param->analyse.b_mixed_references = 0; - param->analyse.i_trellis = 0; - param->i_bframe_adaptive = X264_B_ADAPT_NONE; - param->rc.b_mb_tree = 0; - param->analyse.i_weighted_pred = X264_WEIGHTP_NONE; - param->analyse.b_weighted_bipred = 0; - param->rc.i_lookahead = 0; - } - else if( !strcasecmp( preset, "superfast" ) ) - { - param->analyse.inter = X264_ANALYSE_I8x8|X264_ANALYSE_I4x4; - param->analyse.i_me_method = X264_ME_DIA; - param->analyse.i_subpel_refine = 1; - param->i_frame_reference = 1; - param->analyse.b_mixed_references = 0; - param->analyse.i_trellis = 0; - param->rc.b_mb_tree = 0; - param->analyse.i_weighted_pred = X264_WEIGHTP_SIMPLE; - param->rc.i_lookahead = 0; - } - else if( !strcasecmp( preset, "veryfast" ) ) - { - param->analyse.i_subpel_refine = 2; - param->i_frame_reference = 1; - param->analyse.b_mixed_references = 0; - param->analyse.i_trellis = 0; - param->analyse.i_weighted_pred = X264_WEIGHTP_SIMPLE; - param->rc.i_lookahead = 10; - } - else if( !strcasecmp( preset, "faster" ) ) - { - 
param->analyse.b_mixed_references = 0; - param->i_frame_reference = 2; - param->analyse.i_subpel_refine = 4; - param->analyse.i_weighted_pred = X264_WEIGHTP_SIMPLE; - param->rc.i_lookahead = 20; - } - else if( !strcasecmp( preset, "fast" ) ) - { - param->i_frame_reference = 2; - param->analyse.i_subpel_refine = 6; - param->analyse.i_weighted_pred = X264_WEIGHTP_SIMPLE; - param->rc.i_lookahead = 30; - } - else if( !strcasecmp( preset, "medium" ) ) - { - /* Default is medium */ - } - else if( !strcasecmp( preset, "slow" ) ) - { - param->analyse.i_subpel_refine = 8; - param->i_frame_reference = 5; - param->analyse.i_direct_mv_pred = X264_DIRECT_PRED_AUTO; - param->analyse.i_trellis = 2; - param->rc.i_lookahead = 50; - } - else if( !strcasecmp( preset, "slower" ) ) - { - param->analyse.i_me_method = X264_ME_UMH; - param->analyse.i_subpel_refine = 9; - param->i_frame_reference = 8; - param->i_bframe_adaptive = X264_B_ADAPT_TRELLIS; - param->analyse.i_direct_mv_pred = X264_DIRECT_PRED_AUTO; - param->analyse.inter |= X264_ANALYSE_PSUB8x8; - param->analyse.i_trellis = 2; - param->rc.i_lookahead = 60; - } - else if( !strcasecmp( preset, "veryslow" ) ) - { - param->analyse.i_me_method = X264_ME_UMH; - param->analyse.i_subpel_refine = 10; - param->analyse.i_me_range = 24; - param->i_frame_reference = 16; - param->i_bframe_adaptive = X264_B_ADAPT_TRELLIS; - param->analyse.i_direct_mv_pred = X264_DIRECT_PRED_AUTO; - param->analyse.inter |= X264_ANALYSE_PSUB8x8; - param->analyse.i_trellis = 2; - param->i_bframe = 8; - param->rc.i_lookahead = 60; - } - else if( !strcasecmp( preset, "placebo" ) ) - { - param->analyse.i_me_method = X264_ME_TESA; - param->analyse.i_subpel_refine = 11; - param->analyse.i_me_range = 24; - param->i_frame_reference = 16; - param->i_bframe_adaptive = X264_B_ADAPT_TRELLIS; - param->analyse.i_direct_mv_pred = X264_DIRECT_PRED_AUTO; - param->analyse.inter |= X264_ANALYSE_PSUB8x8; - param->analyse.b_fast_pskip = 0; - param->analyse.i_trellis = 2; - 
param->i_bframe = 16; - param->rc.i_lookahead = 60; - } - else - { - x264_log( NULL, X264_LOG_ERROR, "invalid preset '%s'\n", preset ); - return -1; - } - return 0; -} - -static int x264_param_apply_tune( x264_param_t *param, const char *tune ) -{ - char *tmp = x264_malloc( strlen( tune ) + 1 ); - if( !tmp ) - return -1; - tmp = strcpy( tmp, tune ); - char *s = strtok( tmp, ",./-+" ); - int psy_tuning_used = 0; - while( s ) - { - if( !strncasecmp( s, "film", 4 ) ) - { - if( psy_tuning_used++ ) goto psy_failure; - param->i_deblocking_filter_alphac0 = -1; - param->i_deblocking_filter_beta = -1; - param->analyse.f_psy_trellis = 0.15; - } - else if( !strncasecmp( s, "animation", 9 ) ) - { - if( psy_tuning_used++ ) goto psy_failure; - param->i_frame_reference = param->i_frame_reference > 1 ? param->i_frame_reference*2 : 1; - param->i_deblocking_filter_alphac0 = 1; - param->i_deblocking_filter_beta = 1; - param->analyse.f_psy_rd = 0.4; - param->rc.f_aq_strength = 0.6; - param->i_bframe += 2; - } - else if( !strncasecmp( s, "grain", 5 ) ) - { - if( psy_tuning_used++ ) goto psy_failure; - param->i_deblocking_filter_alphac0 = -2; - param->i_deblocking_filter_beta = -2; - param->analyse.f_psy_trellis = 0.25; - param->analyse.b_dct_decimate = 0; - param->rc.f_pb_factor = 1.1; - param->rc.f_ip_factor = 1.1; - param->rc.f_aq_strength = 0.5; - param->analyse.i_luma_deadzone[0] = 6; - param->analyse.i_luma_deadzone[1] = 6; - param->rc.f_qcompress = 0.8; - } - else if( !strncasecmp( s, "stillimage", 10 ) ) - { - if( psy_tuning_used++ ) goto psy_failure; - param->i_deblocking_filter_alphac0 = -3; - param->i_deblocking_filter_beta = -3; - param->analyse.f_psy_rd = 2.0; - param->analyse.f_psy_trellis = 0.7; - param->rc.f_aq_strength = 1.2; - } - else if( !strncasecmp( s, "psnr", 4 ) ) - { - if( psy_tuning_used++ ) goto psy_failure; - param->rc.i_aq_mode = X264_AQ_NONE; - param->analyse.b_psy = 0; - } - else if( !strncasecmp( s, "ssim", 4 ) ) - { - if( psy_tuning_used++ ) goto 
psy_failure; - param->rc.i_aq_mode = X264_AQ_AUTOVARIANCE; - param->analyse.b_psy = 0; - } - else if( !strncasecmp( s, "fastdecode", 10 ) ) - { - param->b_deblocking_filter = 0; - param->b_cabac = 0; - param->analyse.b_weighted_bipred = 0; - param->analyse.i_weighted_pred = X264_WEIGHTP_NONE; - } - else if( !strncasecmp( s, "zerolatency", 11 ) ) - { - param->rc.i_lookahead = 0; - param->i_sync_lookahead = 0; - param->i_bframe = 0; - param->b_sliced_threads = 1; - param->b_vfr_input = 0; - param->rc.b_mb_tree = 0; - } - else if( !strncasecmp( s, "touhou", 6 ) ) - { - if( psy_tuning_used++ ) goto psy_failure; - param->i_frame_reference = param->i_frame_reference > 1 ? param->i_frame_reference*2 : 1; - param->i_deblocking_filter_alphac0 = -1; - param->i_deblocking_filter_beta = -1; - param->analyse.f_psy_trellis = 0.2; - param->rc.f_aq_strength = 1.3; - if( param->analyse.inter & X264_ANALYSE_PSUB16x16 ) - param->analyse.inter |= X264_ANALYSE_PSUB8x8; - } - else - { - x264_log( NULL, X264_LOG_ERROR, "invalid tune '%s'\n", s ); - x264_free( tmp ); - return -1; - } - if( 0 ) - { - psy_failure: - x264_log( NULL, X264_LOG_WARNING, "only 1 psy tuning can be used: ignoring tune %s\n", s ); - } - s = strtok( NULL, ",./-+" ); - } - x264_free( tmp ); - return 0; -} - -int x264_param_default_preset( x264_param_t *param, const char *preset, const char *tune ) -{ - x264_param_default( param ); - - if( preset && x264_param_apply_preset( param, preset ) < 0 ) - return -1; - if( tune && x264_param_apply_tune( param, tune ) < 0 ) - return -1; - return 0; -} - -void x264_param_apply_fastfirstpass( x264_param_t *param ) -{ - /* Set faster options in case of turbo firstpass. 
*/ - if( param->rc.b_stat_write && !param->rc.b_stat_read ) - { - param->i_frame_reference = 1; - param->analyse.b_transform_8x8 = 0; - param->analyse.inter = 0; - param->analyse.i_me_method = X264_ME_DIA; - param->analyse.i_subpel_refine = X264_MIN( 2, param->analyse.i_subpel_refine ); - param->analyse.i_trellis = 0; - param->analyse.b_fast_pskip = 1; - } -} - -static int profile_string_to_int( const char *str ) -{ - if( !strcasecmp( str, "baseline" ) ) - return PROFILE_BASELINE; - if( !strcasecmp( str, "main" ) ) - return PROFILE_MAIN; - if( !strcasecmp( str, "high" ) ) - return PROFILE_HIGH; - if( !strcasecmp( str, "high10" ) ) - return PROFILE_HIGH10; - if( !strcasecmp( str, "high422" ) ) - return PROFILE_HIGH422; - if( !strcasecmp( str, "high444" ) ) - return PROFILE_HIGH444_PREDICTIVE; - return -1; -} - -int x264_param_apply_profile( x264_param_t *param, const char *profile ) -{ - if( !profile ) - return 0; - - int p = profile_string_to_int( profile ); - if( p < 0 ) - { - x264_log( NULL, X264_LOG_ERROR, "invalid profile: %s\n", profile ); - return -1; - } - if( p < PROFILE_HIGH444_PREDICTIVE && ((param->rc.i_rc_method == X264_RC_CQP && param->rc.i_qp_constant <= 0) || - (param->rc.i_rc_method == X264_RC_CRF && (int)(param->rc.f_rf_constant + QP_BD_OFFSET) <= 0)) ) - { - x264_log( NULL, X264_LOG_ERROR, "%s profile doesn't support lossless\n", profile ); - return -1; - } - if( p < PROFILE_HIGH444_PREDICTIVE && (param->i_csp & X264_CSP_MASK) >= X264_CSP_I444 ) - { - x264_log( NULL, X264_LOG_ERROR, "%s profile doesn't support 4:4:4\n", profile ); - return -1; - } - if( p < PROFILE_HIGH422 && (param->i_csp & X264_CSP_MASK) >= X264_CSP_I422 ) - { - x264_log( NULL, X264_LOG_ERROR, "%s profile doesn't support 4:2:2\n", profile ); - return -1; - } - if( p < PROFILE_HIGH10 && BIT_DEPTH > 8 ) - { - x264_log( NULL, X264_LOG_ERROR, "%s profile doesn't support a bit depth of %d\n", profile, BIT_DEPTH ); - return -1; - } - - if( p == PROFILE_BASELINE ) - { - 
param->analyse.b_transform_8x8 = 0; - param->b_cabac = 0; - param->i_cqm_preset = X264_CQM_FLAT; - param->psz_cqm_file = NULL; - param->i_bframe = 0; - param->analyse.i_weighted_pred = X264_WEIGHTP_NONE; - if( param->b_interlaced ) - { - x264_log( NULL, X264_LOG_ERROR, "baseline profile doesn't support interlacing\n" ); - return -1; - } - if( param->b_fake_interlaced ) - { - x264_log( NULL, X264_LOG_ERROR, "baseline profile doesn't support fake interlacing\n" ); - return -1; - } - } - else if( p == PROFILE_MAIN ) - { - param->analyse.b_transform_8x8 = 0; - param->i_cqm_preset = X264_CQM_FLAT; - param->psz_cqm_file = NULL; - } - return 0; -} - -static int parse_enum( const char *arg, const char * const *names, int *dst ) -{ - for( int i = 0; names[i]; i++ ) - if( !strcasecmp( arg, names[i] ) ) - { - *dst = i; - return 0; - } - return -1; -} - -static int parse_cqm( const char *str, uint8_t *cqm, int length ) -{ - int i = 0; - do { - int coef; - if( !sscanf( str, "%d", &coef ) || coef < 1 || coef > 255 ) - return -1; - cqm[i++] = coef; - } while( i < length && (str = strchr( str, ',' )) && str++ ); - return (i == length) ? 
0 : -1; -} - -static int x264_atobool( const char *str, int *b_error ) -{ - if( !strcmp(str, "1") || - !strcasecmp(str, "true") || - !strcasecmp(str, "yes") ) - return 1; - if( !strcmp(str, "0") || - !strcasecmp(str, "false") || - !strcasecmp(str, "no") ) - return 0; - *b_error = 1; - return 0; -} - -static int x264_atoi( const char *str, int *b_error ) -{ - char *end; - int v = strtol( str, &end, 0 ); - if( end == str || *end != '\0' ) - *b_error = 1; - return v; -} - -static double x264_atof( const char *str, int *b_error ) -{ - char *end; - double v = strtod( str, &end ); - if( end == str || *end != '\0' ) - *b_error = 1; - return v; -} - -#define atobool(str) ( name_was_bool = 1, x264_atobool( str, &b_error ) ) -#undef atoi -#undef atof -#define atoi(str) x264_atoi( str, &b_error ) -#define atof(str) x264_atof( str, &b_error ) - -int x264_param_parse( x264_param_t *p, const char *name, const char *value ) -{ - char *name_buf = NULL; - int b_error = 0; - int errortype = X264_PARAM_BAD_VALUE; - int name_was_bool; - int value_was_null = !value; - - if( !name ) - return X264_PARAM_BAD_NAME; - if( !value ) - value = "true"; - - if( value[0] == '=' ) - value++; - - if( strchr( name, '_' ) ) // s/_/-/g - { - char *c; - name_buf = strdup(name); - if( !name_buf ) - return X264_PARAM_BAD_NAME; - while( (c = strchr( name_buf, '_' )) ) - *c = '-'; - name = name_buf; - } - - if( !strncmp( name, "no", 2 ) ) - { - name += 2; - if( name[0] == '-' ) - name++; - value = atobool(value) ? "false" : "true"; - } - name_was_bool = 0; - -#define OPT(STR) else if( !strcmp( name, STR ) ) -#define OPT2(STR0, STR1) else if( !strcmp( name, STR0 ) || !strcmp( name, STR1 ) ) - if( 0 ); - OPT("asm") - { - p->cpu = isdigit(value[0]) ? atoi(value) : - !strcasecmp(value, "auto") || atobool(value) ? 
x264_cpu_detect() : 0; - if( b_error ) - { - char *buf = strdup( value ); - if( buf ) - { - char *tok, UNUSED *saveptr=NULL, *init; - b_error = 0; - p->cpu = 0; - for( init=buf; (tok=strtok_r(init, ",", &saveptr)); init=NULL ) - { - int i = 0; - while( x264_cpu_names[i].flags && strcasecmp(tok, x264_cpu_names[i].name) ) - i++; - p->cpu |= x264_cpu_names[i].flags; - if( !x264_cpu_names[i].flags ) - b_error = 1; - } - free( buf ); - if( (p->cpu&X264_CPU_SSSE3) && !(p->cpu&X264_CPU_SSE2_IS_SLOW) ) - p->cpu |= X264_CPU_SSE2_IS_FAST; - } - } - } - OPT("threads") - { - if( !strcasecmp(value, "auto") ) - p->i_threads = X264_THREADS_AUTO; - else - p->i_threads = atoi(value); - } - OPT("lookahead-threads") - { - if( !strcasecmp(value, "auto") ) - p->i_lookahead_threads = X264_THREADS_AUTO; - else - p->i_lookahead_threads = atoi(value); - } - OPT("sliced-threads") - p->b_sliced_threads = atobool(value); - OPT("sync-lookahead") - { - if( !strcasecmp(value, "auto") ) - p->i_sync_lookahead = X264_SYNC_LOOKAHEAD_AUTO; - else - p->i_sync_lookahead = atoi(value); - } - OPT2("deterministic", "n-deterministic") - p->b_deterministic = atobool(value); - OPT("cpu-independent") - p->b_cpu_independent = atobool(value); - OPT2("level", "level-idc") - { - if( !strcmp(value, "1b") ) - p->i_level_idc = 9; - else if( atof(value) < 7 ) - p->i_level_idc = (int)(10*atof(value)+.5); - else - p->i_level_idc = atoi(value); - } - OPT("bluray-compat") - p->b_bluray_compat = atobool(value); - OPT("avcintra-class") - p->i_avcintra_class = atoi(value); - OPT("sar") - { - b_error = ( 2 != sscanf( value, "%d:%d", &p->vui.i_sar_width, &p->vui.i_sar_height ) && - 2 != sscanf( value, "%d/%d", &p->vui.i_sar_width, &p->vui.i_sar_height ) ); - } - OPT("overscan") - b_error |= parse_enum( value, x264_overscan_names, &p->vui.i_overscan ); - OPT("videoformat") - b_error |= parse_enum( value, x264_vidformat_names, &p->vui.i_vidformat ); - OPT("fullrange") - b_error |= parse_enum( value, x264_fullrange_names, 
&p->vui.b_fullrange ); - OPT("colorprim") - b_error |= parse_enum( value, x264_colorprim_names, &p->vui.i_colorprim ); - OPT("transfer") - b_error |= parse_enum( value, x264_transfer_names, &p->vui.i_transfer ); - OPT("colormatrix") - b_error |= parse_enum( value, x264_colmatrix_names, &p->vui.i_colmatrix ); - OPT("chromaloc") - { - p->vui.i_chroma_loc = atoi(value); - b_error = ( p->vui.i_chroma_loc < 0 || p->vui.i_chroma_loc > 5 ); - } - OPT("fps") - { - if( sscanf( value, "%u/%u", &p->i_fps_num, &p->i_fps_den ) != 2 ) - { - double fps = atof(value); - if( fps > 0.0 && fps <= INT_MAX/1000.0 ) - { - p->i_fps_num = (int)(fps * 1000.0 + .5); - p->i_fps_den = 1000; - } - else - { - p->i_fps_num = atoi(value); - p->i_fps_den = 1; - } - } - } - OPT2("ref", "frameref") - p->i_frame_reference = atoi(value); - OPT("dpb-size") - p->i_dpb_size = atoi(value); - OPT("keyint") - { - if( strstr( value, "infinite" ) ) - p->i_keyint_max = X264_KEYINT_MAX_INFINITE; - else - p->i_keyint_max = atoi(value); - } - OPT2("min-keyint", "keyint-min") - { - p->i_keyint_min = atoi(value); - if( p->i_keyint_max < p->i_keyint_min ) - p->i_keyint_max = p->i_keyint_min; - } - OPT("scenecut") - { - p->i_scenecut_threshold = atobool(value); - if( b_error || p->i_scenecut_threshold ) - { - b_error = 0; - p->i_scenecut_threshold = atoi(value); - } - } - OPT("intra-refresh") - p->b_intra_refresh = atobool(value); - OPT("bframes") - p->i_bframe = atoi(value); - OPT("b-adapt") - { - p->i_bframe_adaptive = atobool(value); - if( b_error ) - { - b_error = 0; - p->i_bframe_adaptive = atoi(value); - } - } - OPT("b-bias") - p->i_bframe_bias = atoi(value); - OPT("b-pyramid") - { - b_error |= parse_enum( value, x264_b_pyramid_names, &p->i_bframe_pyramid ); - if( b_error ) - { - b_error = 0; - p->i_bframe_pyramid = atoi(value); - } - } - OPT("open-gop") - p->b_open_gop = atobool(value); - OPT("nf") - p->b_deblocking_filter = !atobool(value); - OPT2("filter", "deblock") - { - if( 2 == sscanf( value, "%d:%d", 
&p->i_deblocking_filter_alphac0, &p->i_deblocking_filter_beta ) || - 2 == sscanf( value, "%d,%d", &p->i_deblocking_filter_alphac0, &p->i_deblocking_filter_beta ) ) - { - p->b_deblocking_filter = 1; - } - else if( sscanf( value, "%d", &p->i_deblocking_filter_alphac0 ) ) - { - p->b_deblocking_filter = 1; - p->i_deblocking_filter_beta = p->i_deblocking_filter_alphac0; - } - else - p->b_deblocking_filter = atobool(value); - } - OPT("slice-max-size") - p->i_slice_max_size = atoi(value); - OPT("slice-max-mbs") - p->i_slice_max_mbs = atoi(value); - OPT("slice-min-mbs") - p->i_slice_min_mbs = atoi(value); - OPT("slices") - p->i_slice_count = atoi(value); - OPT("slices-max") - p->i_slice_count_max = atoi(value); - OPT("cabac") - p->b_cabac = atobool(value); - OPT("cabac-idc") - p->i_cabac_init_idc = atoi(value); - OPT("interlaced") - p->b_interlaced = atobool(value); - OPT("tff") - p->b_interlaced = p->b_tff = atobool(value); - OPT("bff") - { - p->b_interlaced = atobool(value); - p->b_tff = !p->b_interlaced; - } - OPT("constrained-intra") - p->b_constrained_intra = atobool(value); - OPT("cqm") - { - if( strstr( value, "flat" ) ) - p->i_cqm_preset = X264_CQM_FLAT; - else if( strstr( value, "jvt" ) ) - p->i_cqm_preset = X264_CQM_JVT; - else - p->psz_cqm_file = strdup(value); - } - OPT("cqmfile") - p->psz_cqm_file = strdup(value); - OPT("cqm4") - { - p->i_cqm_preset = X264_CQM_CUSTOM; - b_error |= parse_cqm( value, p->cqm_4iy, 16 ); - b_error |= parse_cqm( value, p->cqm_4py, 16 ); - b_error |= parse_cqm( value, p->cqm_4ic, 16 ); - b_error |= parse_cqm( value, p->cqm_4pc, 16 ); - } - OPT("cqm8") - { - p->i_cqm_preset = X264_CQM_CUSTOM; - b_error |= parse_cqm( value, p->cqm_8iy, 64 ); - b_error |= parse_cqm( value, p->cqm_8py, 64 ); - b_error |= parse_cqm( value, p->cqm_8ic, 64 ); - b_error |= parse_cqm( value, p->cqm_8pc, 64 ); - } - OPT("cqm4i") - { - p->i_cqm_preset = X264_CQM_CUSTOM; - b_error |= parse_cqm( value, p->cqm_4iy, 16 ); - b_error |= parse_cqm( value, p->cqm_4ic, 
16 ); - } - OPT("cqm4p") - { - p->i_cqm_preset = X264_CQM_CUSTOM; - b_error |= parse_cqm( value, p->cqm_4py, 16 ); - b_error |= parse_cqm( value, p->cqm_4pc, 16 ); - } - OPT("cqm4iy") - { - p->i_cqm_preset = X264_CQM_CUSTOM; - b_error |= parse_cqm( value, p->cqm_4iy, 16 ); - } - OPT("cqm4ic") - { - p->i_cqm_preset = X264_CQM_CUSTOM; - b_error |= parse_cqm( value, p->cqm_4ic, 16 ); - } - OPT("cqm4py") - { - p->i_cqm_preset = X264_CQM_CUSTOM; - b_error |= parse_cqm( value, p->cqm_4py, 16 ); - } - OPT("cqm4pc") - { - p->i_cqm_preset = X264_CQM_CUSTOM; - b_error |= parse_cqm( value, p->cqm_4pc, 16 ); - } - OPT("cqm8i") - { - p->i_cqm_preset = X264_CQM_CUSTOM; - b_error |= parse_cqm( value, p->cqm_8iy, 64 ); - b_error |= parse_cqm( value, p->cqm_8ic, 64 ); - } - OPT("cqm8p") - { - p->i_cqm_preset = X264_CQM_CUSTOM; - b_error |= parse_cqm( value, p->cqm_8py, 64 ); - b_error |= parse_cqm( value, p->cqm_8pc, 64 ); - } - OPT("log") - p->i_log_level = atoi(value); - OPT("dump-yuv") - p->psz_dump_yuv = strdup(value); - OPT2("analyse", "partitions") - { - p->analyse.inter = 0; - if( strstr( value, "none" ) ) p->analyse.inter = 0; - if( strstr( value, "all" ) ) p->analyse.inter = ~0; - - if( strstr( value, "i4x4" ) ) p->analyse.inter |= X264_ANALYSE_I4x4; - if( strstr( value, "i8x8" ) ) p->analyse.inter |= X264_ANALYSE_I8x8; - if( strstr( value, "p8x8" ) ) p->analyse.inter |= X264_ANALYSE_PSUB16x16; - if( strstr( value, "p4x4" ) ) p->analyse.inter |= X264_ANALYSE_PSUB8x8; - if( strstr( value, "b8x8" ) ) p->analyse.inter |= X264_ANALYSE_BSUB16x16; - } - OPT("8x8dct") - p->analyse.b_transform_8x8 = atobool(value); - OPT2("weightb", "weight-b") - p->analyse.b_weighted_bipred = atobool(value); - OPT("weightp") - p->analyse.i_weighted_pred = atoi(value); - OPT2("direct", "direct-pred") - b_error |= parse_enum( value, x264_direct_pred_names, &p->analyse.i_direct_mv_pred ); - OPT("chroma-qp-offset") - p->analyse.i_chroma_qp_offset = atoi(value); - OPT("me") - b_error |= parse_enum( 
value, x264_motion_est_names, &p->analyse.i_me_method ); - OPT2("merange", "me-range") - p->analyse.i_me_range = atoi(value); - OPT2("mvrange", "mv-range") - p->analyse.i_mv_range = atoi(value); - OPT2("mvrange-thread", "mv-range-thread") - p->analyse.i_mv_range_thread = atoi(value); - OPT2("subme", "subq") - p->analyse.i_subpel_refine = atoi(value); - OPT("psy-rd") - { - if( 2 == sscanf( value, "%f:%f", &p->analyse.f_psy_rd, &p->analyse.f_psy_trellis ) || - 2 == sscanf( value, "%f,%f", &p->analyse.f_psy_rd, &p->analyse.f_psy_trellis ) || - 2 == sscanf( value, "%f|%f", &p->analyse.f_psy_rd, &p->analyse.f_psy_trellis )) - { } - else if( sscanf( value, "%f", &p->analyse.f_psy_rd ) ) - { - p->analyse.f_psy_trellis = 0; - } - else - { - p->analyse.f_psy_rd = 0; - p->analyse.f_psy_trellis = 0; - } - } - OPT("psy") - p->analyse.b_psy = atobool(value); - OPT("chroma-me") - p->analyse.b_chroma_me = atobool(value); - OPT("mixed-refs") - p->analyse.b_mixed_references = atobool(value); - OPT("trellis") - p->analyse.i_trellis = atoi(value); - OPT("fast-pskip") - p->analyse.b_fast_pskip = atobool(value); - OPT("dct-decimate") - p->analyse.b_dct_decimate = atobool(value); - OPT("deadzone-inter") - p->analyse.i_luma_deadzone[0] = atoi(value); - OPT("deadzone-intra") - p->analyse.i_luma_deadzone[1] = atoi(value); - OPT("nr") - p->analyse.i_noise_reduction = atoi(value); - OPT("bitrate") - { - p->rc.i_bitrate = atoi(value); - p->rc.i_rc_method = X264_RC_ABR; - } - OPT2("qp", "qp_constant") - { - p->rc.i_qp_constant = atoi(value); - p->rc.i_rc_method = X264_RC_CQP; - } - OPT("crf") - { - p->rc.f_rf_constant = atof(value); - p->rc.i_rc_method = X264_RC_CRF; - } - OPT("crf-max") - p->rc.f_rf_constant_max = atof(value); - OPT("rc-lookahead") - p->rc.i_lookahead = atoi(value); - OPT2("qpmin", "qp-min") - p->rc.i_qp_min = atoi(value); - OPT2("qpmax", "qp-max") - p->rc.i_qp_max = atoi(value); - OPT2("qpstep", "qp-step") - p->rc.i_qp_step = atoi(value); - OPT("ratetol") - 
p->rc.f_rate_tolerance = !strncmp("inf", value, 3) ? 1e9 : atof(value); - OPT("vbv-maxrate") - p->rc.i_vbv_max_bitrate = atoi(value); - OPT("vbv-bufsize") - p->rc.i_vbv_buffer_size = atoi(value); - OPT("vbv-init") - p->rc.f_vbv_buffer_init = atof(value); - OPT2("ipratio", "ip-factor") - p->rc.f_ip_factor = atof(value); - OPT2("pbratio", "pb-factor") - p->rc.f_pb_factor = atof(value); - OPT("aq-mode") - p->rc.i_aq_mode = atoi(value); - OPT("aq-strength") - p->rc.f_aq_strength = atof(value); - OPT("pass") - { - int pass = x264_clip3( atoi(value), 0, 3 ); - p->rc.b_stat_write = pass & 1; - p->rc.b_stat_read = pass & 2; - } - OPT("stats") - { - p->rc.psz_stat_in = strdup(value); - p->rc.psz_stat_out = strdup(value); - } - OPT("qcomp") - p->rc.f_qcompress = atof(value); - OPT("mbtree") - p->rc.b_mb_tree = atobool(value); - OPT("qblur") - p->rc.f_qblur = atof(value); - OPT2("cplxblur", "cplx-blur") - p->rc.f_complexity_blur = atof(value); - OPT("zones") - p->rc.psz_zones = strdup(value); - OPT("crop-rect") - b_error |= sscanf( value, "%u,%u,%u,%u", &p->crop_rect.i_left, &p->crop_rect.i_top, - &p->crop_rect.i_right, &p->crop_rect.i_bottom ) != 4; - OPT("psnr") - p->analyse.b_psnr = atobool(value); - OPT("ssim") - p->analyse.b_ssim = atobool(value); - OPT("aud") - p->b_aud = atobool(value); - OPT("sps-id") - p->i_sps_id = atoi(value); - OPT("global-header") - p->b_repeat_headers = !atobool(value); - OPT("repeat-headers") - p->b_repeat_headers = atobool(value); - OPT("annexb") - p->b_annexb = atobool(value); - OPT("force-cfr") - p->b_vfr_input = !atobool(value); - OPT("nal-hrd") - b_error |= parse_enum( value, x264_nal_hrd_names, &p->i_nal_hrd ); - OPT("filler") - p->rc.b_filler = atobool(value); - OPT("pic-struct") - p->b_pic_struct = atobool(value); - OPT("fake-interlaced") - p->b_fake_interlaced = atobool(value); - OPT("frame-packing") - p->i_frame_packing = atoi(value); - OPT("stitchable") - p->b_stitchable = atobool(value); - OPT("opencl") - p->b_opencl = atobool( 
value ); - OPT("opencl-clbin") - p->psz_clbin_file = strdup( value ); - OPT("opencl-device") - p->i_opencl_device = atoi( value ); - else - { - b_error = 1; - errortype = X264_PARAM_BAD_NAME; - } -#undef OPT -#undef OPT2 -#undef atobool -#undef atoi -#undef atof - - if( name_buf ) - free( name_buf ); - - b_error |= value_was_null && !name_was_bool; - return b_error ? errortype : 0; -} - /**************************************************************************** * x264_log: ****************************************************************************/ @@ -1085,362 +42,3 @@ va_end( arg ); } } - -static void x264_log_default( void *p_unused, int i_level, const char *psz_fmt, va_list arg ) -{ - char *psz_prefix; - switch( i_level ) - { - case X264_LOG_ERROR: - psz_prefix = "error"; - break; - case X264_LOG_WARNING: - psz_prefix = "warning"; - break; - case X264_LOG_INFO: - psz_prefix = "info"; - break; - case X264_LOG_DEBUG: - psz_prefix = "debug"; - break; - default: - psz_prefix = "unknown"; - break; - } - fprintf( stderr, "x264 [%s]: ", psz_prefix ); - x264_vfprintf( stderr, psz_fmt, arg ); -} - -/**************************************************************************** - * x264_picture_init: - ****************************************************************************/ -void x264_picture_init( x264_picture_t *pic ) -{ - memset( pic, 0, sizeof( x264_picture_t ) ); - pic->i_type = X264_TYPE_AUTO; - pic->i_qpplus1 = X264_QP_AUTO; - pic->i_pic_struct = PIC_STRUCT_AUTO; -} - -/**************************************************************************** - * x264_picture_alloc: - ****************************************************************************/ -int x264_picture_alloc( x264_picture_t *pic, int i_csp, int i_width, int i_height ) -{ - typedef struct - { - int planes; - int width_fix8[3]; - int height_fix8[3]; - } x264_csp_tab_t; - - static const x264_csp_tab_t x264_csp_tab[] = - { - [X264_CSP_I420] = { 3, { 256*1, 256/2, 256/2 }, { 256*1, 256/2, 256/2 } }, 
- [X264_CSP_YV12] = { 3, { 256*1, 256/2, 256/2 }, { 256*1, 256/2, 256/2 } }, - [X264_CSP_NV12] = { 2, { 256*1, 256*1 }, { 256*1, 256/2 }, }, - [X264_CSP_NV21] = { 2, { 256*1, 256*1 }, { 256*1, 256/2 }, }, - [X264_CSP_I422] = { 3, { 256*1, 256/2, 256/2 }, { 256*1, 256*1, 256*1 } }, - [X264_CSP_YV16] = { 3, { 256*1, 256/2, 256/2 }, { 256*1, 256*1, 256*1 } }, - [X264_CSP_NV16] = { 2, { 256*1, 256*1 }, { 256*1, 256*1 }, }, - [X264_CSP_YUYV] = { 1, { 256*2 }, { 256*1 }, }, - [X264_CSP_UYVY] = { 1, { 256*2 }, { 256*1 }, }, - [X264_CSP_I444] = { 3, { 256*1, 256*1, 256*1 }, { 256*1, 256*1, 256*1 } }, - [X264_CSP_YV24] = { 3, { 256*1, 256*1, 256*1 }, { 256*1, 256*1, 256*1 } }, - [X264_CSP_BGR] = { 1, { 256*3 }, { 256*1 }, }, - [X264_CSP_BGRA] = { 1, { 256*4 }, { 256*1 }, }, - [X264_CSP_RGB] = { 1, { 256*3 }, { 256*1 }, }, - }; - - int csp = i_csp & X264_CSP_MASK; - if( csp <= X264_CSP_NONE || csp >= X264_CSP_MAX || csp == X264_CSP_V210 ) - return -1; - x264_picture_init( pic ); - pic->img.i_csp = i_csp; - pic->img.i_plane = x264_csp_tab[csp].planes; - int depth_factor = i_csp & X264_CSP_HIGH_DEPTH ? 
2 : 1; - int plane_offset[3] = {0}; - int frame_size = 0; - for( int i = 0; i < pic->img.i_plane; i++ ) - { - int stride = (((int64_t)i_width * x264_csp_tab[csp].width_fix8[i]) >> 8) * depth_factor; - int plane_size = (((int64_t)i_height * x264_csp_tab[csp].height_fix8[i]) >> 8) * stride; - pic->img.i_stride[i] = stride; - plane_offset[i] = frame_size; - frame_size += plane_size; - } - pic->img.plane[0] = x264_malloc( frame_size ); - if( !pic->img.plane[0] ) - return -1; - for( int i = 1; i < pic->img.i_plane; i++ ) - pic->img.plane[i] = pic->img.plane[0] + plane_offset[i]; - return 0; -} - -/**************************************************************************** - * x264_picture_clean: - ****************************************************************************/ -void x264_picture_clean( x264_picture_t *pic ) -{ - x264_free( pic->img.plane[0] ); - - /* just to be safe */ - memset( pic, 0, sizeof( x264_picture_t ) ); -} - -/**************************************************************************** - * x264_malloc: - ****************************************************************************/ -void *x264_malloc( int i_size ) -{ - uint8_t *align_buf = NULL; -#if HAVE_MALLOC_H -#if HAVE_THP -#define HUGE_PAGE_SIZE 2*1024*1024 -#define HUGE_PAGE_THRESHOLD HUGE_PAGE_SIZE*7/8 /* FIXME: Is this optimal? */ - /* Attempt to allocate huge pages to reduce TLB misses. */ - if( i_size >= HUGE_PAGE_THRESHOLD ) - { - align_buf = memalign( HUGE_PAGE_SIZE, i_size ); - if( align_buf ) - { - /* Round up to the next huge page boundary if we are close enough. 
*/ - size_t madv_size = (i_size + HUGE_PAGE_SIZE - HUGE_PAGE_THRESHOLD) & ~(HUGE_PAGE_SIZE-1); - madvise( align_buf, madv_size, MADV_HUGEPAGE ); - } - } - else -#undef HUGE_PAGE_SIZE -#undef HUGE_PAGE_THRESHOLD -#endif - align_buf = memalign( NATIVE_ALIGN, i_size ); -#else - uint8_t *buf = malloc( i_size + (NATIVE_ALIGN-1) + sizeof(void **) ); - if( buf ) - { - align_buf = buf + (NATIVE_ALIGN-1) + sizeof(void **); - align_buf -= (intptr_t) align_buf & (NATIVE_ALIGN-1); - *( (void **) ( align_buf - sizeof(void **) ) ) = buf; - } -#endif - if( !align_buf ) - x264_log( NULL, X264_LOG_ERROR, "malloc of size %d failed\n", i_size ); - return align_buf; -} - -/**************************************************************************** - * x264_free: - ****************************************************************************/ -void x264_free( void *p ) -{ - if( p ) - { -#if HAVE_MALLOC_H - free( p ); -#else - free( *( ( ( void **) p ) - 1 ) ); -#endif - } -} - -/**************************************************************************** - * x264_reduce_fraction: - ****************************************************************************/ -#define REDUCE_FRACTION( name, type )\ -void name( type *n, type *d )\ -{ \ - type a = *n; \ - type b = *d; \ - type c; \ - if( !a || !b ) \ - return; \ - c = a % b; \ - while( c ) \ - { \ - a = b; \ - b = c; \ - c = a % b; \ - } \ - *n /= b; \ - *d /= b; \ -} - -REDUCE_FRACTION( x264_reduce_fraction , uint32_t ) -REDUCE_FRACTION( x264_reduce_fraction64, uint64_t ) - -/**************************************************************************** - * x264_slurp_file: - ****************************************************************************/ -char *x264_slurp_file( const char *filename ) -{ - int b_error = 0; - int64_t i_size; - char *buf; - FILE *fh = x264_fopen( filename, "rb" ); - if( !fh ) - return NULL; - - b_error |= fseek( fh, 0, SEEK_END ) < 0; - b_error |= ( i_size = ftell( fh ) ) <= 0; - if( WORD_SIZE == 4 ) - b_error 
|= i_size > INT32_MAX; - b_error |= fseek( fh, 0, SEEK_SET ) < 0; - if( b_error ) - goto error; - - buf = x264_malloc( i_size+2 ); - if( !buf ) - goto error; - - b_error |= fread( buf, 1, i_size, fh ) != i_size; - fclose( fh ); - if( b_error ) - { - x264_free( buf ); - return NULL; - } - - if( buf[i_size-1] != '\n' ) - buf[i_size++] = '\n'; - buf[i_size] = '\0'; - - return buf; -error: - fclose( fh ); - return NULL; -} - -/**************************************************************************** - * x264_param2string: - ****************************************************************************/ -char *x264_param2string( x264_param_t *p, int b_res ) -{ - int len = 1000; - char *buf, *s; - if( p->rc.psz_zones ) - len += strlen(p->rc.psz_zones); - buf = s = x264_malloc( len ); - if( !buf ) - return NULL; - - if( b_res ) - { - s += sprintf( s, "%dx%d ", p->i_width, p->i_height ); - s += sprintf( s, "fps=%u/%u ", p->i_fps_num, p->i_fps_den ); - s += sprintf( s, "timebase=%u/%u ", p->i_timebase_num, p->i_timebase_den ); - s += sprintf( s, "bitdepth=%d ", BIT_DEPTH ); - } - - if( p->b_opencl ) - s += sprintf( s, "opencl=%d ", p->b_opencl ); - s += sprintf( s, "cabac=%d", p->b_cabac ); - s += sprintf( s, " ref=%d", p->i_frame_reference ); - s += sprintf( s, " deblock=%d:%d:%d", p->b_deblocking_filter, - p->i_deblocking_filter_alphac0, p->i_deblocking_filter_beta ); - s += sprintf( s, " analyse=%#x:%#x", p->analyse.intra, p->analyse.inter ); - s += sprintf( s, " me=%s", x264_motion_est_names[ p->analyse.i_me_method ] ); - s += sprintf( s, " subme=%d", p->analyse.i_subpel_refine ); - s += sprintf( s, " psy=%d", p->analyse.b_psy ); - if( p->analyse.b_psy ) - s += sprintf( s, " psy_rd=%.2f:%.2f", p->analyse.f_psy_rd, p->analyse.f_psy_trellis ); - s += sprintf( s, " mixed_ref=%d", p->analyse.b_mixed_references ); - s += sprintf( s, " me_range=%d", p->analyse.i_me_range ); - s += sprintf( s, " chroma_me=%d", p->analyse.b_chroma_me ); - s += sprintf( s, " trellis=%d", 
p->analyse.i_trellis ); - s += sprintf( s, " 8x8dct=%d", p->analyse.b_transform_8x8 ); - s += sprintf( s, " cqm=%d", p->i_cqm_preset ); - s += sprintf( s, " deadzone=%d,%d", p->analyse.i_luma_deadzone[0], p->analyse.i_luma_deadzone[1] ); - s += sprintf( s, " fast_pskip=%d", p->analyse.b_fast_pskip ); - s += sprintf( s, " chroma_qp_offset=%d", p->analyse.i_chroma_qp_offset ); - s += sprintf( s, " threads=%d", p->i_threads ); - s += sprintf( s, " lookahead_threads=%d", p->i_lookahead_threads ); - s += sprintf( s, " sliced_threads=%d", p->b_sliced_threads ); - if( p->i_slice_count ) - s += sprintf( s, " slices=%d", p->i_slice_count ); - if( p->i_slice_count_max ) - s += sprintf( s, " slices_max=%d", p->i_slice_count_max ); - if( p->i_slice_max_size ) - s += sprintf( s, " slice_max_size=%d", p->i_slice_max_size ); - if( p->i_slice_max_mbs ) - s += sprintf( s, " slice_max_mbs=%d", p->i_slice_max_mbs ); - if( p->i_slice_min_mbs ) - s += sprintf( s, " slice_min_mbs=%d", p->i_slice_min_mbs ); - s += sprintf( s, " nr=%d", p->analyse.i_noise_reduction ); - s += sprintf( s, " decimate=%d", p->analyse.b_dct_decimate ); - s += sprintf( s, " interlaced=%s", p->b_interlaced ? p->b_tff ? "tff" : "bff" : p->b_fake_interlaced ? "fake" : "0" ); - s += sprintf( s, " bluray_compat=%d", p->b_bluray_compat ); - if( p->b_stitchable ) - s += sprintf( s, " stitchable=%d", p->b_stitchable ); - - s += sprintf( s, " constrained_intra=%d", p->b_constrained_intra ); - - s += sprintf( s, " bframes=%d", p->i_bframe ); - if( p->i_bframe ) - { - s += sprintf( s, " b_pyramid=%d b_adapt=%d b_bias=%d direct=%d weightb=%d open_gop=%d", - p->i_bframe_pyramid, p->i_bframe_adaptive, p->i_bframe_bias, - p->analyse.i_direct_mv_pred, p->analyse.b_weighted_bipred, p->b_open_gop ); - } - s += sprintf( s, " weightp=%d", p->analyse.i_weighted_pred > 0 ? 
p->analyse.i_weighted_pred : 0 ); - - if( p->i_keyint_max == X264_KEYINT_MAX_INFINITE ) - s += sprintf( s, " keyint=infinite" ); - else - s += sprintf( s, " keyint=%d", p->i_keyint_max ); - s += sprintf( s, " keyint_min=%d scenecut=%d intra_refresh=%d", - p->i_keyint_min, p->i_scenecut_threshold, p->b_intra_refresh ); - - if( p->rc.b_mb_tree || p->rc.i_vbv_buffer_size ) - s += sprintf( s, " rc_lookahead=%d", p->rc.i_lookahead ); - - s += sprintf( s, " rc=%s mbtree=%d", p->rc.i_rc_method == X264_RC_ABR ? - ( p->rc.b_stat_read ? "2pass" : p->rc.i_vbv_max_bitrate == p->rc.i_bitrate ? "cbr" : "abr" ) - : p->rc.i_rc_method == X264_RC_CRF ? "crf" : "cqp", p->rc.b_mb_tree ); - if( p->rc.i_rc_method == X264_RC_ABR || p->rc.i_rc_method == X264_RC_CRF ) - { - if( p->rc.i_rc_method == X264_RC_CRF ) - s += sprintf( s, " crf=%.1f", p->rc.f_rf_constant ); - else - s += sprintf( s, " bitrate=%d ratetol=%.1f", - p->rc.i_bitrate, p->rc.f_rate_tolerance ); - s += sprintf( s, " qcomp=%.2f qpmin=%d qpmax=%d qpstep=%d", - p->rc.f_qcompress, p->rc.i_qp_min, p->rc.i_qp_max, p->rc.i_qp_step ); - if( p->rc.b_stat_read ) - s += sprintf( s, " cplxblur=%.1f qblur=%.1f", - p->rc.f_complexity_blur, p->rc.f_qblur ); - if( p->rc.i_vbv_buffer_size ) - { - s += sprintf( s, " vbv_maxrate=%d vbv_bufsize=%d", - p->rc.i_vbv_max_bitrate, p->rc.i_vbv_buffer_size ); - if( p->rc.i_rc_method == X264_RC_CRF ) - s += sprintf( s, " crf_max=%.1f", p->rc.f_rf_constant_max ); - } - } - else if( p->rc.i_rc_method == X264_RC_CQP ) - s += sprintf( s, " qp=%d", p->rc.i_qp_constant ); - - if( p->rc.i_vbv_buffer_size ) - s += sprintf( s, " nal_hrd=%s filler=%d", x264_nal_hrd_names[p->i_nal_hrd], p->rc.b_filler ); - if( p->crop_rect.i_left | p->crop_rect.i_top | p->crop_rect.i_right | p->crop_rect.i_bottom ) - s += sprintf( s, " crop_rect=%u,%u,%u,%u", p->crop_rect.i_left, p->crop_rect.i_top, - p->crop_rect.i_right, p->crop_rect.i_bottom ); - if( p->i_frame_packing >= 0 ) - s += sprintf( s, " frame-packing=%d", 
p->i_frame_packing ); - - if( !(p->rc.i_rc_method == X264_RC_CQP && p->rc.i_qp_constant == 0) ) - { - s += sprintf( s, " ip_ratio=%.2f", p->rc.f_ip_factor ); - if( p->i_bframe && !p->rc.b_mb_tree ) - s += sprintf( s, " pb_ratio=%.2f", p->rc.f_pb_factor ); - s += sprintf( s, " aq=%d", p->rc.i_aq_mode ); - if( p->rc.i_aq_mode ) - s += sprintf( s, ":%.2f", p->rc.f_aq_strength ); - if( p->rc.psz_zones ) - s += sprintf( s, " zones=%s", p->rc.psz_zones ); - else if( p->rc.i_zones ) - s += sprintf( s, " zones" ); - } - - return buf; -} - diff -Nru x264-0.152.2854+gite9a5903/common/common.h x264-0.158.2988+git-20191101.7817004/common/common.h --- x264-0.152.2854+gite9a5903/common/common.h 2017-12-31 12:50:51.000000000 +0000 +++ x264-0.158.2988+git-20191101.7817004/common/common.h 2019-11-09 05:16:29.000000000 +0000 @@ -1,7 +1,7 @@ /***************************************************************************** * common.h: misc common functions ***************************************************************************** - * Copyright (C) 2003-2017 x264 project + * Copyright (C) 2003-2019 x264 project * * Authors: Laurent Aimar * Loren Merritt @@ -27,101 +27,46 @@ #ifndef X264_COMMON_H #define X264_COMMON_H +#include "base.h" + +/* Macros for templating function calls according to bit depth */ +#define x264_template(w) x264_glue3(x264, BIT_DEPTH, w) + +/**************************************************************************** + * API Templates + ****************************************************************************/ +#define x264_nal_encode x264_template(nal_encode) +#define x264_encoder_reconfig x264_template(encoder_reconfig) +#define x264_encoder_parameters x264_template(encoder_parameters) +#define x264_encoder_headers x264_template(encoder_headers) +#define x264_encoder_encode x264_template(encoder_encode) +#define x264_encoder_close x264_template(encoder_close) +#define x264_encoder_delayed_frames x264_template(encoder_delayed_frames) +#define 
x264_encoder_maximum_delayed_frames x264_template(encoder_maximum_delayed_frames) +#define x264_encoder_intra_refresh x264_template(encoder_intra_refresh) +#define x264_encoder_invalidate_reference x264_template(encoder_invalidate_reference) + +/* This undef allows to rename the external symbol and force link failure in case + * of incompatible libraries. Then the define enables templating as above. */ +#undef x264_encoder_open +#define x264_encoder_open x264_template(encoder_open) + /**************************************************************************** * Macros ****************************************************************************/ -#define X264_MIN(a,b) ( (a)<(b) ? (a) : (b) ) -#define X264_MAX(a,b) ( (a)>(b) ? (a) : (b) ) -#define X264_MIN3(a,b,c) X264_MIN((a),X264_MIN((b),(c))) -#define X264_MAX3(a,b,c) X264_MAX((a),X264_MAX((b),(c))) -#define X264_MIN4(a,b,c,d) X264_MIN((a),X264_MIN3((b),(c),(d))) -#define X264_MAX4(a,b,c,d) X264_MAX((a),X264_MAX3((b),(c),(d))) -#define XCHG(type,a,b) do { type t = a; a = b; b = t; } while( 0 ) -#define IS_DISPOSABLE(type) ( type == X264_TYPE_B ) -#define FIX8(f) ((int)(f*(1<<8)+.5)) -#define ALIGN(x,a) (((x)+((a)-1))&~((a)-1)) -#define ARRAY_ELEMS(a) ((sizeof(a))/(sizeof(a[0]))) - -#define CHECKED_MALLOC( var, size )\ -do {\ - var = x264_malloc( size );\ - if( !var )\ - goto fail;\ -} while( 0 ) -#define CHECKED_MALLOCZERO( var, size )\ -do {\ - CHECKED_MALLOC( var, size );\ - memset( var, 0, size );\ -} while( 0 ) - -/* Macros for merging multiple allocations into a single large malloc, for improved - * use with huge pages. 
*/ - -/* Needs to be enough to contain any set of buffers that use combined allocations */ -#define PREALLOC_BUF_SIZE 1024 - -#define PREALLOC_INIT\ - int prealloc_idx = 0;\ - size_t prealloc_size = 0;\ - uint8_t **preallocs[PREALLOC_BUF_SIZE]; - -#define PREALLOC( var, size )\ -do {\ - var = (void*)prealloc_size;\ - preallocs[prealloc_idx++] = (uint8_t**)&var;\ - prealloc_size += ALIGN(size, NATIVE_ALIGN);\ -} while( 0 ) - -#define PREALLOC_END( ptr )\ -do {\ - CHECKED_MALLOC( ptr, prealloc_size );\ - while( prealloc_idx-- )\ - *preallocs[prealloc_idx] += (intptr_t)ptr;\ -} while( 0 ) - -#define ARRAY_SIZE(array) (sizeof(array)/sizeof(array[0])) - -#define X264_BFRAME_MAX 16 -#define X264_REF_MAX 16 -#define X264_THREAD_MAX 128 -#define X264_LOOKAHEAD_THREAD_MAX 16 #define X264_PCM_COST (FRAME_SIZE(256*BIT_DEPTH)+16) -#define X264_LOOKAHEAD_MAX 250 #define QP_BD_OFFSET (6*(BIT_DEPTH-8)) #define QP_MAX_SPEC (51+QP_BD_OFFSET) #define QP_MAX (QP_MAX_SPEC+18) -#define QP_MAX_MAX (51+2*6+18) #define PIXEL_MAX ((1 << BIT_DEPTH)-1) // arbitrary, but low because SATD scores are 1/4 normal #define X264_LOOKAHEAD_QP (12+QP_BD_OFFSET) #define SPEC_QP(x) X264_MIN((x), QP_MAX_SPEC) -// number of pixels (per thread) in progress at any given time. -// 16 for the macroblock in progress + 3 for deblocking + 3 for motion compensation filter + 2 for extra safety -#define X264_THREAD_HEIGHT 24 - -/* WEIGHTP_FAKE is set when mb_tree & psy are enabled, but normal weightp is disabled - * (such as in baseline). It checks for fades in lookahead and adjusts qp accordingly - * to increase quality. Defined as (-1) so that if(i_weighted_pred > 0) is true only when - * real weights are being used. 
*/ - -#define X264_WEIGHTP_FAKE (-1) - #define NALU_OVERHEAD 5 // startcode + NAL type costs 5 bytes per frame #define FILLER_OVERHEAD (NALU_OVERHEAD+1) #define SEI_OVERHEAD (NALU_OVERHEAD - (h->param.b_annexb && !h->param.i_avcintra_class && (h->out.i_nal-1))) -/**************************************************************************** - * Includes - ****************************************************************************/ -#include "osdep.h" -#include -#include -#include -#include -#include -#include - #if HAVE_INTERLACED # define MB_INTERLACED h->mb.b_interlaced # define SLICE_MBAFF h->sh.b_mbaff @@ -141,29 +86,10 @@ # define CHROMA_V_SHIFT h->mb.chroma_v_shift #endif -#define CHROMA_SIZE(s) ((s)>>(CHROMA_H_SHIFT+CHROMA_V_SHIFT)) +#define CHROMA_SIZE(s) (CHROMA_FORMAT ? (s)>>(CHROMA_H_SHIFT+CHROMA_V_SHIFT) : 0) #define FRAME_SIZE(s) ((s)+2*CHROMA_SIZE(s)) #define CHROMA444 (CHROMA_FORMAT == CHROMA_444) -/* Unions for type-punning. - * Mn: load or store n bits, aligned, native-endian - * CPn: copy n bits, aligned, native-endian - * we don't use memcpy for CPn because memcpy's args aren't assumed to be aligned */ -typedef union { uint16_t i; uint8_t c[2]; } MAY_ALIAS x264_union16_t; -typedef union { uint32_t i; uint16_t b[2]; uint8_t c[4]; } MAY_ALIAS x264_union32_t; -typedef union { uint64_t i; uint32_t a[2]; uint16_t b[4]; uint8_t c[8]; } MAY_ALIAS x264_union64_t; -typedef struct { uint64_t i[2]; } x264_uint128_t; -typedef union { x264_uint128_t i; uint64_t a[2]; uint32_t b[4]; uint16_t c[8]; uint8_t d[16]; } MAY_ALIAS x264_union128_t; -#define M16(src) (((x264_union16_t*)(src))->i) -#define M32(src) (((x264_union32_t*)(src))->i) -#define M64(src) (((x264_union64_t*)(src))->i) -#define M128(src) (((x264_union128_t*)(src))->i) -#define M128_ZERO ((x264_uint128_t){{0,0}}) -#define CP16(dst,src) M16(dst) = M16(src) -#define CP32(dst,src) M32(dst) = M32(src) -#define CP64(dst,src) M64(dst) = M64(src) -#define CP128(dst,src) M128(dst) = M128(src) - #if 
HIGH_BIT_DEPTH typedef uint16_t pixel; typedef uint64_t pixel4; @@ -182,55 +108,11 @@ # define MPIXEL_X4(src) M32(src) #endif -#define BIT_DEPTH X264_BIT_DEPTH - #define CPPIXEL_X4(dst,src) MPIXEL_X4(dst) = MPIXEL_X4(src) -#define X264_SCAN8_LUMA_SIZE (5*8) -#define X264_SCAN8_SIZE (X264_SCAN8_LUMA_SIZE*3) -#define X264_SCAN8_0 (4+1*8) - -/* Scan8 organization: - * 0 1 2 3 4 5 6 7 - * 0 DY y y y y y - * 1 y Y Y Y Y - * 2 y Y Y Y Y - * 3 y Y Y Y Y - * 4 y Y Y Y Y - * 5 DU u u u u u - * 6 u U U U U - * 7 u U U U U - * 8 u U U U U - * 9 u U U U U - * 10 DV v v v v v - * 11 v V V V V - * 12 v V V V V - * 13 v V V V V - * 14 v V V V V - * DY/DU/DV are for luma/chroma DC. - */ - -#define LUMA_DC 48 -#define CHROMA_DC 49 - -static const uint8_t x264_scan8[16*3 + 3] = -{ - 4+ 1*8, 5+ 1*8, 4+ 2*8, 5+ 2*8, - 6+ 1*8, 7+ 1*8, 6+ 2*8, 7+ 2*8, - 4+ 3*8, 5+ 3*8, 4+ 4*8, 5+ 4*8, - 6+ 3*8, 7+ 3*8, 6+ 4*8, 7+ 4*8, - 4+ 6*8, 5+ 6*8, 4+ 7*8, 5+ 7*8, - 6+ 6*8, 7+ 6*8, 6+ 7*8, 7+ 7*8, - 4+ 8*8, 5+ 8*8, 4+ 9*8, 5+ 9*8, - 6+ 8*8, 7+ 8*8, 6+ 9*8, 7+ 9*8, - 4+11*8, 5+11*8, 4+12*8, 5+12*8, - 6+11*8, 7+11*8, 6+12*8, 7+12*8, - 4+13*8, 5+13*8, 4+14*8, 5+14*8, - 6+13*8, 7+13*8, 6+14*8, 7+14*8, - 0+ 0*8, 0+ 5*8, 0+10*8 -}; - -#include "x264.h" +/**************************************************************************** + * Includes + ****************************************************************************/ #if HAVE_OPENCL #include "opencl.h" #endif @@ -243,30 +125,19 @@ #include "frame.h" #include "dct.h" #include "quant.h" -#include "cpu.h" #include "threadpool.h" /**************************************************************************** * General functions ****************************************************************************/ -/* x264_malloc : will do or emulate a memalign - * you have to use x264_free for buffers allocated with x264_malloc */ -void *x264_malloc( int ); -void x264_free( void * ); - -/* x264_slurp_file: malloc space for the whole file and read it */ -char 
*x264_slurp_file( const char *filename ); - -/* x264_param2string: return a (malloced) string containing most of - * the encoding options */ -char *x264_param2string( x264_param_t *p, int b_res ); /* log */ +#define x264_log x264_template(log) void x264_log( x264_t *h, int i_level, const char *psz_fmt, ... ); -void x264_reduce_fraction( uint32_t *n, uint32_t *d ); -void x264_reduce_fraction64( uint64_t *n, uint64_t *d ); +#define x264_cavlc_init x264_template(cavlc_init) void x264_cavlc_init( x264_t *h ); +#define x264_cabac_init x264_template(cabac_init) void x264_cabac_init( x264_t *h ); static ALWAYS_INLINE pixel x264_clip_pixel( int x ) @@ -274,97 +145,9 @@ return ( (x & ~PIXEL_MAX) ? (-x)>>31 & PIXEL_MAX : x ); } -static ALWAYS_INLINE int x264_clip3( int v, int i_min, int i_max ) -{ - return ( (v < i_min) ? i_min : (v > i_max) ? i_max : v ); -} - -static ALWAYS_INLINE double x264_clip3f( double v, double f_min, double f_max ) -{ - return ( (v < f_min) ? f_min : (v > f_max) ? f_max : v ); -} - -static ALWAYS_INLINE int x264_median( int a, int b, int c ) -{ - int t = (a-b)&((a-b)>>31); - a -= t; - b += t; - b -= (b-c)&((b-c)>>31); - b += (a-b)&((a-b)>>31); - return b; -} - -static ALWAYS_INLINE void x264_median_mv( int16_t *dst, int16_t *a, int16_t *b, int16_t *c ) -{ - dst[0] = x264_median( a[0], b[0], c[0] ); - dst[1] = x264_median( a[1], b[1], c[1] ); -} - -static ALWAYS_INLINE int x264_predictor_difference( int16_t (*mvc)[2], intptr_t i_mvc ) -{ - int sum = 0; - for( int i = 0; i < i_mvc-1; i++ ) - { - sum += abs( mvc[i][0] - mvc[i+1][0] ) - + abs( mvc[i][1] - mvc[i+1][1] ); - } - return sum; -} - -static ALWAYS_INLINE uint16_t x264_cabac_mvd_sum( uint8_t *mvdleft, uint8_t *mvdtop ) -{ - int amvd0 = mvdleft[0] + mvdtop[0]; - int amvd1 = mvdleft[1] + mvdtop[1]; - amvd0 = (amvd0 > 2) + (amvd0 > 32); - amvd1 = (amvd1 > 2) + (amvd1 > 32); - return amvd0 + (amvd1<<8); -} - -extern const uint8_t x264_exp2_lut[64]; -extern const float x264_log2_lut[128]; -extern 
const float x264_log2_lz_lut[32]; - -/* Not a general-purpose function; multiplies input by -1/6 to convert - * qp to qscale. */ -static ALWAYS_INLINE int x264_exp2fix8( float x ) -{ - int i = x*(-64.f/6.f) + 512.5f; - if( i < 0 ) return 0; - if( i > 1023 ) return 0xffff; - return (x264_exp2_lut[i&63]+256) << (i>>6) >> 8; -} - -static ALWAYS_INLINE float x264_log2( uint32_t x ) -{ - int lz = x264_clz( x ); - return x264_log2_lut[(x<>24)&0x7f] + x264_log2_lz_lut[lz]; -} - /**************************************************************************** * ****************************************************************************/ -enum slice_type_e -{ - SLICE_TYPE_P = 0, - SLICE_TYPE_B = 1, - SLICE_TYPE_I = 2, -}; - -static const char slice_type_to_char[] = { 'P', 'B', 'I' }; - -enum sei_payload_type_e -{ - SEI_BUFFERING_PERIOD = 0, - SEI_PIC_TIMING = 1, - SEI_PAN_SCAN_RECT = 2, - SEI_FILLER = 3, - SEI_USER_DATA_REGISTERED = 4, - SEI_USER_DATA_UNREGISTERED = 5, - SEI_RECOVERY_POINT = 6, - SEI_DEC_REF_PIC_MARKING = 7, - SEI_FRAME_PACKING = 45, -}; - typedef struct { x264_sps_t *sps; @@ -560,9 +343,14 @@ udctcoef (*quant8_bias0[4])[64]; /* [4][QP_MAX_SPEC+1][64] */ udctcoef (*nr_offset_emergency)[4][64]; - /* mv/ref cost arrays. */ + /* mv/ref/mode cost arrays. 
*/ uint16_t *cost_mv[QP_MAX+1]; uint16_t *cost_mv_fpel[QP_MAX+1][4]; + struct + { + uint16_t ref[QP_MAX+1][3][33]; + uint16_t i4x4_mode[QP_MAX+1][17]; + } *cost_table; const uint8_t *chroma_qp_table; /* includes both the nonlinear luma->chroma mapping and chroma_qp_offset */ @@ -781,16 +569,16 @@ ALIGNED_64( pixel fdec_buf[54*FDEC_STRIDE] ); /* i4x4 and i8x8 backup data, for skipping the encode stage when possible */ - ALIGNED_16( pixel i4x4_fdec_buf[16*16] ); - ALIGNED_16( pixel i8x8_fdec_buf[16*16] ); + ALIGNED_32( pixel i4x4_fdec_buf[16*16] ); + ALIGNED_32( pixel i8x8_fdec_buf[16*16] ); ALIGNED_64( dctcoef i8x8_dct_buf[3][64] ); ALIGNED_64( dctcoef i4x4_dct_buf[15][16] ); uint32_t i4x4_nnz_buf[4]; uint32_t i8x8_nnz_buf[4]; /* Psy trellis DCT data */ - ALIGNED_16( dctcoef fenc_dct8[4][64] ); - ALIGNED_16( dctcoef fenc_dct4[16][16] ); + ALIGNED_64( dctcoef fenc_dct8[4][64] ); + ALIGNED_64( dctcoef fenc_dct4[16][16] ); /* Psy RD SATD/SA8D scores cache */ ALIGNED_64( uint32_t fenc_satd_cache[32] ); @@ -979,7 +767,7 @@ // included at the end because it needs x264_t #include "macroblock.h" -static int ALWAYS_INLINE x264_predictor_roundclip( int16_t (*dst)[2], int16_t (*mvc)[2], int i_mvc, int16_t mv_limit[2][2], uint32_t pmv ) +static ALWAYS_INLINE int x264_predictor_roundclip( int16_t (*dst)[2], int16_t (*mvc)[2], int i_mvc, int16_t mv_limit[2][2], uint32_t pmv ) { int cnt = 0; for( int i = 0; i < i_mvc; i++ ) @@ -995,7 +783,7 @@ return cnt; } -static int ALWAYS_INLINE x264_predictor_clip( int16_t (*dst)[2], int16_t (*mvc)[2], int i_mvc, int16_t mv_limit[2][2], uint32_t pmv ) +static ALWAYS_INLINE int x264_predictor_clip( int16_t (*dst)[2], int16_t (*mvc)[2], int i_mvc, int16_t mv_limit[2][2], uint32_t pmv ) { int cnt = 0; int qpel_limit[4] = {mv_limit[0][0] << 2, mv_limit[0][1] << 2, mv_limit[1][0] << 2, mv_limit[1][1] << 2}; @@ -1019,4 +807,3 @@ #include "rectangle.h" #endif - diff -Nru x264-0.152.2854+gite9a5903/common/cpu.c 
x264-0.158.2988+git-20191101.7817004/common/cpu.c --- x264-0.152.2854+gite9a5903/common/cpu.c 2017-12-31 12:50:51.000000000 +0000 +++ x264-0.158.2988+git-20191101.7817004/common/cpu.c 2019-11-09 05:16:29.000000000 +0000 @@ -1,7 +1,7 @@ /***************************************************************************** * cpu.c: cpu detection ***************************************************************************** - * Copyright (C) 2003-2017 x264 project + * Copyright (C) 2003-2019 x264 project * * Authors: Loren Merritt * Laurent Aimar @@ -25,7 +25,7 @@ * For more information, contact us at licensing@x264.com. *****************************************************************************/ -#include "common.h" +#include "base.h" #if HAVE_POSIXTHREAD && SYS_LINUX #include @@ -45,7 +45,7 @@ const x264_cpu_name_t x264_cpu_names[] = { -#if HAVE_MMX +#if ARCH_X86 || ARCH_X86_64 // {"MMX", X264_CPU_MMX}, // we don't support asm on mmx1 cpus anymore #define MMX2 X264_CPU_MMX|X264_CPU_MMX2 {"MMX2", MMX2}, @@ -97,7 +97,7 @@ {"", 0}, }; -#if (ARCH_PPC && SYS_LINUX) || (ARCH_ARM && !HAVE_NEON) +#if (HAVE_ALTIVEC && SYS_LINUX) || (HAVE_ARMV6 && !HAVE_NEON) #include #include static sigjmp_buf jmpbuf; @@ -235,16 +235,8 @@ int model = ((eax>>4)&0xf) + ((eax>>12)&0xf0); if( family == 6 ) { - /* 6/9 (pentium-m "banias"), 6/13 (pentium-m "dothan"), and 6/14 (core1 "yonah") - * theoretically support sse2, but it's significantly slower than mmx for - * almost all of x264's functions, so let's just pretend they don't. 
*/ - if( model == 9 || model == 13 || model == 14 ) - { - cpu &= ~(X264_CPU_SSE2|X264_CPU_SSE3); - assert(!(cpu&(X264_CPU_SSSE3|X264_CPU_SSE4))); - } /* Detect Atom CPU */ - else if( model == 28 ) + if( model == 28 ) { cpu |= X264_CPU_SLOW_ATOM; cpu |= X264_CPU_SLOW_PSHUFB; @@ -296,7 +288,7 @@ else if( cache == 64 ) cpu |= X264_CPU_CACHELINE_64; else - x264_log( NULL, X264_LOG_WARNING, "unable to determine cacheline size\n" ); + x264_log_internal( X264_LOG_WARNING, "unable to determine cacheline size\n" ); } #if STACK_ALIGNMENT < 16 @@ -306,7 +298,7 @@ return cpu; } -#elif ARCH_PPC && HAVE_ALTIVEC +#elif HAVE_ALTIVEC #if SYS_MACOSX || SYS_OPENBSD || SYS_FREEBSD #include @@ -363,7 +355,7 @@ } #endif -#elif ARCH_ARM +#elif HAVE_ARMV6 void x264_cpu_neon_test( void ); int x264_cpu_fast_neon_mrc_test( void ); @@ -371,7 +363,6 @@ uint32_t x264_cpu_detect( void ) { int flags = 0; -#if HAVE_ARMV6 flags |= X264_CPU_ARMV6; // don't do this hack if compiled with -mfpu=neon @@ -404,26 +395,25 @@ flags |= x264_cpu_fast_neon_mrc_test() ? X264_CPU_FAST_NEON_MRC : 0; #endif // TODO: write dual issue test? currently it's A8 (dual issue) vs. 
A9 (fast mrc) -#endif return flags; } -#elif ARCH_AARCH64 +#elif HAVE_AARCH64 uint32_t x264_cpu_detect( void ) { +#if HAVE_NEON return X264_CPU_ARMV8 | X264_CPU_NEON; +#else + return X264_CPU_ARMV8; +#endif } -#elif ARCH_MIPS +#elif HAVE_MSA uint32_t x264_cpu_detect( void ) { - uint32_t flags = 0; -#if HAVE_MSA - flags |= X264_CPU_MSA; -#endif - return flags; + return X264_CPU_MSA; } #else diff -Nru x264-0.152.2854+gite9a5903/common/cpu.h x264-0.158.2988+git-20191101.7817004/common/cpu.h --- x264-0.152.2854+gite9a5903/common/cpu.h 2017-12-31 12:50:50.000000000 +0000 +++ x264-0.158.2988+git-20191101.7817004/common/cpu.h 2019-11-09 05:16:29.000000000 +0000 @@ -1,7 +1,7 @@ /***************************************************************************** * cpu.h: cpu detection ***************************************************************************** - * Copyright (C) 2004-2017 x264 project + * Copyright (C) 2004-2019 x264 project * * Authors: Loren Merritt * @@ -26,8 +26,8 @@ #ifndef X264_CPU_H #define X264_CPU_H -uint32_t x264_cpu_detect( void ); -int x264_cpu_num_processors( void ); +X264_API uint32_t x264_cpu_detect( void ); +X264_API int x264_cpu_num_processors( void ); void x264_cpu_emms( void ); void x264_cpu_sfence( void ); #if HAVE_MMX @@ -46,28 +46,11 @@ #endif #define x264_sfence x264_cpu_sfence -/* kludge: - * gcc can't give variables any greater alignment than the stack frame has. - * We need 32 byte alignment for AVX2, so here we make sure that the stack is - * aligned to 32 bytes. - * gcc 4.2 introduced __attribute__((force_align_arg_pointer)) to fix this - * problem, but I don't want to require such a new version. - * aligning to 32 bytes only works if the compiler supports keeping that - * alignment between functions (osdep.h handles manual alignment of arrays - * if it doesn't). - */ -#if HAVE_MMX && (STACK_ALIGNMENT > 16 || (ARCH_X86 && STACK_ALIGNMENT > 4)) -intptr_t x264_stack_align( void (*func)(), ... ); -#define x264_stack_align(func,...) 
x264_stack_align((void (*)())func, __VA_ARGS__) -#else -#define x264_stack_align(func,...) func(__VA_ARGS__) -#endif - typedef struct { const char *name; uint32_t flags; } x264_cpu_name_t; -extern const x264_cpu_name_t x264_cpu_names[]; +X264_API extern const x264_cpu_name_t x264_cpu_names[]; #endif diff -Nru x264-0.152.2854+gite9a5903/common/dct.c x264-0.158.2988+git-20191101.7817004/common/dct.c --- x264-0.152.2854+gite9a5903/common/dct.c 2017-12-31 12:50:51.000000000 +0000 +++ x264-0.158.2988+git-20191101.7817004/common/dct.c 2019-11-09 05:16:29.000000000 +0000 @@ -1,7 +1,7 @@ /***************************************************************************** * dct.c: transform and zigzag ***************************************************************************** - * Copyright (C) 2003-2017 x264 project + * Copyright (C) 2003-2019 x264 project * * Authors: Loren Merritt * Laurent Aimar @@ -29,83 +29,19 @@ #if HAVE_MMX # include "x86/dct.h" #endif -#if ARCH_PPC +#if HAVE_ALTIVEC # include "ppc/dct.h" #endif -#if ARCH_ARM +#if HAVE_ARMV6 # include "arm/dct.h" #endif -#if ARCH_AARCH64 +#if HAVE_AARCH64 # include "aarch64/dct.h" #endif -#if ARCH_MIPS +#if HAVE_MSA # include "mips/dct.h" #endif -/* the inverse of the scaling factors introduced by 8x8 fdct */ -/* uint32 is for the asm implementation of trellis. the actual values fit in uint16. */ -#define W(i) (i==0 ? FIX8(1.0000) :\ - i==1 ? FIX8(0.8859) :\ - i==2 ? FIX8(1.6000) :\ - i==3 ? FIX8(0.9415) :\ - i==4 ? FIX8(1.2651) :\ - i==5 ? 
FIX8(1.1910) :0) -const uint32_t x264_dct8_weight_tab[64] = { - W(0), W(3), W(4), W(3), W(0), W(3), W(4), W(3), - W(3), W(1), W(5), W(1), W(3), W(1), W(5), W(1), - W(4), W(5), W(2), W(5), W(4), W(5), W(2), W(5), - W(3), W(1), W(5), W(1), W(3), W(1), W(5), W(1), - - W(0), W(3), W(4), W(3), W(0), W(3), W(4), W(3), - W(3), W(1), W(5), W(1), W(3), W(1), W(5), W(1), - W(4), W(5), W(2), W(5), W(4), W(5), W(2), W(5), - W(3), W(1), W(5), W(1), W(3), W(1), W(5), W(1) -}; -#undef W - -#define W(i) (i==0 ? FIX8(1.76777) :\ - i==1 ? FIX8(1.11803) :\ - i==2 ? FIX8(0.70711) :0) -const uint32_t x264_dct4_weight_tab[16] = { - W(0), W(1), W(0), W(1), - W(1), W(2), W(1), W(2), - W(0), W(1), W(0), W(1), - W(1), W(2), W(1), W(2) -}; -#undef W - -/* inverse squared */ -#define W(i) (i==0 ? FIX8(3.125) :\ - i==1 ? FIX8(1.25) :\ - i==2 ? FIX8(0.5) :0) -const uint32_t x264_dct4_weight2_tab[16] = { - W(0), W(1), W(0), W(1), - W(1), W(2), W(1), W(2), - W(0), W(1), W(0), W(1), - W(1), W(2), W(1), W(2) -}; -#undef W - -#define W(i) (i==0 ? FIX8(1.00000) :\ - i==1 ? FIX8(0.78487) :\ - i==2 ? FIX8(2.56132) :\ - i==3 ? FIX8(0.88637) :\ - i==4 ? FIX8(1.60040) :\ - i==5 ? 
FIX8(1.41850) :0) -const uint32_t x264_dct8_weight2_tab[64] = { - W(0), W(3), W(4), W(3), W(0), W(3), W(4), W(3), - W(3), W(1), W(5), W(1), W(3), W(1), W(5), W(1), - W(4), W(5), W(2), W(5), W(4), W(5), W(2), W(5), - W(3), W(1), W(5), W(1), W(3), W(1), W(5), W(1), - - W(0), W(3), W(4), W(3), W(0), W(3), W(4), W(3), - W(3), W(1), W(5), W(1), W(3), W(1), W(5), W(1), - W(4), W(5), W(2), W(5), W(4), W(5), W(2), W(5), - W(3), W(1), W(5), W(1), W(3), W(1), W(5), W(1) -}; -#undef W - - static void dct4x4dc( dctcoef d[16] ) { dctcoef tmp[16]; @@ -501,7 +437,7 @@ add8x8_idct8( &dst[8*FDEC_STRIDE+8], dct[3] ); } -static void inline add4x4_idct_dc( pixel *p_dst, dctcoef dc ) +static inline void add4x4_idct_dc( pixel *p_dst, dctcoef dc ) { dc = (dc + 32) >> 6; for( int i = 0; i < 4; i++, p_dst += FDEC_STRIDE ) @@ -731,6 +667,7 @@ dctf->sub16x16_dct = x264_sub16x16_dct_altivec; dctf->add8x8_idct_dc = x264_add8x8_idct_dc_altivec; + dctf->add16x16_idct_dc = x264_add16x16_idct_dc_altivec; dctf->add4x4_idct = x264_add4x4_idct_altivec; dctf->add8x8_idct = x264_add8x8_idct_altivec; @@ -745,7 +682,7 @@ } #endif -#if HAVE_ARMV6 || ARCH_AARCH64 +#if HAVE_ARMV6 || HAVE_AARCH64 if( cpu&X264_CPU_NEON ) { dctf->sub4x4_dct = x264_sub4x4_dct_neon; @@ -1059,11 +996,11 @@ pf_progressive->scan_8x8 = x264_zigzag_scan_8x8_frame_altivec; } #endif -#if HAVE_ARMV6 || ARCH_AARCH64 +#if HAVE_ARMV6 || HAVE_AARCH64 if( cpu&X264_CPU_NEON ) { pf_progressive->scan_4x4 = x264_zigzag_scan_4x4_frame_neon; -#if ARCH_AARCH64 +#if HAVE_AARCH64 pf_interlaced->scan_4x4 = x264_zigzag_scan_4x4_field_neon; pf_interlaced->scan_8x8 = x264_zigzag_scan_8x8_field_neon; pf_interlaced->sub_4x4 = x264_zigzag_sub_4x4_field_neon; @@ -1073,9 +1010,9 @@ pf_progressive->sub_4x4 = x264_zigzag_sub_4x4_frame_neon; pf_progressive->sub_4x4ac = x264_zigzag_sub_4x4ac_frame_neon; pf_progressive->sub_8x8 = x264_zigzag_sub_8x8_frame_neon; -#endif // ARCH_AARCH64 +#endif // HAVE_AARCH64 } -#endif // HAVE_ARMV6 || ARCH_AARCH64 +#endif // 
HAVE_ARMV6 || HAVE_AARCH64 #endif // HIGH_BIT_DEPTH pf_interlaced->interleave_8x8_cavlc = @@ -1128,13 +1065,13 @@ #endif // HIGH_BIT_DEPTH #endif #if !HIGH_BIT_DEPTH -#if ARCH_AARCH64 +#if HAVE_AARCH64 if( cpu&X264_CPU_NEON ) { pf_interlaced->interleave_8x8_cavlc = pf_progressive->interleave_8x8_cavlc = x264_zigzag_interleave_8x8_cavlc_neon; } -#endif // ARCH_AARCH64 +#endif // HAVE_AARCH64 #if HAVE_ALTIVEC if( cpu&X264_CPU_ALTIVEC ) diff -Nru x264-0.152.2854+gite9a5903/common/dct.h x264-0.158.2988+git-20191101.7817004/common/dct.h --- x264-0.152.2854+gite9a5903/common/dct.h 2017-12-31 12:50:51.000000000 +0000 +++ x264-0.158.2988+git-20191101.7817004/common/dct.h 2019-11-09 05:16:29.000000000 +0000 @@ -1,7 +1,7 @@ /***************************************************************************** * dct.h: transform and zigzag ***************************************************************************** - * Copyright (C) 2004-2017 x264 project + * Copyright (C) 2004-2019 x264 project * * Authors: Loren Merritt * @@ -26,11 +26,6 @@ #ifndef X264_DCT_H #define X264_DCT_H -extern const uint32_t x264_dct4_weight_tab[16]; -extern const uint32_t x264_dct8_weight_tab[64]; -extern const uint32_t x264_dct4_weight2_tab[16]; -extern const uint32_t x264_dct8_weight2_tab[64]; - typedef struct { // pix1 stride = FENC_STRIDE @@ -74,7 +69,9 @@ } x264_zigzag_function_t; +#define x264_dct_init x264_template(dct_init) void x264_dct_init( int cpu, x264_dct_function_t *dctf ); +#define x264_zigzag_init x264_template(zigzag_init) void x264_zigzag_init( int cpu, x264_zigzag_function_t *pf_progressive, x264_zigzag_function_t *pf_interlaced ); #endif diff -Nru x264-0.152.2854+gite9a5903/common/deblock.c x264-0.158.2988+git-20191101.7817004/common/deblock.c --- x264-0.152.2854+gite9a5903/common/deblock.c 2017-12-31 12:50:51.000000000 +0000 +++ x264-0.158.2988+git-20191101.7817004/common/deblock.c 2019-11-09 05:16:29.000000000 +0000 @@ -1,7 +1,7 @@ 
/***************************************************************************** * deblock.c: deblocking ***************************************************************************** - * Copyright (C) 2003-2017 x264 project + * Copyright (C) 2003-2019 x264 project * * Authors: Laurent Aimar * Loren Merritt @@ -337,7 +337,7 @@ pf_intra( pix, i_stride, alpha, beta ); } -static ALWAYS_INLINE void x264_macroblock_cache_load_neighbours_deblock( x264_t *h, int mb_x, int mb_y ) +static ALWAYS_INLINE void macroblock_cache_load_neighbours_deblock( x264_t *h, int mb_x, int mb_y ) { int deblock_on_slice_edges = h->sh.i_disable_deblocking_filter_idc != 2; @@ -383,6 +383,7 @@ int qp_thresh = 15 - X264_MIN( a, b ) - X264_MAX( 0, h->pps->i_chroma_qp_index_offset ); int stridey = h->fdec->i_stride[0]; int strideuv = h->fdec->i_stride[1]; + int chroma_format = CHROMA_FORMAT; int chroma444 = CHROMA444; int chroma_height = 16 >> CHROMA_V_SHIFT; intptr_t uvdiff = chroma444 ? h->fdec->plane[2] - h->fdec->plane[1] : 1; @@ -390,7 +391,7 @@ for( int mb_x = 0; mb_x < h->mb.i_mb_width; mb_x += (~b_interlaced | mb_y)&1, mb_y ^= b_interlaced ) { x264_prefetch_fenc( h, h->fdec, mb_x, mb_y ); - x264_macroblock_cache_load_neighbours_deblock( h, mb_x, mb_y ); + macroblock_cache_load_neighbours_deblock( h, mb_x, mb_y ); int mb_xy = h->mb.i_mb_xy; int transform_8x8 = h->mb.mb_transform_size[mb_xy]; @@ -420,7 +421,7 @@ deblock_edge##intra( h, pixy + 4*edge*(dir?stride2y:1),\ stride2y, bs[dir][edge], qp, a, b, 0,\ h->loopf.deblock_luma##intra[dir] );\ - if( CHROMA_FORMAT == CHROMA_444 )\ + if( chroma_format == CHROMA_444 )\ {\ deblock_edge##intra( h, pixuv + 4*edge*(dir?stride2uv:1),\ stride2uv, bs[dir][edge], chroma_qp, a, b, 0,\ @@ -429,14 +430,14 @@ stride2uv, bs[dir][edge], chroma_qp, a, b, 0,\ h->loopf.deblock_luma##intra[dir] );\ }\ - else if( CHROMA_FORMAT == CHROMA_420 && !(edge & 1) )\ + else if( chroma_format == CHROMA_420 && !(edge & 1) )\ {\ deblock_edge##intra( h, pixuv + 
edge*(dir?2*stride2uv:4),\ stride2uv, bs[dir][edge], chroma_qp, a, b, 1,\ h->loopf.deblock_chroma##intra[dir] );\ }\ }\ - if( CHROMA_FORMAT == CHROMA_422 && (dir || !(edge & 1)) )\ + if( chroma_format == CHROMA_422 && (dir || !(edge & 1)) )\ {\ deblock_edge##intra( h, pixuv + edge*(dir?4*stride2uv:4),\ stride2uv, bs[dir][edge], chroma_qp, a, b, 1,\ @@ -463,16 +464,22 @@ if( intra_cur || IS_INTRA( h->mb.type[h->mb.i_mb_left_xy[0]] ) ) { deblock_edge_intra( h, pixy, 2*stridey, bs[0][0], luma_qp[0], a, b, 0, luma_intra_deblock ); - deblock_edge_intra( h, pixuv, 2*strideuv, bs[0][0], chroma_qp[0], a, b, c, chroma_intra_deblock ); - if( chroma444 ) - deblock_edge_intra( h, pixuv + uvdiff, 2*strideuv, bs[0][0], chroma_qp[0], a, b, c, chroma_intra_deblock ); + if( chroma_format ) + { + deblock_edge_intra( h, pixuv, 2*strideuv, bs[0][0], chroma_qp[0], a, b, c, chroma_intra_deblock ); + if( chroma444 ) + deblock_edge_intra( h, pixuv + uvdiff, 2*strideuv, bs[0][0], chroma_qp[0], a, b, c, chroma_intra_deblock ); + } } else { deblock_edge( h, pixy, 2*stridey, bs[0][0], luma_qp[0], a, b, 0, luma_deblock ); - deblock_edge( h, pixuv, 2*strideuv, bs[0][0], chroma_qp[0], a, b, c, chroma_deblock ); - if( chroma444 ) - deblock_edge( h, pixuv + uvdiff, 2*strideuv, bs[0][0], chroma_qp[0], a, b, c, chroma_deblock ); + if( chroma_format ) + { + deblock_edge( h, pixuv, 2*strideuv, bs[0][0], chroma_qp[0], a, b, c, chroma_deblock ); + if( chroma444 ) + deblock_edge( h, pixuv + uvdiff, 2*strideuv, bs[0][0], chroma_qp[0], a, b, c, chroma_deblock ); + } } int offy = MB_INTERLACED ? 
4 : 0; @@ -483,16 +490,22 @@ if( intra_cur || IS_INTRA( h->mb.type[h->mb.i_mb_left_xy[1]] ) ) { deblock_edge_intra( h, pixy + (stridey<loopf.deblock_luma[1] ); deblock_edge( h, pixuv + uvdiff + j*strideuv, 2*strideuv, bs[1][4*j], qpc_top, a, b, 0, h->loopf.deblock_luma[1] ); } - else + else if( chroma_format ) deblock_edge( h, pixuv + j*strideuv, 2*strideuv, bs[1][4*j], qpc_top, a, b, 1, h->loopf.deblock_chroma[1] ); } } @@ -652,117 +665,19 @@ } #if HAVE_MMX -void x264_deblock_v_luma_sse2( pixel *pix, intptr_t stride, int alpha, int beta, int8_t *tc0 ); -void x264_deblock_v_luma_avx ( pixel *pix, intptr_t stride, int alpha, int beta, int8_t *tc0 ); -void x264_deblock_h_luma_sse2( pixel *pix, intptr_t stride, int alpha, int beta, int8_t *tc0 ); -void x264_deblock_h_luma_avx ( pixel *pix, intptr_t stride, int alpha, int beta, int8_t *tc0 ); -void x264_deblock_v_chroma_sse2( pixel *pix, intptr_t stride, int alpha, int beta, int8_t *tc0 ); -void x264_deblock_v_chroma_avx ( pixel *pix, intptr_t stride, int alpha, int beta, int8_t *tc0 ); -void x264_deblock_h_chroma_sse2( pixel *pix, intptr_t stride, int alpha, int beta, int8_t *tc0 ); -void x264_deblock_h_chroma_avx ( pixel *pix, intptr_t stride, int alpha, int beta, int8_t *tc0 ); -void x264_deblock_h_chroma_mbaff_sse2( pixel *pix, intptr_t stride, int alpha, int beta, int8_t *tc0 ); -void x264_deblock_h_chroma_mbaff_avx ( pixel *pix, intptr_t stride, int alpha, int beta, int8_t *tc0 ); -void x264_deblock_h_chroma_422_mmx2( pixel *pix, intptr_t stride, int alpha, int beta, int8_t *tc0 ); -void x264_deblock_h_chroma_422_sse2( pixel *pix, intptr_t stride, int alpha, int beta, int8_t *tc0 ); -void x264_deblock_h_chroma_422_avx ( pixel *pix, intptr_t stride, int alpha, int beta, int8_t *tc0 ); -void x264_deblock_v_luma_intra_sse2( pixel *pix, intptr_t stride, int alpha, int beta ); -void x264_deblock_v_luma_intra_avx ( pixel *pix, intptr_t stride, int alpha, int beta ); -void x264_deblock_h_luma_intra_sse2( pixel *pix, 
intptr_t stride, int alpha, int beta ); -void x264_deblock_h_luma_intra_avx ( pixel *pix, intptr_t stride, int alpha, int beta ); -void x264_deblock_v_chroma_intra_sse2( pixel *pix, intptr_t stride, int alpha, int beta ); -void x264_deblock_v_chroma_intra_avx ( pixel *pix, intptr_t stride, int alpha, int beta ); -void x264_deblock_h_chroma_intra_sse2( pixel *pix, intptr_t stride, int alpha, int beta ); -void x264_deblock_h_chroma_intra_avx ( pixel *pix, intptr_t stride, int alpha, int beta ); -void x264_deblock_h_chroma_422_intra_mmx2( pixel *pix, intptr_t stride, int alpha, int beta ); -void x264_deblock_h_chroma_422_intra_sse2( pixel *pix, intptr_t stride, int alpha, int beta ); -void x264_deblock_h_chroma_422_intra_avx ( pixel *pix, intptr_t stride, int alpha, int beta ); -void x264_deblock_strength_sse2 ( uint8_t nnz[X264_SCAN8_SIZE], int8_t ref[2][X264_SCAN8_LUMA_SIZE], - int16_t mv[2][X264_SCAN8_LUMA_SIZE][2], uint8_t bs[2][8][4], - int mvy_limit, int bframe ); -void x264_deblock_strength_ssse3 ( uint8_t nnz[X264_SCAN8_SIZE], int8_t ref[2][X264_SCAN8_LUMA_SIZE], - int16_t mv[2][X264_SCAN8_LUMA_SIZE][2], uint8_t bs[2][8][4], - int mvy_limit, int bframe ); -void x264_deblock_strength_avx ( uint8_t nnz[X264_SCAN8_SIZE], int8_t ref[2][X264_SCAN8_LUMA_SIZE], - int16_t mv[2][X264_SCAN8_LUMA_SIZE][2], uint8_t bs[2][8][4], - int mvy_limit, int bframe ); -void x264_deblock_strength_avx2 ( uint8_t nnz[X264_SCAN8_SIZE], int8_t ref[2][X264_SCAN8_LUMA_SIZE], - int16_t mv[2][X264_SCAN8_LUMA_SIZE][2], uint8_t bs[2][8][4], - int mvy_limit, int bframe ); -void x264_deblock_strength_avx512( uint8_t nnz[X264_SCAN8_SIZE], int8_t ref[2][X264_SCAN8_LUMA_SIZE], - int16_t mv[2][X264_SCAN8_LUMA_SIZE][2], uint8_t bs[2][8][4], - int mvy_limit, int bframe ); - -void x264_deblock_h_chroma_intra_mbaff_mmx2( pixel *pix, intptr_t stride, int alpha, int beta ); -void x264_deblock_h_chroma_intra_mbaff_sse2( pixel *pix, intptr_t stride, int alpha, int beta ); -void 
x264_deblock_h_chroma_intra_mbaff_avx ( pixel *pix, intptr_t stride, int alpha, int beta ); -#if ARCH_X86 -void x264_deblock_h_luma_mmx2( pixel *pix, intptr_t stride, int alpha, int beta, int8_t *tc0 ); -void x264_deblock_v8_luma_mmx2( uint8_t *pix, intptr_t stride, int alpha, int beta, int8_t *tc0 ); -void x264_deblock_v_chroma_mmx2( pixel *pix, intptr_t stride, int alpha, int beta, int8_t *tc0 ); -void x264_deblock_h_chroma_mmx2( pixel *pix, intptr_t stride, int alpha, int beta, int8_t *tc0 ); -void x264_deblock_h_chroma_mbaff_mmx2( pixel *pix, intptr_t stride, int alpha, int beta, int8_t *tc0 ); -void x264_deblock_h_luma_intra_mmx2( pixel *pix, intptr_t stride, int alpha, int beta ); -void x264_deblock_v8_luma_intra_mmx2( uint8_t *pix, intptr_t stride, int alpha, int beta ); -void x264_deblock_v_chroma_intra_mmx2( pixel *pix, intptr_t stride, int alpha, int beta ); -void x264_deblock_h_chroma_intra_mmx2( pixel *pix, intptr_t stride, int alpha, int beta ); -void x264_deblock_h_chroma_intra_mbaff_mmx2( pixel *pix, intptr_t stride, int alpha, int beta ); - -#if HIGH_BIT_DEPTH -void x264_deblock_v_luma_mmx2( pixel *pix, intptr_t stride, int alpha, int beta, int8_t *tc0 ); -void x264_deblock_v_luma_intra_mmx2( pixel *pix, intptr_t stride, int alpha, int beta ); -#else -// FIXME this wrapper has a significant cpu cost -static void x264_deblock_v_luma_mmx2( uint8_t *pix, intptr_t stride, int alpha, int beta, int8_t *tc0 ) -{ - x264_deblock_v8_luma_mmx2( pix, stride, alpha, beta, tc0 ); - x264_deblock_v8_luma_mmx2( pix+8, stride, alpha, beta, tc0+2 ); -} -static void x264_deblock_v_luma_intra_mmx2( uint8_t *pix, intptr_t stride, int alpha, int beta ) -{ - x264_deblock_v8_luma_intra_mmx2( pix, stride, alpha, beta ); - x264_deblock_v8_luma_intra_mmx2( pix+8, stride, alpha, beta ); -} -#endif // HIGH_BIT_DEPTH +#include "x86/deblock.h" #endif +#if HAVE_ALTIVEC +#include "ppc/deblock.h" #endif - -#if ARCH_PPC -void x264_deblock_v_luma_altivec( uint8_t *pix, intptr_t stride, 
int alpha, int beta, int8_t *tc0 ); -void x264_deblock_h_luma_altivec( uint8_t *pix, intptr_t stride, int alpha, int beta, int8_t *tc0 ); -#endif // ARCH_PPC - -#if HAVE_ARMV6 || ARCH_AARCH64 -void x264_deblock_v_luma_neon ( uint8_t *pix, intptr_t stride, int alpha, int beta, int8_t *tc0 ); -void x264_deblock_h_luma_neon ( uint8_t *pix, intptr_t stride, int alpha, int beta, int8_t *tc0 ); -void x264_deblock_v_chroma_neon( uint8_t *pix, intptr_t stride, int alpha, int beta, int8_t *tc0 ); -void x264_deblock_h_chroma_neon( uint8_t *pix, intptr_t stride, int alpha, int beta, int8_t *tc0 ); -void x264_deblock_strength_neon( uint8_t nnz[X264_SCAN8_SIZE], int8_t ref[2][X264_SCAN8_LUMA_SIZE], - int16_t mv[2][X264_SCAN8_LUMA_SIZE][2], uint8_t bs[2][8][4], - int mvy_limit, int bframe ); -void x264_deblock_h_chroma_422_neon( uint8_t *pix, intptr_t stride, int alpha, int beta, int8_t *tc0 ); -void x264_deblock_h_chroma_mbaff_neon( uint8_t *pix, intptr_t stride, int alpha, int beta, int8_t *tc0 ); -void x264_deblock_h_chroma_intra_mbaff_neon( uint8_t *pix, intptr_t stride, int alpha, int beta ); -void x264_deblock_h_chroma_intra_neon( uint8_t *pix, intptr_t stride, int alpha, int beta ); -void x264_deblock_h_chroma_422_intra_neon( uint8_t *pix, intptr_t stride, int alpha, int beta ); -void x264_deblock_v_chroma_intra_neon( uint8_t *pix, intptr_t stride, int alpha, int beta ); -void x264_deblock_h_luma_intra_neon( uint8_t *pix, intptr_t stride, int alpha, int beta ); -void x264_deblock_v_luma_intra_neon( uint8_t *pix, intptr_t stride, int alpha, int beta ); +#if HAVE_ARMV6 +#include "arm/deblock.h" #endif - -#if !HIGH_BIT_DEPTH -#if HAVE_MSA -void x264_deblock_v_luma_msa( uint8_t *pix, intptr_t stride, int alpha, int beta, int8_t *tc0 ); -void x264_deblock_h_luma_msa( uint8_t *pix, intptr_t stride, int alpha, int beta, int8_t *tc0 ); -void x264_deblock_v_chroma_msa( uint8_t *pix, intptr_t stride, int alpha, int beta, int8_t *tc0 ); -void x264_deblock_h_chroma_msa( uint8_t *pix, 
intptr_t stride, int alpha, int beta, int8_t *tc0 ); -void x264_deblock_v_luma_intra_msa( uint8_t *pix, intptr_t stride, int alpha, int beta ); -void x264_deblock_h_luma_intra_msa( uint8_t *pix, intptr_t stride, int alpha, int beta ); -void x264_deblock_v_chroma_intra_msa( uint8_t *pix, intptr_t stride, int alpha, int beta ); -void x264_deblock_h_chroma_intra_msa( uint8_t *pix, intptr_t stride, int alpha, int beta ); -void x264_deblock_strength_msa( uint8_t nnz[X264_SCAN8_SIZE], int8_t ref[2][X264_SCAN8_LUMA_SIZE], - int16_t mv[2][X264_SCAN8_LUMA_SIZE][2], uint8_t bs[2][8][4], int mvy_limit, - int bframe ); +#if HAVE_AARCH64 +#include "aarch64/deblock.h" #endif +#if HAVE_MSA +#include "mips/deblock.h" #endif void x264_deblock_init( int cpu, x264_deblock_function_t *pf, int b_mbaff ) @@ -867,7 +782,7 @@ } #endif // HAVE_ALTIVEC -#if HAVE_ARMV6 || ARCH_AARCH64 +#if HAVE_ARMV6 || HAVE_AARCH64 if( cpu&X264_CPU_NEON ) { pf->deblock_luma[1] = x264_deblock_v_luma_neon; diff -Nru x264-0.152.2854+gite9a5903/common/frame.c x264-0.158.2988+git-20191101.7817004/common/frame.c --- x264-0.152.2854+gite9a5903/common/frame.c 2017-12-31 12:50:51.000000000 +0000 +++ x264-0.158.2988+git-20191101.7817004/common/frame.c 2019-11-09 05:16:29.000000000 +0000 @@ -1,7 +1,7 @@ /***************************************************************************** * frame.c: frame handling ***************************************************************************** - * Copyright (C) 2003-2017 x264 project + * Copyright (C) 2003-2019 x264 project * * Authors: Laurent Aimar * Loren Merritt @@ -42,37 +42,24 @@ return x; } -static int x264_frame_internal_csp( int external_csp ) +static int frame_internal_csp( int external_csp ) { - switch( external_csp & X264_CSP_MASK ) - { - case X264_CSP_NV12: - case X264_CSP_NV21: - case X264_CSP_I420: - case X264_CSP_YV12: - return X264_CSP_NV12; - case X264_CSP_NV16: - case X264_CSP_I422: - case X264_CSP_YV16: - case X264_CSP_YUYV: - case X264_CSP_UYVY: - case 
X264_CSP_V210: - return X264_CSP_NV16; - case X264_CSP_I444: - case X264_CSP_YV24: - case X264_CSP_BGR: - case X264_CSP_BGRA: - case X264_CSP_RGB: - return X264_CSP_I444; - default: - return X264_CSP_NONE; - } + int csp = external_csp & X264_CSP_MASK; + if( csp == X264_CSP_I400 ) + return X264_CSP_I400; + if( csp >= X264_CSP_I420 && csp < X264_CSP_I422 ) + return X264_CSP_NV12; + if( csp >= X264_CSP_I422 && csp < X264_CSP_I444 ) + return X264_CSP_NV16; + if( csp >= X264_CSP_I444 && csp <= X264_CSP_RGB ) + return X264_CSP_I444; + return X264_CSP_NONE; } -static x264_frame_t *x264_frame_new( x264_t *h, int b_fdec ) +static x264_frame_t *frame_new( x264_t *h, int b_fdec ) { x264_frame_t *frame; - int i_csp = x264_frame_internal_csp( h->param.i_csp ); + int i_csp = frame_internal_csp( h->param.i_csp ); int i_mb_count = h->mb.i_mb_count; int i_stride, i_width, i_lines, luma_plane_count; int i_padv = PADV << PARAM_INTERLACED; @@ -89,6 +76,9 @@ int disalign = 1<<10; #endif + /* ensure frame alignment after PADH is added */ + int padh_align = X264_MAX( align - PADH * sizeof(pixel), 0 ) / sizeof(pixel); + CHECKED_MALLOCZERO( frame, sizeof(x264_frame_t) ); PREALLOC_INIT @@ -119,6 +109,14 @@ frame->i_stride[i] = i_stride; } } + else if( i_csp == X264_CSP_I400 ) + { + luma_plane_count = 1; + frame->i_plane = 1; + frame->i_width[0] = i_width; + frame->i_lines[0] = i_lines; + frame->i_stride[0] = i_stride; + } else goto fail; @@ -154,9 +152,9 @@ { int chroma_padv = i_padv >> (i_csp == X264_CSP_NV12); int chroma_plane_size = (frame->i_stride[1] * (frame->i_lines[1] + 2*chroma_padv)); - PREALLOC( frame->buffer[1], chroma_plane_size * sizeof(pixel) ); + PREALLOC( frame->buffer[1], (chroma_plane_size + padh_align) * sizeof(pixel) ); if( PARAM_INTERLACED ) - PREALLOC( frame->buffer_fld[1], chroma_plane_size * sizeof(pixel) ); + PREALLOC( frame->buffer_fld[1], (chroma_plane_size + padh_align) * sizeof(pixel) ); } /* all 4 luma planes allocated together, since the cacheline split code 
@@ -166,18 +164,12 @@ { int luma_plane_size = align_plane_size( frame->i_stride[p] * (frame->i_lines[p] + 2*i_padv), disalign ); if( h->param.analyse.i_subpel_refine && b_fdec ) - { - /* FIXME: Don't allocate both buffers in non-adaptive MBAFF. */ - PREALLOC( frame->buffer[p], 4*luma_plane_size * sizeof(pixel) ); - if( PARAM_INTERLACED ) - PREALLOC( frame->buffer_fld[p], 4*luma_plane_size * sizeof(pixel) ); - } - else - { - PREALLOC( frame->buffer[p], luma_plane_size * sizeof(pixel) ); - if( PARAM_INTERLACED ) - PREALLOC( frame->buffer_fld[p], luma_plane_size * sizeof(pixel) ); - } + luma_plane_size *= 4; + + /* FIXME: Don't allocate both buffers in non-adaptive MBAFF. */ + PREALLOC( frame->buffer[p], (luma_plane_size + padh_align) * sizeof(pixel) ); + if( PARAM_INTERLACED ) + PREALLOC( frame->buffer_fld[p], (luma_plane_size + padh_align) * sizeof(pixel) ); } frame->b_duplicate = 0; @@ -215,7 +207,7 @@ { int luma_plane_size = align_plane_size( frame->i_stride_lowres * (frame->i_lines[0]/2 + 2*PADV), disalign ); - PREALLOC( frame->buffer_lowres[0], 4 * luma_plane_size * sizeof(pixel) ); + PREALLOC( frame->buffer_lowres, (4 * luma_plane_size + padh_align) * sizeof(pixel) ); for( int j = 0; j <= !!h->param.i_bframe; j++ ) for( int i = 0; i <= h->param.i_bframe; i++ ) @@ -245,9 +237,9 @@ if( i_csp == X264_CSP_NV12 || i_csp == X264_CSP_NV16 ) { int chroma_padv = i_padv >> (i_csp == X264_CSP_NV12); - frame->plane[1] = frame->buffer[1] + frame->i_stride[1] * chroma_padv + PADH; + frame->plane[1] = frame->buffer[1] + frame->i_stride[1] * chroma_padv + PADH + padh_align; if( PARAM_INTERLACED ) - frame->plane_fld[1] = frame->buffer_fld[1] + frame->i_stride[1] * chroma_padv + PADH; + frame->plane_fld[1] = frame->buffer_fld[1] + frame->i_stride[1] * chroma_padv + PADH + padh_align; } for( int p = 0; p < luma_plane_count; p++ ) @@ -257,16 +249,16 @@ { for( int i = 0; i < 4; i++ ) { - frame->filtered[p][i] = frame->buffer[p] + i*luma_plane_size + frame->i_stride[p] * i_padv + 
PADH; - frame->filtered_fld[p][i] = frame->buffer_fld[p] + i*luma_plane_size + frame->i_stride[p] * i_padv + PADH; + frame->filtered[p][i] = frame->buffer[p] + i*luma_plane_size + frame->i_stride[p] * i_padv + PADH + padh_align; + frame->filtered_fld[p][i] = frame->buffer_fld[p] + i*luma_plane_size + frame->i_stride[p] * i_padv + PADH + padh_align; } frame->plane[p] = frame->filtered[p][0]; frame->plane_fld[p] = frame->filtered_fld[p][0]; } else { - frame->filtered[p][0] = frame->plane[p] = frame->buffer[p] + frame->i_stride[p] * i_padv + PADH; - frame->filtered_fld[p][0] = frame->plane_fld[p] = frame->buffer_fld[p] + frame->i_stride[p] * i_padv + PADH; + frame->filtered[p][0] = frame->plane[p] = frame->buffer[p] + frame->i_stride[p] * i_padv + PADH + padh_align; + frame->filtered_fld[p][0] = frame->plane_fld[p] = frame->buffer_fld[p] + frame->i_stride[p] * i_padv + PADH + padh_align; } } @@ -284,7 +276,7 @@ { int luma_plane_size = align_plane_size( frame->i_stride_lowres * (frame->i_lines[0]/2 + 2*PADV), disalign ); for( int i = 0; i < 4; i++ ) - frame->lowres[i] = frame->buffer_lowres[0] + (frame->i_stride_lowres * PADV + PADH) + i * luma_plane_size; + frame->lowres[i] = frame->buffer_lowres + frame->i_stride_lowres * PADV + PADH + padh_align + i * luma_plane_size; for( int j = 0; j <= !!h->param.i_bframe; j++ ) for( int i = 0; i <= h->param.i_bframe; i++ ) @@ -366,7 +358,7 @@ int x264_frame_copy_picture( x264_t *h, x264_frame_t *dst, x264_picture_t *src ) { int i_csp = src->img.i_csp & X264_CSP_MASK; - if( dst->i_csp != x264_frame_internal_csp( i_csp ) ) + if( dst->i_csp != frame_internal_csp( i_csp ) ) { x264_log( h, X264_LOG_ERROR, "Invalid input colorspace\n" ); return -1; @@ -470,7 +462,7 @@ (pixel*)pix[2], stride[2]/sizeof(pixel), h->param.i_width>>1, h->param.i_height>>v_shift ); } - else //if( i_csp == X264_CSP_I444 || i_csp == X264_CSP_YV24 ) + else if( i_csp == X264_CSP_I444 || i_csp == X264_CSP_YV24 ) { get_plane_ptr( h, src, &pix[1], &stride[1], 
i_csp==X264_CSP_I444 ? 1 : 2, 0, 0 ); get_plane_ptr( h, src, &pix[2], &stride[2], i_csp==X264_CSP_I444 ? 2 : 1, 0, 0 ); @@ -483,7 +475,7 @@ return 0; } -static void ALWAYS_INLINE pixel_memset( pixel *dst, pixel *src, int len, int size ) +static ALWAYS_INLINE void pixel_memset( pixel *dst, pixel *src, int len, int size ) { uint8_t *dstp = (uint8_t*)dst; uint32_t v1 = *src; @@ -535,7 +527,7 @@ } } -static void ALWAYS_INLINE plane_expand_border( pixel *pix, int i_stride, int i_width, int i_height, int i_padh, int i_padv, int b_pad_top, int b_pad_bottom, int b_chroma ) +static ALWAYS_INLINE void plane_expand_border( pixel *pix, int i_stride, int i_width, int i_height, int i_padh, int i_padv, int b_pad_top, int b_pad_bottom, int b_chroma ) { #define PPIXEL(x, y) ( pix + (x) + (y)*i_stride ) for( int y = 0; y < i_height; y++ ) @@ -784,7 +776,7 @@ if( h->frames.unused[b_fdec][0] ) frame = x264_frame_pop( h->frames.unused[b_fdec] ); else - frame = x264_frame_new( h, b_fdec ); + frame = frame_new( h, b_fdec ); if( !frame ) return NULL; frame->b_last_minigop_bframe = 0; diff -Nru x264-0.152.2854+gite9a5903/common/frame.h x264-0.158.2988+git-20191101.7817004/common/frame.h --- x264-0.152.2854+gite9a5903/common/frame.h 2017-12-31 12:50:51.000000000 +0000 +++ x264-0.158.2988+git-20191101.7817004/common/frame.h 2019-11-09 05:16:29.000000000 +0000 @@ -1,7 +1,7 @@ /***************************************************************************** * frame.h: frame handling ***************************************************************************** - * Copyright (C) 2003-2017 x264 project + * Copyright (C) 2003-2019 x264 project * * Authors: Laurent Aimar * Loren Merritt @@ -86,7 +86,7 @@ * allocated data are stored in buffer */ pixel *buffer[4]; pixel *buffer_fld[4]; - pixel *buffer_lowres[4]; + pixel *buffer_lowres; x264_weight_t weight[X264_REF_MAX][3]; /* [ref_index][plane] */ pixel *weighted[X264_REF_MAX]; /* plane[0] weighted of the reference frames */ @@ -216,47 +216,80 @@ int 
bframe ); } x264_deblock_function_t; +#define x264_frame_delete x264_template(frame_delete) void x264_frame_delete( x264_frame_t *frame ); +#define x264_frame_copy_picture x264_template(frame_copy_picture) int x264_frame_copy_picture( x264_t *h, x264_frame_t *dst, x264_picture_t *src ); +#define x264_frame_expand_border x264_template(frame_expand_border) void x264_frame_expand_border( x264_t *h, x264_frame_t *frame, int mb_y ); +#define x264_frame_expand_border_filtered x264_template(frame_expand_border_filtered) void x264_frame_expand_border_filtered( x264_t *h, x264_frame_t *frame, int mb_y, int b_end ); +#define x264_frame_expand_border_lowres x264_template(frame_expand_border_lowres) void x264_frame_expand_border_lowres( x264_frame_t *frame ); +#define x264_frame_expand_border_chroma x264_template(frame_expand_border_chroma) void x264_frame_expand_border_chroma( x264_t *h, x264_frame_t *frame, int plane ); +#define x264_frame_expand_border_mod16 x264_template(frame_expand_border_mod16) void x264_frame_expand_border_mod16( x264_t *h, x264_frame_t *frame ); +#define x264_expand_border_mbpair x264_template(expand_border_mbpair) void x264_expand_border_mbpair( x264_t *h, int mb_x, int mb_y ); +#define x264_frame_deblock_row x264_template(frame_deblock_row) void x264_frame_deblock_row( x264_t *h, int mb_y ); +#define x264_macroblock_deblock x264_template(macroblock_deblock) void x264_macroblock_deblock( x264_t *h ); +#define x264_frame_filter x264_template(frame_filter) void x264_frame_filter( x264_t *h, x264_frame_t *frame, int mb_y, int b_end ); +#define x264_frame_init_lowres x264_template(frame_init_lowres) void x264_frame_init_lowres( x264_t *h, x264_frame_t *frame ); +#define x264_deblock_init x264_template(deblock_init) void x264_deblock_init( int cpu, x264_deblock_function_t *pf, int b_mbaff ); +#define x264_frame_cond_broadcast x264_template(frame_cond_broadcast) void x264_frame_cond_broadcast( x264_frame_t *frame, int i_lines_completed ); +#define 
x264_frame_cond_wait x264_template(frame_cond_wait) void x264_frame_cond_wait( x264_frame_t *frame, int i_lines_completed ); +#define x264_frame_new_slice x264_template(frame_new_slice) int x264_frame_new_slice( x264_t *h, x264_frame_t *frame ); +#define x264_threadslice_cond_broadcast x264_template(threadslice_cond_broadcast) void x264_threadslice_cond_broadcast( x264_t *h, int pass ); +#define x264_threadslice_cond_wait x264_template(threadslice_cond_wait) void x264_threadslice_cond_wait( x264_t *h, int pass ); -void x264_frame_push( x264_frame_t **list, x264_frame_t *frame ); -x264_frame_t *x264_frame_pop( x264_frame_t **list ); -void x264_frame_unshift( x264_frame_t **list, x264_frame_t *frame ); -x264_frame_t *x264_frame_shift( x264_frame_t **list ); +#define x264_frame_push x264_template(frame_push) +X264_API void x264_frame_push( x264_frame_t **list, x264_frame_t *frame ); +#define x264_frame_pop x264_template(frame_pop) +X264_API x264_frame_t *x264_frame_pop( x264_frame_t **list ); +#define x264_frame_unshift x264_template(frame_unshift) +X264_API void x264_frame_unshift( x264_frame_t **list, x264_frame_t *frame ); +#define x264_frame_shift x264_template(frame_shift) +X264_API x264_frame_t *x264_frame_shift( x264_frame_t **list ); + +#define x264_frame_push_unused x264_template(frame_push_unused) void x264_frame_push_unused( x264_t *h, x264_frame_t *frame ); +#define x264_frame_push_blank_unused x264_template(frame_push_blank_unused) void x264_frame_push_blank_unused( x264_t *h, x264_frame_t *frame ); +#define x264_frame_pop_blank_unused x264_template(frame_pop_blank_unused) x264_frame_t *x264_frame_pop_blank_unused( x264_t *h ); +#define x264_weight_scale_plane x264_template(weight_scale_plane) void x264_weight_scale_plane( x264_t *h, pixel *dst, intptr_t i_dst_stride, pixel *src, intptr_t i_src_stride, int i_width, int i_height, x264_weight_t *w ); +#define x264_frame_pop_unused x264_template(frame_pop_unused) x264_frame_t *x264_frame_pop_unused( x264_t 
*h, int b_fdec ); +#define x264_frame_delete_list x264_template(frame_delete_list) void x264_frame_delete_list( x264_frame_t **list ); +#define x264_sync_frame_list_init x264_template(sync_frame_list_init) int x264_sync_frame_list_init( x264_sync_frame_list_t *slist, int nelem ); +#define x264_sync_frame_list_delete x264_template(sync_frame_list_delete) void x264_sync_frame_list_delete( x264_sync_frame_list_t *slist ); +#define x264_sync_frame_list_push x264_template(sync_frame_list_push) void x264_sync_frame_list_push( x264_sync_frame_list_t *slist, x264_frame_t *frame ); +#define x264_sync_frame_list_pop x264_template(sync_frame_list_pop) x264_frame_t *x264_sync_frame_list_pop( x264_sync_frame_list_t *slist ); #endif diff -Nru x264-0.152.2854+gite9a5903/common/macroblock.c x264-0.158.2988+git-20191101.7817004/common/macroblock.c --- x264-0.152.2854+gite9a5903/common/macroblock.c 2017-12-31 12:50:51.000000000 +0000 +++ x264-0.158.2988+git-20191101.7817004/common/macroblock.c 2019-11-09 05:16:29.000000000 +0000 @@ -1,7 +1,7 @@ /***************************************************************************** * macroblock.c: macroblock common functions ***************************************************************************** - * Copyright (C) 2003-2017 x264 project + * Copyright (C) 2003-2019 x264 project * * Authors: Fiona Glaser * Laurent Aimar @@ -34,7 +34,7 @@ mvx, mvy, 4*width, 4*height, \ list ? 
x264_weight_none : &h->sh.weight[i_ref][p] ); -static NOINLINE void x264_mb_mc_0xywh( x264_t *h, int x, int y, int width, int height ) +static NOINLINE void mb_mc_0xywh( x264_t *h, int x, int y, int width, int height ) { int i8 = x264_scan8[0]+x+8*y; int i_ref = h->mb.cache.ref[0][i8]; @@ -48,7 +48,7 @@ MC_LUMA( 0, 1 ); MC_LUMA( 0, 2 ); } - else + else if( CHROMA_FORMAT ) { int v_shift = CHROMA_V_SHIFT; // Chroma in 4:2:0 is offset if MCing from a field of opposite parity @@ -73,7 +73,7 @@ &h->sh.weight[i_ref][2], height ); } } -static NOINLINE void x264_mb_mc_1xywh( x264_t *h, int x, int y, int width, int height ) +static NOINLINE void mb_mc_1xywh( x264_t *h, int x, int y, int width, int height ) { int i8 = x264_scan8[0]+x+8*y; int i_ref = h->mb.cache.ref[1][i8]; @@ -87,7 +87,7 @@ MC_LUMA( 1, 1 ); MC_LUMA( 1, 2 ); } - else + else if( CHROMA_FORMAT ) { int v_shift = CHROMA_V_SHIFT; if( v_shift & MB_INTERLACED & i_ref ) @@ -109,7 +109,7 @@ h->mc.avg[i_mode]( &h->mb.pic.p_fdec[p][4*y*FDEC_STRIDE+4*x], FDEC_STRIDE, \ src0, i_stride0, src1, i_stride1, weight ); -static NOINLINE void x264_mb_mc_01xywh( x264_t *h, int x, int y, int width, int height ) +static NOINLINE void mb_mc_01xywh( x264_t *h, int x, int y, int width, int height ) { int i8 = x264_scan8[0]+x+8*y; int i_ref0 = h->mb.cache.ref[0][i8]; @@ -132,7 +132,7 @@ MC_LUMA_BI( 1 ); MC_LUMA_BI( 2 ); } - else + else if( CHROMA_FORMAT ) { int v_shift = CHROMA_V_SHIFT; if( v_shift & MB_INTERLACED & i_ref0 ) @@ -165,21 +165,21 @@ switch( h->mb.i_sub_partition[i8] ) { case D_L0_8x8: - x264_mb_mc_0xywh( h, x, y, 2, 2 ); + mb_mc_0xywh( h, x, y, 2, 2 ); break; case D_L0_8x4: - x264_mb_mc_0xywh( h, x, y+0, 2, 1 ); - x264_mb_mc_0xywh( h, x, y+1, 2, 1 ); + mb_mc_0xywh( h, x, y+0, 2, 1 ); + mb_mc_0xywh( h, x, y+1, 2, 1 ); break; case D_L0_4x8: - x264_mb_mc_0xywh( h, x+0, y, 1, 2 ); - x264_mb_mc_0xywh( h, x+1, y, 1, 2 ); + mb_mc_0xywh( h, x+0, y, 1, 2 ); + mb_mc_0xywh( h, x+1, y, 1, 2 ); break; case D_L0_4x4: - 
x264_mb_mc_0xywh( h, x+0, y+0, 1, 1 ); - x264_mb_mc_0xywh( h, x+1, y+0, 1, 1 ); - x264_mb_mc_0xywh( h, x+0, y+1, 1, 1 ); - x264_mb_mc_0xywh( h, x+1, y+1, 1, 1 ); + mb_mc_0xywh( h, x+0, y+0, 1, 1 ); + mb_mc_0xywh( h, x+1, y+0, 1, 1 ); + mb_mc_0xywh( h, x+0, y+1, 1, 1 ); + mb_mc_0xywh( h, x+1, y+1, 1, 1 ); break; } } @@ -189,11 +189,11 @@ if( h->mb.cache.ref[0][scan8] >= 0 ) if( h->mb.cache.ref[1][scan8] >= 0 ) - x264_mb_mc_01xywh( h, x, y, 2, 2 ); + mb_mc_01xywh( h, x, y, 2, 2 ); else - x264_mb_mc_0xywh( h, x, y, 2, 2 ); + mb_mc_0xywh( h, x, y, 2, 2 ); else - x264_mb_mc_1xywh( h, x, y, 2, 2 ); + mb_mc_1xywh( h, x, y, 2, 2 ); } } @@ -214,33 +214,33 @@ if( h->mb.i_partition == D_16x16 ) { if( ref0a >= 0 ) - if( ref1a >= 0 ) x264_mb_mc_01xywh( h, 0, 0, 4, 4 ); - else x264_mb_mc_0xywh ( h, 0, 0, 4, 4 ); - else x264_mb_mc_1xywh ( h, 0, 0, 4, 4 ); + if( ref1a >= 0 ) mb_mc_01xywh( h, 0, 0, 4, 4 ); + else mb_mc_0xywh ( h, 0, 0, 4, 4 ); + else mb_mc_1xywh ( h, 0, 0, 4, 4 ); } else if( h->mb.i_partition == D_16x8 ) { if( ref0a >= 0 ) - if( ref1a >= 0 ) x264_mb_mc_01xywh( h, 0, 0, 4, 2 ); - else x264_mb_mc_0xywh ( h, 0, 0, 4, 2 ); - else x264_mb_mc_1xywh ( h, 0, 0, 4, 2 ); + if( ref1a >= 0 ) mb_mc_01xywh( h, 0, 0, 4, 2 ); + else mb_mc_0xywh ( h, 0, 0, 4, 2 ); + else mb_mc_1xywh ( h, 0, 0, 4, 2 ); if( ref0b >= 0 ) - if( ref1b >= 0 ) x264_mb_mc_01xywh( h, 0, 2, 4, 2 ); - else x264_mb_mc_0xywh ( h, 0, 2, 4, 2 ); - else x264_mb_mc_1xywh ( h, 0, 2, 4, 2 ); + if( ref1b >= 0 ) mb_mc_01xywh( h, 0, 2, 4, 2 ); + else mb_mc_0xywh ( h, 0, 2, 4, 2 ); + else mb_mc_1xywh ( h, 0, 2, 4, 2 ); } else if( h->mb.i_partition == D_8x16 ) { if( ref0a >= 0 ) - if( ref1a >= 0 ) x264_mb_mc_01xywh( h, 0, 0, 2, 4 ); - else x264_mb_mc_0xywh ( h, 0, 0, 2, 4 ); - else x264_mb_mc_1xywh ( h, 0, 0, 2, 4 ); + if( ref1a >= 0 ) mb_mc_01xywh( h, 0, 0, 2, 4 ); + else mb_mc_0xywh ( h, 0, 0, 2, 4 ); + else mb_mc_1xywh ( h, 0, 0, 2, 4 ); if( ref0b >= 0 ) - if( ref1b >= 0 ) x264_mb_mc_01xywh( h, 2, 0, 2, 4 ); - else 
x264_mb_mc_0xywh ( h, 2, 0, 2, 4 ); - else x264_mb_mc_1xywh ( h, 2, 0, 2, 4 ); + if( ref1b >= 0 ) mb_mc_01xywh( h, 2, 0, 2, 4 ); + else mb_mc_0xywh ( h, 2, 0, 2, 4 ); + else mb_mc_1xywh ( h, 2, 0, 2, 4 ); } } } @@ -531,17 +531,20 @@ */ h->mb.pic.p_fenc[0] = h->mb.pic.fenc_buf; h->mb.pic.p_fdec[0] = h->mb.pic.fdec_buf + 2*FDEC_STRIDE; - h->mb.pic.p_fenc[1] = h->mb.pic.fenc_buf + 16*FENC_STRIDE; - h->mb.pic.p_fdec[1] = h->mb.pic.fdec_buf + 20*FDEC_STRIDE; - if( CHROMA444 ) - { - h->mb.pic.p_fenc[2] = h->mb.pic.fenc_buf + 32*FENC_STRIDE; - h->mb.pic.p_fdec[2] = h->mb.pic.fdec_buf + 38*FDEC_STRIDE; - } - else + if( CHROMA_FORMAT ) { - h->mb.pic.p_fenc[2] = h->mb.pic.fenc_buf + 16*FENC_STRIDE + 8; - h->mb.pic.p_fdec[2] = h->mb.pic.fdec_buf + 20*FDEC_STRIDE + 16; + h->mb.pic.p_fenc[1] = h->mb.pic.fenc_buf + 16*FENC_STRIDE; + h->mb.pic.p_fdec[1] = h->mb.pic.fdec_buf + 20*FDEC_STRIDE; + if( CHROMA444 ) + { + h->mb.pic.p_fenc[2] = h->mb.pic.fenc_buf + 32*FENC_STRIDE; + h->mb.pic.p_fdec[2] = h->mb.pic.fdec_buf + 38*FDEC_STRIDE; + } + else + { + h->mb.pic.p_fenc[2] = h->mb.pic.fenc_buf + 16*FENC_STRIDE + 8; + h->mb.pic.p_fdec[2] = h->mb.pic.fdec_buf + 20*FDEC_STRIDE + 16; + } } } @@ -562,7 +565,7 @@ dst[i*FDEC_STRIDE] = src[i*FDEC_STRIDE]; } -static void ALWAYS_INLINE x264_macroblock_load_pic_pointers( x264_t *h, int mb_x, int mb_y, int i, int b_chroma, int b_mbaff ) +static ALWAYS_INLINE void macroblock_load_pic_pointers( x264_t *h, int mb_x, int mb_y, int i, int b_chroma, int b_mbaff ) { int mb_interlaced = b_mbaff && MB_INTERLACED; int height = b_chroma ? 
16 >> CHROMA_V_SHIFT : 16; @@ -666,7 +669,7 @@ {{ 4, 5, 6, 3}, { 3, 7, 11, 15}, {16+1, 16+5, 32+1, 32+5}, {0, 1, 2, 3}, {0, 0, 1, 1}} }; -static void ALWAYS_INLINE x264_macroblock_cache_load_neighbours( x264_t *h, int mb_x, int mb_y, int b_interlaced ) +static ALWAYS_INLINE void macroblock_cache_load_neighbours( x264_t *h, int mb_x, int mb_y, int b_interlaced ) { const int mb_interlaced = b_interlaced && MB_INTERLACED; int top_y = mb_y - (1 << mb_interlaced); @@ -848,9 +851,9 @@ # define LBOT 0 #endif -static void ALWAYS_INLINE x264_macroblock_cache_load( x264_t *h, int mb_x, int mb_y, int b_mbaff ) +static ALWAYS_INLINE void macroblock_cache_load( x264_t *h, int mb_x, int mb_y, int b_mbaff ) { - x264_macroblock_cache_load_neighbours( h, mb_x, mb_y, b_mbaff ); + macroblock_cache_load_neighbours( h, mb_x, mb_y, b_mbaff ); int *left = h->mb.i_mb_left_xy; int top = h->mb.i_mb_top_xy; @@ -996,17 +999,17 @@ { x264_copy_column8( h->mb.pic.p_fdec[0]-1+ 4*FDEC_STRIDE, h->mb.pic.p_fdec[0]+15+ 4*FDEC_STRIDE ); x264_copy_column8( h->mb.pic.p_fdec[0]-1+12*FDEC_STRIDE, h->mb.pic.p_fdec[0]+15+12*FDEC_STRIDE ); - x264_macroblock_load_pic_pointers( h, mb_x, mb_y, 0, 0, 0 ); + macroblock_load_pic_pointers( h, mb_x, mb_y, 0, 0, 0 ); if( CHROMA444 ) { x264_copy_column8( h->mb.pic.p_fdec[1]-1+ 4*FDEC_STRIDE, h->mb.pic.p_fdec[1]+15+ 4*FDEC_STRIDE ); x264_copy_column8( h->mb.pic.p_fdec[1]-1+12*FDEC_STRIDE, h->mb.pic.p_fdec[1]+15+12*FDEC_STRIDE ); x264_copy_column8( h->mb.pic.p_fdec[2]-1+ 4*FDEC_STRIDE, h->mb.pic.p_fdec[2]+15+ 4*FDEC_STRIDE ); x264_copy_column8( h->mb.pic.p_fdec[2]-1+12*FDEC_STRIDE, h->mb.pic.p_fdec[2]+15+12*FDEC_STRIDE ); - x264_macroblock_load_pic_pointers( h, mb_x, mb_y, 1, 0, 0 ); - x264_macroblock_load_pic_pointers( h, mb_x, mb_y, 2, 0, 0 ); + macroblock_load_pic_pointers( h, mb_x, mb_y, 1, 0, 0 ); + macroblock_load_pic_pointers( h, mb_x, mb_y, 2, 0, 0 ); } - else + else if( CHROMA_FORMAT ) { x264_copy_column8( h->mb.pic.p_fdec[1]-1+ 4*FDEC_STRIDE, 
h->mb.pic.p_fdec[1]+ 7+ 4*FDEC_STRIDE ); x264_copy_column8( h->mb.pic.p_fdec[2]-1+ 4*FDEC_STRIDE, h->mb.pic.p_fdec[2]+ 7+ 4*FDEC_STRIDE ); @@ -1015,19 +1018,19 @@ x264_copy_column8( h->mb.pic.p_fdec[1]-1+12*FDEC_STRIDE, h->mb.pic.p_fdec[1]+ 7+12*FDEC_STRIDE ); x264_copy_column8( h->mb.pic.p_fdec[2]-1+12*FDEC_STRIDE, h->mb.pic.p_fdec[2]+ 7+12*FDEC_STRIDE ); } - x264_macroblock_load_pic_pointers( h, mb_x, mb_y, 1, 1, 0 ); + macroblock_load_pic_pointers( h, mb_x, mb_y, 1, 1, 0 ); } } else { - x264_macroblock_load_pic_pointers( h, mb_x, mb_y, 0, 0, 1 ); + macroblock_load_pic_pointers( h, mb_x, mb_y, 0, 0, 1 ); if( CHROMA444 ) { - x264_macroblock_load_pic_pointers( h, mb_x, mb_y, 1, 0, 1 ); - x264_macroblock_load_pic_pointers( h, mb_x, mb_y, 2, 0, 1 ); + macroblock_load_pic_pointers( h, mb_x, mb_y, 1, 0, 1 ); + macroblock_load_pic_pointers( h, mb_x, mb_y, 2, 0, 1 ); } - else - x264_macroblock_load_pic_pointers( h, mb_x, mb_y, 1, 1, 1 ); + else if( CHROMA_FORMAT ) + macroblock_load_pic_pointers( h, mb_x, mb_y, 1, 1, 1 ); } if( h->fdec->integral ) @@ -1348,15 +1351,15 @@ void x264_macroblock_cache_load_progressive( x264_t *h, int mb_x, int mb_y ) { - x264_macroblock_cache_load( h, mb_x, mb_y, 0 ); + macroblock_cache_load( h, mb_x, mb_y, 0 ); } void x264_macroblock_cache_load_interlaced( x264_t *h, int mb_x, int mb_y ) { - x264_macroblock_cache_load( h, mb_x, mb_y, 1 ); + macroblock_cache_load( h, mb_x, mb_y, 1 ); } -static void x264_macroblock_deblock_strength_mbaff( x264_t *h, uint8_t (*bs)[8][4] ) +static void macroblock_deblock_strength_mbaff( x264_t *h, uint8_t (*bs)[8][4] ) { if( (h->mb.i_neighbour & MB_LEFT) && h->mb.field[h->mb.i_mb_left_xy[0]] != MB_INTERLACED ) { @@ -1613,10 +1616,10 @@ bs, 4 >> MB_INTERLACED, h->sh.i_type == SLICE_TYPE_B ); if( SLICE_MBAFF ) - x264_macroblock_deblock_strength_mbaff( h, bs ); + macroblock_deblock_strength_mbaff( h, bs ); } -static void ALWAYS_INLINE x264_macroblock_store_pic( x264_t *h, int mb_x, int mb_y, int i, int b_chroma, 
int b_mbaff ) +static ALWAYS_INLINE void macroblock_store_pic( x264_t *h, int mb_x, int mb_y, int i, int b_chroma, int b_mbaff ) { int height = b_chroma ? 16>>CHROMA_V_SHIFT : 16; int i_stride = h->fdec->i_stride[i]; @@ -1630,7 +1633,7 @@ h->mc.copy[PIXEL_16x16]( &h->fdec->plane[i][i_pix_offset], i_stride2, h->mb.pic.p_fdec[i], FDEC_STRIDE, 16 ); } -static void ALWAYS_INLINE x264_macroblock_backup_intra( x264_t *h, int mb_x, int mb_y, int b_mbaff ) +static ALWAYS_INLINE void macroblock_backup_intra( x264_t *h, int mb_x, int mb_y, int b_mbaff ) { /* In MBAFF we store the last two rows in intra_border_backup[0] and [1]. * For progressive mbs this is the bottom two rows, and for interlaced the @@ -1643,7 +1646,7 @@ memcpy( &h->intra_border_backup[backup_dst][1][mb_x*16 ], h->mb.pic.p_fdec[1]+FDEC_STRIDE*15, 16*sizeof(pixel) ); memcpy( &h->intra_border_backup[backup_dst][2][mb_x*16 ], h->mb.pic.p_fdec[2]+FDEC_STRIDE*15, 16*sizeof(pixel) ); } - else + else if( CHROMA_FORMAT ) { int backup_src = (15>>CHROMA_V_SHIFT) * FDEC_STRIDE; memcpy( &h->intra_border_backup[backup_dst][1][mb_x*16 ], h->mb.pic.p_fdec[1]+backup_src, 8*sizeof(pixel) ); @@ -1661,7 +1664,7 @@ memcpy( &h->intra_border_backup[backup_dst][1][mb_x*16 ], h->mb.pic.p_fdec[1]+backup_src, 16*sizeof(pixel) ); memcpy( &h->intra_border_backup[backup_dst][2][mb_x*16 ], h->mb.pic.p_fdec[2]+backup_src, 16*sizeof(pixel) ); } - else + else if( CHROMA_FORMAT ) { if( CHROMA_FORMAT == CHROMA_420 ) backup_src = (MB_INTERLACED ? 
3 : 6) * FDEC_STRIDE; @@ -1688,27 +1691,27 @@ if( SLICE_MBAFF ) { - x264_macroblock_backup_intra( h, h->mb.i_mb_x, h->mb.i_mb_y, 1 ); - x264_macroblock_store_pic( h, h->mb.i_mb_x, h->mb.i_mb_y, 0, 0, 1 ); + macroblock_backup_intra( h, h->mb.i_mb_x, h->mb.i_mb_y, 1 ); + macroblock_store_pic( h, h->mb.i_mb_x, h->mb.i_mb_y, 0, 0, 1 ); if( CHROMA444 ) { - x264_macroblock_store_pic( h, h->mb.i_mb_x, h->mb.i_mb_y, 1, 0, 1 ); - x264_macroblock_store_pic( h, h->mb.i_mb_x, h->mb.i_mb_y, 2, 0, 1 ); + macroblock_store_pic( h, h->mb.i_mb_x, h->mb.i_mb_y, 1, 0, 1 ); + macroblock_store_pic( h, h->mb.i_mb_x, h->mb.i_mb_y, 2, 0, 1 ); } - else - x264_macroblock_store_pic( h, h->mb.i_mb_x, h->mb.i_mb_y, 1, 1, 1 ); + else if( CHROMA_FORMAT ) + macroblock_store_pic( h, h->mb.i_mb_x, h->mb.i_mb_y, 1, 1, 1 ); } else { - x264_macroblock_backup_intra( h, h->mb.i_mb_x, h->mb.i_mb_y, 0 ); - x264_macroblock_store_pic( h, h->mb.i_mb_x, h->mb.i_mb_y, 0, 0, 0 ); + macroblock_backup_intra( h, h->mb.i_mb_x, h->mb.i_mb_y, 0 ); + macroblock_store_pic( h, h->mb.i_mb_x, h->mb.i_mb_y, 0, 0, 0 ); if( CHROMA444 ) { - x264_macroblock_store_pic( h, h->mb.i_mb_x, h->mb.i_mb_y, 1, 0, 0 ); - x264_macroblock_store_pic( h, h->mb.i_mb_x, h->mb.i_mb_y, 2, 0, 0 ); + macroblock_store_pic( h, h->mb.i_mb_x, h->mb.i_mb_y, 1, 0, 0 ); + macroblock_store_pic( h, h->mb.i_mb_x, h->mb.i_mb_y, 2, 0, 0 ); } - else - x264_macroblock_store_pic( h, h->mb.i_mb_x, h->mb.i_mb_y, 1, 1, 0 ); + else if( CHROMA_FORMAT ) + macroblock_store_pic( h, h->mb.i_mb_x, h->mb.i_mb_y, 1, 1, 0 ); } x264_prefetch_fenc( h, h->fdec, h->mb.i_mb_x, h->mb.i_mb_y ); diff -Nru x264-0.152.2854+gite9a5903/common/macroblock.h x264-0.158.2988+git-20191101.7817004/common/macroblock.h --- x264-0.152.2854+gite9a5903/common/macroblock.h 2017-12-31 12:50:51.000000000 +0000 +++ x264-0.158.2988+git-20191101.7817004/common/macroblock.h 2019-11-09 05:16:29.000000000 +0000 @@ -1,7 +1,7 @@ /***************************************************************************** * 
macroblock.h: macroblock common functions ***************************************************************************** - * Copyright (C) 2005-2017 x264 project + * Copyright (C) 2005-2019 x264 project * * Authors: Loren Merritt * Laurent Aimar @@ -299,38 +299,54 @@ }; /* Per-frame allocation: is allocated per-thread only in frame-threads mode. */ +#define x264_macroblock_cache_allocate x264_template(macroblock_cache_allocate) int x264_macroblock_cache_allocate( x264_t *h ); +#define x264_macroblock_cache_free x264_template(macroblock_cache_free) void x264_macroblock_cache_free( x264_t *h ); /* Per-thread allocation: is allocated per-thread even in sliced-threads mode. */ +#define x264_macroblock_thread_allocate x264_template(macroblock_thread_allocate) int x264_macroblock_thread_allocate( x264_t *h, int b_lookahead ); +#define x264_macroblock_thread_free x264_template(macroblock_thread_free) void x264_macroblock_thread_free( x264_t *h, int b_lookahead ); +#define x264_macroblock_slice_init x264_template(macroblock_slice_init) void x264_macroblock_slice_init( x264_t *h ); +#define x264_macroblock_thread_init x264_template(macroblock_thread_init) void x264_macroblock_thread_init( x264_t *h ); +#define x264_macroblock_cache_load_interlaced x264_template(macroblock_cache_load_interlaced) void x264_macroblock_cache_load_progressive( x264_t *h, int mb_x, int mb_y ); +#define x264_macroblock_cache_load_progressive x264_template(macroblock_cache_load_progressive) void x264_macroblock_cache_load_interlaced( x264_t *h, int mb_x, int mb_y ); +#define x264_macroblock_deblock_strength x264_template(macroblock_deblock_strength) void x264_macroblock_deblock_strength( x264_t *h ); +#define x264_macroblock_cache_save x264_template(macroblock_cache_save) void x264_macroblock_cache_save( x264_t *h ); +#define x264_macroblock_bipred_init x264_template(macroblock_bipred_init) void x264_macroblock_bipred_init( x264_t *h ); +#define x264_prefetch_fenc x264_template(prefetch_fenc) void 
x264_prefetch_fenc( x264_t *h, x264_frame_t *fenc, int i_mb_x, int i_mb_y ); +#define x264_copy_column8 x264_template(copy_column8) void x264_copy_column8( pixel *dst, pixel *src ); /* x264_mb_predict_mv_16x16: * set mvp with predicted mv for D_16x16 block * h->mb. need only valid values from other blocks */ +#define x264_mb_predict_mv_16x16 x264_template(mb_predict_mv_16x16) void x264_mb_predict_mv_16x16( x264_t *h, int i_list, int i_ref, int16_t mvp[2] ); /* x264_mb_predict_mv_pskip: * set mvp with predicted mv for P_SKIP * h->mb. need only valid values from other blocks */ +#define x264_mb_predict_mv_pskip x264_template(mb_predict_mv_pskip) void x264_mb_predict_mv_pskip( x264_t *h, int16_t mv[2] ); /* x264_mb_predict_mv: * set mvp with predicted mv for all blocks except SKIP and DIRECT * h->mb. need valid ref/partition/sub of current block to be valid * and valid mv/ref from other blocks. */ +#define x264_mb_predict_mv x264_template(mb_predict_mv) void x264_mb_predict_mv( x264_t *h, int i_list, int idx, int i_width, int16_t mvp[2] ); /* x264_mb_predict_mv_direct16x16: * set h->mb.cache.mv and h->mb.cache.ref for B_SKIP or B_DIRECT @@ -338,14 +354,18 @@ * return 1 on success, 0 on failure. * if b_changed != NULL, set it to whether refs or mvs differ from * before this functioncall. */ +#define x264_mb_predict_mv_direct16x16 x264_template(mb_predict_mv_direct16x16) int x264_mb_predict_mv_direct16x16( x264_t *h, int *b_changed ); /* x264_mb_predict_mv_ref16x16: * set mvc with D_16x16 prediction. * uses all neighbors, even those that didn't end up using this ref. * h->mb. 
need only valid values from other blocks */ +#define x264_mb_predict_mv_ref16x16 x264_template(mb_predict_mv_ref16x16) void x264_mb_predict_mv_ref16x16( x264_t *h, int i_list, int i_ref, int16_t mvc[8][2], int *i_mvc ); +#define x264_mb_mc x264_template(mb_mc) void x264_mb_mc( x264_t *h ); +#define x264_mb_mc_8x8 x264_template(mb_mc_8x8) void x264_mb_mc_8x8( x264_t *h, int i8 ); static ALWAYS_INLINE uint32_t pack16to32( uint32_t a, uint32_t b ) @@ -441,4 +461,3 @@ } #endif - diff -Nru x264-0.152.2854+gite9a5903/common/mc.c x264-0.158.2988+git-20191101.7817004/common/mc.c --- x264-0.152.2854+gite9a5903/common/mc.c 2017-12-31 12:50:51.000000000 +0000 +++ x264-0.158.2988+git-20191101.7817004/common/mc.c 2019-11-09 05:16:29.000000000 +0000 @@ -1,7 +1,7 @@ /***************************************************************************** * mc.c: motion compensation ***************************************************************************** - * Copyright (C) 2003-2017 x264 project + * Copyright (C) 2003-2019 x264 project * * Authors: Laurent Aimar * Loren Merritt @@ -29,16 +29,16 @@ #if HAVE_MMX #include "x86/mc.h" #endif -#if ARCH_PPC +#if HAVE_ALTIVEC #include "ppc/mc.h" #endif -#if ARCH_ARM +#if HAVE_ARMV6 #include "arm/mc.h" #endif -#if ARCH_AARCH64 +#if HAVE_AARCH64 #include "aarch64/mc.h" #endif -#if ARCH_MIPS +#if HAVE_MSA #include "mips/mc.h" #endif @@ -107,7 +107,7 @@ PIXEL_AVG_C( pixel_avg_2x4, 2, 4 ) PIXEL_AVG_C( pixel_avg_2x2, 2, 2 ) -static void x264_weight_cache( x264_t *h, x264_weight_t *w ) +static void weight_cache( x264_t *h, x264_weight_t *w ) { w->weightfn = h->mc.weight; } @@ -134,7 +134,7 @@ } #define MC_WEIGHT_C( name, width ) \ - static void name( pixel *dst, intptr_t i_dst_stride, pixel *src, intptr_t i_src_stride, const x264_weight_t *weight, int height ) \ +static void name( pixel *dst, intptr_t i_dst_stride, pixel *src, intptr_t i_src_stride, const x264_weight_t *weight, int height ) \ { \ mc_weight( dst, i_dst_stride, src, i_src_stride, weight, 
width, height );\ } @@ -146,7 +146,7 @@ MC_WEIGHT_C( mc_weight_w4, 4 ) MC_WEIGHT_C( mc_weight_w2, 2 ) -static weight_fn_t x264_mc_weight_wtab[6] = +static weight_fn_t mc_weight_wtab[6] = { mc_weight_w2, mc_weight_w4, @@ -155,7 +155,7 @@ mc_weight_w16, mc_weight_w20, }; -const x264_weight_t x264_weight_none[3] = { {{0}} }; + static void mc_copy( pixel *src, intptr_t i_src_stride, pixel *dst, intptr_t i_dst_stride, int i_width, int i_height ) { for( int y = 0; y < i_height; y++ ) @@ -192,9 +192,6 @@ } } -const uint8_t x264_hpel_ref0[16] = {0,1,1,1,0,1,1,1,2,3,3,3,0,1,1,1}; -const uint8_t x264_hpel_ref1[16] = {0,0,1,0,2,2,3,2,2,2,3,2,2,2,3,2}; - static void mc_luma( pixel *dst, intptr_t i_dst_stride, pixel *src[4], intptr_t i_src_stride, int mvx, int mvy, @@ -336,10 +333,10 @@ } } -static void x264_plane_copy_deinterleave_rgb_c( pixel *dsta, intptr_t i_dsta, - pixel *dstb, intptr_t i_dstb, - pixel *dstc, intptr_t i_dstc, - pixel *src, intptr_t i_src, int pw, int w, int h ) +static void plane_copy_deinterleave_rgb_c( pixel *dsta, intptr_t i_dsta, + pixel *dstb, intptr_t i_dstb, + pixel *dstc, intptr_t i_dstc, + pixel *src, intptr_t i_src, int pw, int w, int h ) { for( int y=0; yavg[PIXEL_2x4] = pixel_avg_2x4; pf->avg[PIXEL_2x2] = pixel_avg_2x2; - pf->weight = x264_mc_weight_wtab; - pf->offsetadd = x264_mc_weight_wtab; - pf->offsetsub = x264_mc_weight_wtab; - pf->weight_cache = x264_weight_cache; + pf->weight = mc_weight_wtab; + pf->offsetadd = mc_weight_wtab; + pf->offsetsub = mc_weight_wtab; + pf->weight_cache = weight_cache; pf->copy_16x16_unaligned = mc_copy_w16; pf->copy[PIXEL_16x16] = mc_copy_w16; @@ -647,13 +644,15 @@ pf->plane_copy = x264_plane_copy_c; pf->plane_copy_swap = x264_plane_copy_swap_c; pf->plane_copy_interleave = x264_plane_copy_interleave_c; + pf->plane_copy_deinterleave = x264_plane_copy_deinterleave_c; pf->plane_copy_deinterleave_yuyv = x264_plane_copy_deinterleave_c; - pf->plane_copy_deinterleave_rgb = x264_plane_copy_deinterleave_rgb_c; - 
pf->plane_copy_deinterleave_v210 = x264_plane_copy_deinterleave_v210_c; + pf->plane_copy_deinterleave_rgb = plane_copy_deinterleave_rgb_c; + pf->plane_copy_deinterleave_v210 = plane_copy_deinterleave_v210_c; pf->hpel_filter = hpel_filter; + pf->prefetch_fenc_400 = prefetch_fenc_null; pf->prefetch_fenc_420 = prefetch_fenc_null; pf->prefetch_fenc_422 = prefetch_fenc_null; pf->prefetch_ref = prefetch_ref_null; @@ -681,7 +680,7 @@ #if HAVE_ARMV6 x264_mc_init_arm( cpu, pf ); #endif -#if ARCH_AARCH64 +#if HAVE_AARCH64 x264_mc_init_aarch64( cpu, pf ); #endif #if HAVE_MSA diff -Nru x264-0.152.2854+gite9a5903/common/mc.h x264-0.158.2988+git-20191101.7817004/common/mc.h --- x264-0.152.2854+gite9a5903/common/mc.h 2017-12-31 12:50:51.000000000 +0000 +++ x264-0.158.2988+git-20191101.7817004/common/mc.h 2019-11-09 05:16:29.000000000 +0000 @@ -1,7 +1,7 @@ /***************************************************************************** * mc.h: motion compensation ***************************************************************************** - * Copyright (C) 2004-2017 x264 project + * Copyright (C) 2004-2019 x264 project * * Authors: Loren Merritt * @@ -34,14 +34,15 @@ MC_CLIP_ADD((s)[1], (x)[1]);\ } while( 0 ) +#define x264_mbtree_propagate_list_internal_neon x264_template(mbtree_propagate_list_internal_neon) #define PROPAGATE_LIST(cpu)\ void x264_mbtree_propagate_list_internal_##cpu( int16_t (*mvs)[2], int16_t *propagate_amount,\ uint16_t *lowres_costs, int16_t *output,\ int bipred_weight, int mb_y, int len );\ \ -static void x264_mbtree_propagate_list_##cpu( x264_t *h, uint16_t *ref_costs, int16_t (*mvs)[2],\ - int16_t *propagate_amount, uint16_t *lowres_costs,\ - int bipred_weight, int mb_y, int len, int list )\ +static void mbtree_propagate_list_##cpu( x264_t *h, uint16_t *ref_costs, int16_t (*mvs)[2],\ + int16_t *propagate_amount, uint16_t *lowres_costs,\ + int bipred_weight, int mb_y, int len, int list )\ {\ int16_t *current = h->scratch_buffer2;\ \ @@ -100,10 +101,11 @@ }\ } 
+#define x264_plane_copy_c x264_template(plane_copy_c) void x264_plane_copy_c( pixel *, intptr_t, pixel *, intptr_t, int w, int h ); #define PLANE_COPY(align, cpu)\ -static void x264_plane_copy_##cpu( pixel *dst, intptr_t i_dst, pixel *src, intptr_t i_src, int w, int h )\ +static void plane_copy_##cpu( pixel *dst, intptr_t i_dst, pixel *src, intptr_t i_src, int w, int h )\ {\ int c_w = (align) / sizeof(pixel) - 1;\ if( w < 256 ) /* tiny resolutions don't want non-temporal hints. dunno the exact threshold. */\ @@ -128,10 +130,11 @@ }\ } +#define x264_plane_copy_swap_c x264_template(plane_copy_swap_c) void x264_plane_copy_swap_c( pixel *, intptr_t, pixel *, intptr_t, int w, int h ); #define PLANE_COPY_SWAP(align, cpu)\ -static void x264_plane_copy_swap_##cpu( pixel *dst, intptr_t i_dst, pixel *src, intptr_t i_src, int w, int h )\ +static void plane_copy_swap_##cpu( pixel *dst, intptr_t i_dst, pixel *src, intptr_t i_src, int w, int h )\ {\ int c_w = (align>>1) / sizeof(pixel) - 1;\ if( !(w&c_w) )\ @@ -160,14 +163,15 @@ x264_plane_copy_swap_c( dst, i_dst, src, i_src, w, h );\ } +#define x264_plane_copy_deinterleave_c x264_template(plane_copy_deinterleave_c) void x264_plane_copy_deinterleave_c( pixel *dsta, intptr_t i_dsta, pixel *dstb, intptr_t i_dstb, pixel *src, intptr_t i_src, int w, int h ); /* We can utilize existing plane_copy_deinterleave() functions for YUYV/UYUV * input with the additional constraint that we cannot overread src. 
*/ #define PLANE_COPY_YUYV(align, cpu)\ -static void x264_plane_copy_deinterleave_yuyv_##cpu( pixel *dsta, intptr_t i_dsta, pixel *dstb, intptr_t i_dstb,\ - pixel *src, intptr_t i_src, int w, int h )\ +static void plane_copy_deinterleave_yuyv_##cpu( pixel *dsta, intptr_t i_dsta, pixel *dstb, intptr_t i_dstb,\ + pixel *src, intptr_t i_src, int w, int h )\ {\ int c_w = (align>>1) / sizeof(pixel) - 1;\ if( !(w&c_w) )\ @@ -193,14 +197,15 @@ x264_plane_copy_deinterleave_c( dsta, i_dsta, dstb, i_dstb, src, i_src, w, h );\ } +#define x264_plane_copy_interleave_c x264_template(plane_copy_interleave_c) void x264_plane_copy_interleave_c( pixel *dst, intptr_t i_dst, pixel *srcu, intptr_t i_srcu, pixel *srcv, intptr_t i_srcv, int w, int h ); #define PLANE_INTERLEAVE(cpu) \ -static void x264_plane_copy_interleave_##cpu( pixel *dst, intptr_t i_dst,\ - pixel *srcu, intptr_t i_srcu,\ - pixel *srcv, intptr_t i_srcv, int w, int h )\ +static void plane_copy_interleave_##cpu( pixel *dst, intptr_t i_dst,\ + pixel *srcu, intptr_t i_srcu,\ + pixel *srcv, intptr_t i_srcv, int w, int h )\ {\ int c_w = 16 / sizeof(pixel) - 1;\ if( !(w&c_w) )\ @@ -239,9 +244,7 @@ weight_fn_t *weightfn; } ALIGNED_16( x264_weight_t ); -extern const x264_weight_t x264_weight_none[3]; -extern const uint8_t x264_hpel_ref0[16]; -extern const uint8_t x264_hpel_ref1[16]; +#define x264_weight_none ((const x264_weight_t*)x264_zero) #define SET_WEIGHT( w, b, s, d, o )\ {\ @@ -305,6 +308,7 @@ /* prefetch the next few macroblocks of fenc or fdec */ void (*prefetch_fenc) ( pixel *pix_y, intptr_t stride_y, pixel *pix_uv, intptr_t stride_uv, int mb_x ); + void (*prefetch_fenc_400)( pixel *pix_y, intptr_t stride_y, pixel *pix_uv, intptr_t stride_uv, int mb_x ); void (*prefetch_fenc_420)( pixel *pix_y, intptr_t stride_y, pixel *pix_uv, intptr_t stride_uv, int mb_x ); void (*prefetch_fenc_422)( pixel *pix_y, intptr_t stride_y, pixel *pix_uv, intptr_t stride_uv, int mb_x ); /* prefetch the next few macroblocks of a hpel 
reference frame */ @@ -335,6 +339,7 @@ void (*mbtree_fix8_unpack)( float *dst, uint16_t *src, int count ); } x264_mc_functions_t; +#define x264_mc_init x264_template(mc_init) void x264_mc_init( int cpu, x264_mc_functions_t *pf, int cpu_independent ); #endif diff -Nru x264-0.152.2854+gite9a5903/common/mips/dct-c.c x264-0.158.2988+git-20191101.7817004/common/mips/dct-c.c --- x264-0.152.2854+gite9a5903/common/mips/dct-c.c 2017-12-31 12:50:51.000000000 +0000 +++ x264-0.158.2988+git-20191101.7817004/common/mips/dct-c.c 2019-11-09 05:16:29.000000000 +0000 @@ -1,7 +1,7 @@ /***************************************************************************** * dct-c.c: msa transform and zigzag ***************************************************************************** - * Copyright (C) 2015-2017 x264 project + * Copyright (C) 2015-2019 x264 project * * Authors: Rishikesh More * @@ -25,6 +25,7 @@ #include "common/common.h" #include "macros.h" +#include "dct.h" #if !HIGH_BIT_DEPTH #define AVC_ITRANS_H( in0, in1, in2, in3, out0, out1, out2, out3 ) \ diff -Nru x264-0.152.2854+gite9a5903/common/mips/dct.h x264-0.158.2988+git-20191101.7817004/common/mips/dct.h --- x264-0.152.2854+gite9a5903/common/mips/dct.h 2017-12-31 12:50:51.000000000 +0000 +++ x264-0.158.2988+git-20191101.7817004/common/mips/dct.h 2019-11-09 05:16:29.000000000 +0000 @@ -1,7 +1,7 @@ /***************************************************************************** * dct.h: msa transform and zigzag ***************************************************************************** - * Copyright (C) 2015-2017 x264 project + * Copyright (C) 2015-2019 x264 project * * Authors: Rishikesh More * @@ -26,24 +26,39 @@ #ifndef X264_MIPS_DCT_H #define X264_MIPS_DCT_H +#define x264_dct4x4dc_msa x264_template(dct4x4dc_msa) void x264_dct4x4dc_msa( int16_t d[16] ); +#define x264_idct4x4dc_msa x264_template(idct4x4dc_msa) void x264_idct4x4dc_msa( int16_t d[16] ); +#define x264_add4x4_idct_msa x264_template(add4x4_idct_msa) void 
x264_add4x4_idct_msa( uint8_t *p_dst, int16_t pi_dct[16] ); +#define x264_add8x8_idct_msa x264_template(add8x8_idct_msa) void x264_add8x8_idct_msa( uint8_t *p_dst, int16_t pi_dct[4][16] ); +#define x264_add16x16_idct_msa x264_template(add16x16_idct_msa) void x264_add16x16_idct_msa( uint8_t *p_dst, int16_t pi_dct[16][16] ); +#define x264_add8x8_idct8_msa x264_template(add8x8_idct8_msa) void x264_add8x8_idct8_msa( uint8_t *p_dst, int16_t pi_dct[64] ); +#define x264_add16x16_idct8_msa x264_template(add16x16_idct8_msa) void x264_add16x16_idct8_msa( uint8_t *p_dst, int16_t pi_dct[4][64] ); +#define x264_add8x8_idct_dc_msa x264_template(add8x8_idct_dc_msa) void x264_add8x8_idct_dc_msa( uint8_t *p_dst, int16_t pi_dct[4] ); +#define x264_add16x16_idct_dc_msa x264_template(add16x16_idct_dc_msa) void x264_add16x16_idct_dc_msa( uint8_t *p_dst, int16_t pi_dct[16] ); +#define x264_sub4x4_dct_msa x264_template(sub4x4_dct_msa) void x264_sub4x4_dct_msa( int16_t p_dst[16], uint8_t *p_src, uint8_t *p_ref ); +#define x264_sub8x8_dct_msa x264_template(sub8x8_dct_msa) void x264_sub8x8_dct_msa( int16_t p_dst[4][16], uint8_t *p_src, uint8_t *p_ref ); +#define x264_sub16x16_dct_msa x264_template(sub16x16_dct_msa) void x264_sub16x16_dct_msa( int16_t p_dst[16][16], uint8_t *p_src, uint8_t *p_ref ); +#define x264_sub8x8_dct_dc_msa x264_template(sub8x8_dct_dc_msa) void x264_sub8x8_dct_dc_msa( int16_t pi_dct[4], uint8_t *p_pix1, uint8_t *p_pix2 ); +#define x264_sub8x16_dct_dc_msa x264_template(sub8x16_dct_dc_msa) void x264_sub8x16_dct_dc_msa( int16_t pi_dct[8], uint8_t *p_pix1, uint8_t *p_pix2 ); +#define x264_zigzag_scan_4x4_frame_msa x264_template(zigzag_scan_4x4_frame_msa) void x264_zigzag_scan_4x4_frame_msa( int16_t pi_level[16], int16_t pi_dct[16] ); #endif diff -Nru x264-0.152.2854+gite9a5903/common/mips/deblock-c.c x264-0.158.2988+git-20191101.7817004/common/mips/deblock-c.c --- x264-0.152.2854+gite9a5903/common/mips/deblock-c.c 2017-12-31 12:50:51.000000000 +0000 +++ 
x264-0.158.2988+git-20191101.7817004/common/mips/deblock-c.c 2019-11-09 05:16:29.000000000 +0000 @@ -1,7 +1,7 @@ /***************************************************************************** * deblock-c.c: msa deblocking ***************************************************************************** - * Copyright (C) 2015-2017 x264 project + * Copyright (C) 2015-2019 x264 project * * Authors: Neha Rana * @@ -25,6 +25,7 @@ #include "common/common.h" #include "macros.h" +#include "deblock.h" #if !HIGH_BIT_DEPTH #define AVC_LPF_P0P1P2_OR_Q0Q1Q2( p3_or_q3_org_in, p0_or_q0_org_in, \ diff -Nru x264-0.152.2854+gite9a5903/common/mips/deblock.h x264-0.158.2988+git-20191101.7817004/common/mips/deblock.h --- x264-0.152.2854+gite9a5903/common/mips/deblock.h 1970-01-01 00:00:00.000000000 +0000 +++ x264-0.158.2988+git-20191101.7817004/common/mips/deblock.h 2019-11-09 05:16:29.000000000 +0000 @@ -0,0 +1,52 @@ +/***************************************************************************** + * deblock.h: msa deblocking + ***************************************************************************** + * Copyright (C) 2017-2019 x264 project + * + * Authors: Anton Mitrofanov + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02111, USA. + * + * This program is also available under a commercial proprietary license. 
+ * For more information, contact us at licensing@x264.com. + *****************************************************************************/ + +#ifndef X264_MIPS_DEBLOCK_H +#define X264_MIPS_DEBLOCK_H + +#if !HIGH_BIT_DEPTH +#define x264_deblock_v_luma_msa x264_template(deblock_v_luma_msa) +void x264_deblock_v_luma_msa( uint8_t *pix, intptr_t stride, int alpha, int beta, int8_t *tc0 ); +#define x264_deblock_h_luma_msa x264_template(deblock_h_luma_msa) +void x264_deblock_h_luma_msa( uint8_t *pix, intptr_t stride, int alpha, int beta, int8_t *tc0 ); +#define x264_deblock_v_chroma_msa x264_template(deblock_v_chroma_msa) +void x264_deblock_v_chroma_msa( uint8_t *pix, intptr_t stride, int alpha, int beta, int8_t *tc0 ); +#define x264_deblock_h_chroma_msa x264_template(deblock_h_chroma_msa) +void x264_deblock_h_chroma_msa( uint8_t *pix, intptr_t stride, int alpha, int beta, int8_t *tc0 ); +#define x264_deblock_v_luma_intra_msa x264_template(deblock_v_luma_intra_msa) +void x264_deblock_v_luma_intra_msa( uint8_t *pix, intptr_t stride, int alpha, int beta ); +#define x264_deblock_h_luma_intra_msa x264_template(deblock_h_luma_intra_msa) +void x264_deblock_h_luma_intra_msa( uint8_t *pix, intptr_t stride, int alpha, int beta ); +#define x264_deblock_v_chroma_intra_msa x264_template(deblock_v_chroma_intra_msa) +void x264_deblock_v_chroma_intra_msa( uint8_t *pix, intptr_t stride, int alpha, int beta ); +#define x264_deblock_h_chroma_intra_msa x264_template(deblock_h_chroma_intra_msa) +void x264_deblock_h_chroma_intra_msa( uint8_t *pix, intptr_t stride, int alpha, int beta ); +#define x264_deblock_strength_msa x264_template(deblock_strength_msa) +void x264_deblock_strength_msa( uint8_t nnz[X264_SCAN8_SIZE], int8_t ref[2][X264_SCAN8_LUMA_SIZE], + int16_t mv[2][X264_SCAN8_LUMA_SIZE][2], uint8_t bs[2][8][4], int mvy_limit, + int bframe ); +#endif + +#endif diff -Nru x264-0.152.2854+gite9a5903/common/mips/macros.h x264-0.158.2988+git-20191101.7817004/common/mips/macros.h --- 
x264-0.152.2854+gite9a5903/common/mips/macros.h 2017-12-31 12:50:50.000000000 +0000 +++ x264-0.158.2988+git-20191101.7817004/common/mips/macros.h 2019-11-09 05:16:29.000000000 +0000 @@ -1,7 +1,7 @@ /***************************************************************************** * macros.h: msa macros ***************************************************************************** - * Copyright (C) 2015-2017 x264 project + * Copyright (C) 2015-2019 x264 project * * Authors: Rishikesh More * diff -Nru x264-0.152.2854+gite9a5903/common/mips/mc-c.c x264-0.158.2988+git-20191101.7817004/common/mips/mc-c.c --- x264-0.152.2854+gite9a5903/common/mips/mc-c.c 2017-12-31 12:50:51.000000000 +0000 +++ x264-0.158.2988+git-20191101.7817004/common/mips/mc-c.c 2019-11-09 05:16:29.000000000 +0000 @@ -1,7 +1,7 @@ /***************************************************************************** * mc-c.c: msa motion compensation ***************************************************************************** - * Copyright (C) 2015-2017 x264 project + * Copyright (C) 2015-2019 x264 project * * Authors: Neha Rana * @@ -51,129 +51,6 @@ 0, 1, 1, 2, 16, 17, 17, 18, 16, 17, 17, 18, 18, 19, 19, 20 }; -void x264_mc_copy_w16_msa( uint8_t *p_dst, intptr_t i_dst_stride, - uint8_t *p_src, intptr_t i_src_stride, - int32_t i_height ); -void x264_mc_copy_w8_msa( uint8_t *p_dst, intptr_t i_dst_stride, - uint8_t *p_src, intptr_t i_src_stride, - int32_t i_height ); -void x264_mc_copy_w4_msa( uint8_t *p_dst, intptr_t i_dst_stride, uint8_t *p_src, - intptr_t i_src_stride, int32_t i_height ); -void x264_memzero_aligned_msa( void *p_dst, size_t n ); - -void x264_pixel_avg_16x16_msa( uint8_t *p_pix1, intptr_t i_pix1_stride, - uint8_t *p_pix2, intptr_t i_pix2_stride, - uint8_t *p_pix3, intptr_t i_pix3_stride, - int32_t i_weight ); -void x264_pixel_avg_16x8_msa( uint8_t *p_pix1, intptr_t i_pix1_stride, - uint8_t *p_pix2, intptr_t i_pix2_stride, - uint8_t *p_pix3, intptr_t i_pix3_stride, - int32_t i_weight ); -void 
x264_pixel_avg_8x16_msa( uint8_t *p_pix1, intptr_t i_pix1_stride, - uint8_t *p_pix2, intptr_t i_pix2_stride, - uint8_t *p_pix3, intptr_t i_pix3_stride, - int32_t i_weight ); -void x264_pixel_avg_8x8_msa( uint8_t *p_pix1, intptr_t i_pix1_stride, - uint8_t *p_pix2, intptr_t i_pix2_stride, - uint8_t *p_pix3, intptr_t i_pix3_stride, - int32_t i_weight ); -void x264_pixel_avg_8x4_msa( uint8_t *p_pix1, intptr_t i_pix1_stride, - uint8_t *p_pix2, intptr_t i_pix2_stride, - uint8_t *p_pix3, intptr_t i_pix3_stride, - int32_t i_weight ); -void x264_pixel_avg_4x16_msa( uint8_t *p_pix1, intptr_t pix1_stride, - uint8_t *p_pix2, intptr_t pix2_stride, - uint8_t *p_pix3, intptr_t pix3_stride, - int32_t i_weight ); -void x264_pixel_avg_4x8_msa( uint8_t *p_pix1, intptr_t i_pix1_stride, - uint8_t *p_pix2, intptr_t i_pix2_stride, - uint8_t *p_pix3, intptr_t i_pix3_stride, - int32_t i_weight ); -void x264_pixel_avg_4x4_msa( uint8_t *p_pix1, intptr_t i_pix1_stride, - uint8_t *p_pix2, intptr_t i_pix2_stride, - uint8_t *p_pix3, intptr_t i_pix3_stride, - int32_t i_weight ); -void x264_pixel_avg_4x2_msa( uint8_t *p_pix1, intptr_t i_pix1_stride, - uint8_t *p_pix2, intptr_t i_pix2_stride, - uint8_t *p_pix3, intptr_t i_pix3_stride, - int32_t i_weight ); - -void x264_mc_weight_w20_msa( uint8_t *p_dst, intptr_t i_dst_stride, - uint8_t *p_src, intptr_t i_src_stride, - const x264_weight_t *pWeight, int32_t i_height ); -void x264_mc_weight_w4_msa( uint8_t *p_dst, intptr_t i_dst_stride, - uint8_t *p_src, intptr_t i_src_stride, - const x264_weight_t *pWeight, int32_t i_height ); -void x264_mc_weight_w8_msa( uint8_t *p_dst, intptr_t i_dst_stride, - uint8_t *p_src, intptr_t i_src_stride, - const x264_weight_t *pWeight, int32_t i_height ); -void x264_mc_weight_w16_msa( uint8_t *p_dst, intptr_t i_dst_stride, - uint8_t *p_src, intptr_t i_src_stride, - const x264_weight_t *pWeight, int32_t i_height ); - -weight_fn_t x264_mc_weight_wtab_msa[6] = -{ - x264_mc_weight_w4_msa, - x264_mc_weight_w4_msa, - 
x264_mc_weight_w8_msa, - x264_mc_weight_w16_msa, - x264_mc_weight_w16_msa, - x264_mc_weight_w20_msa, -}; - -void x264_mc_luma_msa( uint8_t *p_dst, intptr_t i_dst_stride, - uint8_t *p_src[4], intptr_t i_src_stride, - int32_t m_vx, int32_t m_vy, - int32_t i_width, int32_t i_height, - const x264_weight_t *pWeight ); -uint8_t *x264_get_ref_msa( uint8_t *p_dst, intptr_t *p_dst_stride, - uint8_t *p_src[4], intptr_t i_src_stride, - int32_t m_vx, int32_t m_vy, - int32_t i_width, int32_t i_height, - const x264_weight_t *pWeight ); -void x264_mc_chroma_msa( uint8_t *p_dst_u, uint8_t *p_dst_v, - intptr_t i_dst_stride, - uint8_t *p_src, intptr_t i_src_stride, - int32_t m_vx, int32_t m_vy, - int32_t i_width, int32_t i_height ); -void x264_hpel_filter_msa( uint8_t *p_dsth, uint8_t *p_dst_v, - uint8_t *p_dstc, uint8_t *p_src, - intptr_t i_stride, int32_t i_width, - int32_t i_height, int16_t *p_buf ); - -void x264_plane_copy_interleave_msa( uint8_t *p_dst, intptr_t i_dst_stride, - uint8_t *p_src0, intptr_t i_src_stride0, - uint8_t *p_src1, intptr_t i_src_stride1, - int32_t i_width, int32_t i_height ); -void x264_plane_copy_deinterleave_msa( uint8_t *p_dst0, intptr_t i_dst_stride0, - uint8_t *p_dst1, intptr_t i_dst_stride1, - uint8_t *p_src, intptr_t i_src_stride, - int32_t i_width, int32_t i_height ); -void x264_plane_copy_deinterleave_rgb_msa( uint8_t *p_dst0, - intptr_t i_dst_stride0, - uint8_t *p_dst1, - intptr_t i_dst_stride1, - uint8_t *p_dst2, - intptr_t i_dst_stride2, - uint8_t *p_src, - intptr_t i_src_stride, - int32_t i_src_width, int32_t i_width, - int32_t i_height ); -void x264_store_interleave_chroma_msa( uint8_t *p_dst, intptr_t i_dst_stride, - uint8_t *p_src0, uint8_t *p_src1, - int32_t i_height ); -void x264_load_deinterleave_chroma_fenc_msa( uint8_t *p_dst, uint8_t *p_src, - intptr_t i_src_stride, - int32_t i_height ); -void x264_load_deinterleave_chroma_fdec_msa( uint8_t *p_dst, uint8_t *p_src, - intptr_t i_src_stride, - int32_t i_height ); -void 
x264_frame_init_lowres_core_msa( uint8_t *p_src, uint8_t *p_dst0, - uint8_t *p_dst1, uint8_t *p_dst2, - uint8_t *p_dst3, intptr_t i_src_stride, - intptr_t i_dst_stride, int32_t i_width, - int32_t i_height ); - static void avc_luma_hz_16w_msa( uint8_t *p_src, int32_t i_src_stride, uint8_t *p_dst, int32_t i_dst_stride, int32_t i_height ) @@ -1861,10 +1738,10 @@ } } -static void plane_copy_interleave_msa( uint8_t *p_src0, int32_t i_src0_stride, - uint8_t *p_src1, int32_t i_src1_stride, - uint8_t *p_dst, int32_t i_dst_stride, - int32_t i_width, int32_t i_height ) +static void core_plane_copy_interleave_msa( uint8_t *p_src0, int32_t i_src0_stride, + uint8_t *p_src1, int32_t i_src1_stride, + uint8_t *p_dst, int32_t i_dst_stride, + int32_t i_width, int32_t i_height ) { int32_t i_loop_width, i_loop_height, i_w_mul8, i_h4w; v16u8 src0, src1, src2, src3, src4, src5, src6, src7; @@ -1966,10 +1843,10 @@ } } -static void plane_copy_deinterleave_msa( uint8_t *p_src, int32_t i_src_stride, - uint8_t *p_dst0, int32_t dst0_stride, - uint8_t *p_dst1, int32_t dst1_stride, - int32_t i_width, int32_t i_height ) +static void core_plane_copy_deinterleave_msa( uint8_t *p_src, int32_t i_src_stride, + uint8_t *p_dst0, int32_t dst0_stride, + uint8_t *p_dst1, int32_t dst1_stride, + int32_t i_width, int32_t i_height ) { int32_t i_loop_width, i_loop_height, i_w_mul4, i_w_mul8, i_h4w; uint32_t u_res_w0, u_res_w1; @@ -2098,16 +1975,16 @@ } -static void plane_copy_deinterleave_rgb_msa( uint8_t *p_src, - int32_t i_src_stride, - uint8_t *p_dst0, - int32_t i_dst0_stride, - uint8_t *p_dst1, - int32_t i_dst1_stride, - uint8_t *p_dst2, - int32_t i_dst2_stride, - int32_t i_width, - int32_t i_height ) +static void core_plane_copy_deinterleave_rgb_msa( uint8_t *p_src, + int32_t i_src_stride, + uint8_t *p_dst0, + int32_t i_dst0_stride, + uint8_t *p_dst1, + int32_t i_dst1_stride, + uint8_t *p_dst2, + int32_t i_dst2_stride, + int32_t i_width, + int32_t i_height ) { uint8_t *p_src_orig = p_src; uint8_t 
*p_dst0_orig = p_dst0; @@ -2234,16 +2111,16 @@ } } -static void plane_copy_deinterleave_rgba_msa( uint8_t *p_src, - int32_t i_src_stride, - uint8_t *p_dst0, - int32_t i_dst0_stride, - uint8_t *p_dst1, - int32_t i_dst1_stride, - uint8_t *p_dst2, - int32_t i_dst2_stride, - int32_t i_width, - int32_t i_height ) +static void core_plane_copy_deinterleave_rgba_msa( uint8_t *p_src, + int32_t i_src_stride, + uint8_t *p_dst0, + int32_t i_dst0_stride, + uint8_t *p_dst1, + int32_t i_dst1_stride, + uint8_t *p_dst2, + int32_t i_dst2_stride, + int32_t i_width, + int32_t i_height ) { uint8_t *p_src_orig = p_src; uint8_t *p_dst0_orig = p_dst0; @@ -2441,10 +2318,10 @@ } } -static void store_interleave_chroma_msa( uint8_t *p_src0, int32_t i_src0_stride, - uint8_t *p_src1, int32_t i_src1_stride, - uint8_t *p_dst, int32_t i_dst_stride, - int32_t i_height ) +static void core_store_interleave_chroma_msa( uint8_t *p_src0, int32_t i_src0_stride, + uint8_t *p_src1, int32_t i_src1_stride, + uint8_t *p_dst, int32_t i_dst_stride, + int32_t i_height ) { int32_t i_loop_height, i_h4w; v16u8 in0, in1, in2, in3, in4, in5, in6, in7; @@ -2476,12 +2353,12 @@ } } -static void frame_init_lowres_core_msa( uint8_t *p_src, int32_t i_src_stride, - uint8_t *p_dst0, int32_t dst0_stride, - uint8_t *p_dst1, int32_t dst1_stride, - uint8_t *p_dst2, int32_t dst2_stride, - uint8_t *p_dst3, int32_t dst3_stride, - int32_t i_width, int32_t i_height ) +static void core_frame_init_lowres_core_msa( uint8_t *p_src, int32_t i_src_stride, + uint8_t *p_dst0, int32_t dst0_stride, + uint8_t *p_dst1, int32_t dst1_stride, + uint8_t *p_dst2, int32_t dst2_stride, + uint8_t *p_dst3, int32_t dst3_stride, + int32_t i_width, int32_t i_height ) { int32_t i_loop_width, i_loop_height, i_w16_mul; v16u8 src0, src1, src2, src3, src4, src5, src6, src7, src8; @@ -2583,29 +2460,29 @@ } } -void x264_mc_copy_w16_msa( uint8_t *p_dst, intptr_t i_dst_stride, - uint8_t *p_src, intptr_t i_src_stride, - int32_t i_height ) +static void 
mc_copy_w16_msa( uint8_t *p_dst, intptr_t i_dst_stride, + uint8_t *p_src, intptr_t i_src_stride, + int32_t i_height ) { copy_width16_msa( p_src, i_src_stride, p_dst, i_dst_stride, i_height ); } -void x264_mc_copy_w8_msa( uint8_t *p_dst, intptr_t i_dst_stride, uint8_t *p_src, - intptr_t i_src_stride, int32_t i_height ) +static void mc_copy_w8_msa( uint8_t *p_dst, intptr_t i_dst_stride, uint8_t *p_src, + intptr_t i_src_stride, int32_t i_height ) { copy_width8_msa( p_src, i_src_stride, p_dst, i_dst_stride, i_height ); } -void x264_mc_copy_w4_msa( uint8_t *p_dst, intptr_t i_dst_stride, uint8_t *p_src, - intptr_t i_src_stride, int32_t i_height ) +static void mc_copy_w4_msa( uint8_t *p_dst, intptr_t i_dst_stride, uint8_t *p_src, + intptr_t i_src_stride, int32_t i_height ) { copy_width4_msa( p_src, i_src_stride, p_dst, i_dst_stride, i_height ); } -void x264_pixel_avg_16x16_msa( uint8_t *p_pix1, intptr_t pix1_stride, - uint8_t *p_pix2, intptr_t pix2_stride, - uint8_t *p_pix3, intptr_t pix3_stride, - int32_t i_weight ) +static void pixel_avg_16x16_msa( uint8_t *p_pix1, intptr_t pix1_stride, + uint8_t *p_pix2, intptr_t pix2_stride, + uint8_t *p_pix3, intptr_t pix3_stride, + int32_t i_weight ) { if( 32 == i_weight ) { @@ -2630,10 +2507,10 @@ } } -void x264_pixel_avg_16x8_msa( uint8_t *p_pix1, intptr_t pix1_stride, - uint8_t *p_pix2, intptr_t pix2_stride, - uint8_t *p_pix3, intptr_t pix3_stride, - int32_t i_weight ) +static void pixel_avg_16x8_msa( uint8_t *p_pix1, intptr_t pix1_stride, + uint8_t *p_pix2, intptr_t pix2_stride, + uint8_t *p_pix3, intptr_t pix3_stride, + int32_t i_weight ) { if( 32 == i_weight ) { @@ -2658,10 +2535,10 @@ } } -void x264_pixel_avg_8x16_msa( uint8_t *p_pix1, intptr_t pix1_stride, - uint8_t *p_pix2, intptr_t pix2_stride, - uint8_t *p_pix3, intptr_t pix3_stride, - int32_t i_weight ) +static void pixel_avg_8x16_msa( uint8_t *p_pix1, intptr_t pix1_stride, + uint8_t *p_pix2, intptr_t pix2_stride, + uint8_t *p_pix3, intptr_t pix3_stride, + int32_t 
i_weight ) { if( 32 == i_weight ) { @@ -2684,10 +2561,10 @@ } } -void x264_pixel_avg_8x8_msa( uint8_t *p_pix1, intptr_t pix1_stride, - uint8_t *p_pix2, intptr_t pix2_stride, - uint8_t *p_pix3, intptr_t pix3_stride, - int32_t i_weight ) +static void pixel_avg_8x8_msa( uint8_t *p_pix1, intptr_t pix1_stride, + uint8_t *p_pix2, intptr_t pix2_stride, + uint8_t *p_pix3, intptr_t pix3_stride, + int32_t i_weight ) { if( 32 == i_weight ) { @@ -2710,10 +2587,10 @@ } } -void x264_pixel_avg_8x4_msa( uint8_t *p_pix1, intptr_t pix1_stride, - uint8_t *p_pix2, intptr_t pix2_stride, - uint8_t *p_pix3, intptr_t pix3_stride, - int32_t i_weight ) +static void pixel_avg_8x4_msa( uint8_t *p_pix1, intptr_t pix1_stride, + uint8_t *p_pix2, intptr_t pix2_stride, + uint8_t *p_pix3, intptr_t pix3_stride, + int32_t i_weight ) { if( 32 == i_weight ) { @@ -2736,10 +2613,10 @@ } } -void x264_pixel_avg_4x16_msa( uint8_t *p_pix1, intptr_t pix1_stride, - uint8_t *p_pix2, intptr_t pix2_stride, - uint8_t *p_pix3, intptr_t pix3_stride, - int32_t i_weight ) +static void pixel_avg_4x16_msa( uint8_t *p_pix1, intptr_t pix1_stride, + uint8_t *p_pix2, intptr_t pix2_stride, + uint8_t *p_pix3, intptr_t pix3_stride, + int32_t i_weight ) { if( 32 == i_weight ) { @@ -2762,10 +2639,10 @@ } } -void x264_pixel_avg_4x8_msa( uint8_t *p_pix1, intptr_t pix1_stride, - uint8_t *p_pix2, intptr_t pix2_stride, - uint8_t *p_pix3, intptr_t pix3_stride, - int32_t i_weight ) +static void pixel_avg_4x8_msa( uint8_t *p_pix1, intptr_t pix1_stride, + uint8_t *p_pix2, intptr_t pix2_stride, + uint8_t *p_pix3, intptr_t pix3_stride, + int32_t i_weight ) { if( 32 == i_weight ) { @@ -2788,10 +2665,10 @@ } } -void x264_pixel_avg_4x4_msa( uint8_t *p_pix1, intptr_t pix1_stride, - uint8_t *p_pix2, intptr_t pix2_stride, - uint8_t *p_pix3, intptr_t pix3_stride, - int32_t i_weight ) +static void pixel_avg_4x4_msa( uint8_t *p_pix1, intptr_t pix1_stride, + uint8_t *p_pix2, intptr_t pix2_stride, + uint8_t *p_pix3, intptr_t pix3_stride, + int32_t 
i_weight ) { if( 32 == i_weight ) { @@ -2814,10 +2691,10 @@ } } -void x264_pixel_avg_4x2_msa( uint8_t *p_pix1, intptr_t pix1_stride, - uint8_t *p_pix2, intptr_t pix2_stride, - uint8_t *p_pix3, intptr_t pix3_stride, - int32_t i_weight ) +static void pixel_avg_4x2_msa( uint8_t *p_pix1, intptr_t pix1_stride, + uint8_t *p_pix2, intptr_t pix2_stride, + uint8_t *p_pix3, intptr_t pix3_stride, + int32_t i_weight ) { if( 32 == i_weight ) { @@ -2841,7 +2718,7 @@ } -void x264_memzero_aligned_msa( void *p_dst, size_t n ) +static void memzero_aligned_msa( void *p_dst, size_t n ) { uint32_t u_tot32_mul_lines = n >> 5; uint32_t u_remaining = n - ( u_tot32_mul_lines << 5 ); @@ -2854,9 +2731,9 @@ } } -void x264_mc_weight_w4_msa( uint8_t *p_dst, intptr_t i_dst_stride, - uint8_t *p_src, intptr_t i_src_stride, - const x264_weight_t *pWeight, int32_t i_height ) +static void mc_weight_w4_msa( uint8_t *p_dst, intptr_t i_dst_stride, + uint8_t *p_src, intptr_t i_src_stride, + const x264_weight_t *pWeight, int32_t i_height ) { int32_t i_log2_denom = pWeight->i_denom; int32_t i_offset = pWeight->i_offset; @@ -2866,9 +2743,9 @@ i_height, i_log2_denom, i_weight, i_offset ); } -void x264_mc_weight_w8_msa( uint8_t *p_dst, intptr_t i_dst_stride, - uint8_t *p_src, intptr_t i_src_stride, - const x264_weight_t *pWeight, int32_t i_height ) +static void mc_weight_w8_msa( uint8_t *p_dst, intptr_t i_dst_stride, + uint8_t *p_src, intptr_t i_src_stride, + const x264_weight_t *pWeight, int32_t i_height ) { int32_t i_log2_denom = pWeight->i_denom; int32_t i_offset = pWeight->i_offset; @@ -2878,9 +2755,9 @@ i_height, i_log2_denom, i_weight, i_offset ); } -void x264_mc_weight_w16_msa( uint8_t *p_dst, intptr_t i_dst_stride, - uint8_t *p_src, intptr_t i_src_stride, - const x264_weight_t *pWeight, int32_t i_height ) +static void mc_weight_w16_msa( uint8_t *p_dst, intptr_t i_dst_stride, + uint8_t *p_src, intptr_t i_src_stride, + const x264_weight_t *pWeight, int32_t i_height ) { int32_t i_log2_denom = 
pWeight->i_denom; int32_t i_offset = pWeight->i_offset; @@ -2890,21 +2767,21 @@ i_height, i_log2_denom, i_weight, i_offset ); } -void x264_mc_weight_w20_msa( uint8_t *p_dst, intptr_t i_dst_stride, - uint8_t *p_src, intptr_t i_src_stride, - const x264_weight_t *pWeight, int32_t i_height ) -{ - x264_mc_weight_w16_msa( p_dst, i_dst_stride, p_src, i_src_stride, - pWeight, i_height ); - x264_mc_weight_w4_msa( p_dst + 16, i_dst_stride, p_src + 16, i_src_stride, - pWeight, i_height ); +static void mc_weight_w20_msa( uint8_t *p_dst, intptr_t i_dst_stride, + uint8_t *p_src, intptr_t i_src_stride, + const x264_weight_t *pWeight, int32_t i_height ) +{ + mc_weight_w16_msa( p_dst, i_dst_stride, p_src, i_src_stride, + pWeight, i_height ); + mc_weight_w4_msa( p_dst + 16, i_dst_stride, p_src + 16, i_src_stride, + pWeight, i_height ); } -void x264_mc_luma_msa( uint8_t *p_dst, intptr_t i_dst_stride, - uint8_t *p_src[4], intptr_t i_src_stride, - int32_t m_vx, int32_t m_vy, - int32_t i_width, int32_t i_height, - const x264_weight_t *pWeight ) +static void mc_luma_msa( uint8_t *p_dst, intptr_t i_dst_stride, + uint8_t *p_src[4], intptr_t i_src_stride, + int32_t m_vx, int32_t m_vy, + int32_t i_width, int32_t i_height, + const x264_weight_t *pWeight ) { int32_t i_qpel_idx; int32_t i_offset; @@ -2940,19 +2817,19 @@ { if( 16 == i_width ) { - x264_mc_weight_w16_msa( p_dst, i_dst_stride, - p_dst, i_dst_stride, - pWeight, i_height ); + mc_weight_w16_msa( p_dst, i_dst_stride, + p_dst, i_dst_stride, + pWeight, i_height ); } else if( 8 == i_width ) { - x264_mc_weight_w8_msa( p_dst, i_dst_stride, p_dst, i_dst_stride, - pWeight, i_height ); + mc_weight_w8_msa( p_dst, i_dst_stride, p_dst, i_dst_stride, + pWeight, i_height ); } else if( 4 == i_width ) { - x264_mc_weight_w4_msa( p_dst, i_dst_stride, p_dst, i_dst_stride, - pWeight, i_height ); + mc_weight_w4_msa( p_dst, i_dst_stride, p_dst, i_dst_stride, + pWeight, i_height ); } } } @@ -2960,18 +2837,18 @@ { if( 16 == i_width ) { - 
x264_mc_weight_w16_msa( p_dst, i_dst_stride, p_src1, i_src_stride, - pWeight, i_height ); + mc_weight_w16_msa( p_dst, i_dst_stride, p_src1, i_src_stride, + pWeight, i_height ); } else if( 8 == i_width ) { - x264_mc_weight_w8_msa( p_dst, i_dst_stride, p_src1, i_src_stride, - pWeight, i_height ); + mc_weight_w8_msa( p_dst, i_dst_stride, p_src1, i_src_stride, + pWeight, i_height ); } else if( 4 == i_width ) { - x264_mc_weight_w4_msa( p_dst, i_dst_stride, p_src1, i_src_stride, - pWeight, i_height ); + mc_weight_w4_msa( p_dst, i_dst_stride, p_src1, i_src_stride, + pWeight, i_height ); } } else @@ -2994,11 +2871,11 @@ } } -void x264_mc_chroma_msa( uint8_t *p_dst_u, uint8_t *p_dst_v, - intptr_t i_dst_stride, - uint8_t *p_src, intptr_t i_src_stride, - int32_t m_vx, int32_t m_vy, - int32_t i_width, int32_t i_height ) +static void mc_chroma_msa( uint8_t *p_dst_u, uint8_t *p_dst_v, + intptr_t i_dst_stride, + uint8_t *p_src, intptr_t i_src_stride, + int32_t m_vx, int32_t m_vy, + int32_t i_width, int32_t i_height ) { int32_t i_d8x = m_vx & 0x07; int32_t i_d8y = m_vy & 0x07; @@ -3035,10 +2912,10 @@ } } -void x264_hpel_filter_msa( uint8_t *p_dsth, uint8_t *p_dst_v, - uint8_t *p_dstc, uint8_t *p_src, - intptr_t i_stride, int32_t i_width, - int32_t i_height, int16_t *p_buf ) +static void hpel_filter_msa( uint8_t *p_dsth, uint8_t *p_dst_v, + uint8_t *p_dstc, uint8_t *p_src, + intptr_t i_stride, int32_t i_width, + int32_t i_height, int16_t *p_buf ) { for( int32_t i = 0; i < ( i_width / 16 ); i++ ) { @@ -3055,96 +2932,96 @@ } } -void x264_plane_copy_interleave_msa( uint8_t *p_dst, intptr_t i_dst_stride, - uint8_t *p_src0, intptr_t i_src_stride0, - uint8_t *p_src1, intptr_t i_src_stride1, - int32_t i_width, int32_t i_height ) -{ - plane_copy_interleave_msa( p_src0, i_src_stride0, p_src1, i_src_stride1, - p_dst, i_dst_stride, i_width, i_height ); +static void plane_copy_interleave_msa( uint8_t *p_dst, intptr_t i_dst_stride, + uint8_t *p_src0, intptr_t i_src_stride0, + uint8_t *p_src1, 
intptr_t i_src_stride1, + int32_t i_width, int32_t i_height ) +{ + core_plane_copy_interleave_msa( p_src0, i_src_stride0, p_src1, i_src_stride1, + p_dst, i_dst_stride, i_width, i_height ); } -void x264_plane_copy_deinterleave_msa( uint8_t *p_dst0, intptr_t i_dst_stride0, - uint8_t *p_dst1, intptr_t i_dst_stride1, - uint8_t *p_src, intptr_t i_src_stride, - int32_t i_width, int32_t i_height ) +static void plane_copy_deinterleave_msa( uint8_t *p_dst0, intptr_t i_dst_stride0, + uint8_t *p_dst1, intptr_t i_dst_stride1, + uint8_t *p_src, intptr_t i_src_stride, + int32_t i_width, int32_t i_height ) { - plane_copy_deinterleave_msa( p_src, i_src_stride, p_dst0, i_dst_stride0, - p_dst1, i_dst_stride1, i_width, i_height ); + core_plane_copy_deinterleave_msa( p_src, i_src_stride, p_dst0, i_dst_stride0, + p_dst1, i_dst_stride1, i_width, i_height ); } -void x264_plane_copy_deinterleave_rgb_msa( uint8_t *p_dst0, - intptr_t i_dst_stride0, - uint8_t *p_dst1, - intptr_t i_dst_stride1, - uint8_t *p_dst2, - intptr_t i_dst_stride2, - uint8_t *p_src, - intptr_t i_src_stride, - int32_t i_src_width, - int32_t i_width, - int32_t i_height ) +static void plane_copy_deinterleave_rgb_msa( uint8_t *p_dst0, + intptr_t i_dst_stride0, + uint8_t *p_dst1, + intptr_t i_dst_stride1, + uint8_t *p_dst2, + intptr_t i_dst_stride2, + uint8_t *p_src, + intptr_t i_src_stride, + int32_t i_src_width, + int32_t i_width, + int32_t i_height ) { if( 3 == i_src_width ) { - plane_copy_deinterleave_rgb_msa( p_src, i_src_stride, - p_dst0, i_dst_stride0, - p_dst1, i_dst_stride1, - p_dst2, i_dst_stride2, - i_width, i_height ); + core_plane_copy_deinterleave_rgb_msa( p_src, i_src_stride, + p_dst0, i_dst_stride0, + p_dst1, i_dst_stride1, + p_dst2, i_dst_stride2, + i_width, i_height ); } else if( 4 == i_src_width ) { - plane_copy_deinterleave_rgba_msa( p_src, i_src_stride, - p_dst0, i_dst_stride0, - p_dst1, i_dst_stride1, - p_dst2, i_dst_stride2, - i_width, i_height ); + core_plane_copy_deinterleave_rgba_msa( p_src, 
i_src_stride, + p_dst0, i_dst_stride0, + p_dst1, i_dst_stride1, + p_dst2, i_dst_stride2, + i_width, i_height ); } } -void x264_store_interleave_chroma_msa( uint8_t *p_dst, intptr_t i_dst_stride, - uint8_t *p_src0, uint8_t *p_src1, - int32_t i_height ) +static void store_interleave_chroma_msa( uint8_t *p_dst, intptr_t i_dst_stride, + uint8_t *p_src0, uint8_t *p_src1, + int32_t i_height ) { - store_interleave_chroma_msa( p_src0, FDEC_STRIDE, p_src1, FDEC_STRIDE, - p_dst, i_dst_stride, i_height ); + core_store_interleave_chroma_msa( p_src0, FDEC_STRIDE, p_src1, FDEC_STRIDE, + p_dst, i_dst_stride, i_height ); } -void x264_load_deinterleave_chroma_fenc_msa( uint8_t *p_dst, uint8_t *p_src, - intptr_t i_src_stride, - int32_t i_height ) +static void load_deinterleave_chroma_fenc_msa( uint8_t *p_dst, uint8_t *p_src, + intptr_t i_src_stride, + int32_t i_height ) { - plane_copy_deinterleave_msa( p_src, i_src_stride, p_dst, FENC_STRIDE, - ( p_dst + ( FENC_STRIDE / 2 ) ), FENC_STRIDE, - 8, i_height ); + core_plane_copy_deinterleave_msa( p_src, i_src_stride, p_dst, FENC_STRIDE, + ( p_dst + ( FENC_STRIDE / 2 ) ), FENC_STRIDE, + 8, i_height ); } -void x264_load_deinterleave_chroma_fdec_msa( uint8_t *p_dst, uint8_t *p_src, - intptr_t i_src_stride, - int32_t i_height ) +static void load_deinterleave_chroma_fdec_msa( uint8_t *p_dst, uint8_t *p_src, + intptr_t i_src_stride, + int32_t i_height ) { - plane_copy_deinterleave_msa( p_src, i_src_stride, p_dst, FDEC_STRIDE, - ( p_dst + ( FDEC_STRIDE / 2 ) ), FDEC_STRIDE, - 8, i_height ); + core_plane_copy_deinterleave_msa( p_src, i_src_stride, p_dst, FDEC_STRIDE, + ( p_dst + ( FDEC_STRIDE / 2 ) ), FDEC_STRIDE, + 8, i_height ); } -void x264_frame_init_lowres_core_msa( uint8_t *p_src, uint8_t *p_dst0, - uint8_t *p_dst1, uint8_t *p_dst2, - uint8_t *p_dst3, intptr_t i_src_stride, - intptr_t i_dst_stride, int32_t i_width, - int32_t i_height ) +static void frame_init_lowres_core_msa( uint8_t *p_src, uint8_t *p_dst0, + uint8_t *p_dst1, uint8_t 
*p_dst2, + uint8_t *p_dst3, intptr_t i_src_stride, + intptr_t i_dst_stride, int32_t i_width, + int32_t i_height ) { - frame_init_lowres_core_msa( p_src, i_src_stride, p_dst0, i_dst_stride, - p_dst1, i_dst_stride, p_dst2, i_dst_stride, - p_dst3, i_dst_stride, i_width, i_height ); + core_frame_init_lowres_core_msa( p_src, i_src_stride, p_dst0, i_dst_stride, + p_dst1, i_dst_stride, p_dst2, i_dst_stride, + p_dst3, i_dst_stride, i_width, i_height ); } -uint8_t *x264_get_ref_msa( uint8_t *p_dst, intptr_t *p_dst_stride, - uint8_t *p_src[4], intptr_t i_src_stride, - int32_t m_vx, int32_t m_vy, - int32_t i_width, int32_t i_height, - const x264_weight_t *pWeight ) +static uint8_t *get_ref_msa( uint8_t *p_dst, intptr_t *p_dst_stride, + uint8_t *p_src[4], intptr_t i_src_stride, + int32_t m_vx, int32_t m_vy, + int32_t i_width, int32_t i_height, + const x264_weight_t *pWeight ) { int32_t i_qpel_idx, i_cnt, i_h4w; int32_t i_offset; @@ -3289,9 +3166,9 @@ if( 16 == i_width || 12 == i_width ) { - x264_mc_weight_w16_msa( p_dst, *p_dst_stride, - p_dst, *p_dst_stride, - pWeight, i_h4w ); + mc_weight_w16_msa( p_dst, *p_dst_stride, + p_dst, *p_dst_stride, + pWeight, i_h4w ); for( i_cnt = i_h4w; i_cnt < i_height; i_cnt++ ) { v16i8 zero = {0}; @@ -3349,9 +3226,9 @@ } else if( 20 == i_width ) { - x264_mc_weight_w20_msa( p_dst, *p_dst_stride, - p_dst, *p_dst_stride, - pWeight, i_h4w ); + mc_weight_w20_msa( p_dst, *p_dst_stride, + p_dst, *p_dst_stride, + pWeight, i_h4w ); for( i_cnt = i_h4w; i_cnt < i_height; i_cnt++ ) { uint32_t temp0; @@ -3427,9 +3304,9 @@ } else if( 8 == i_width ) { - x264_mc_weight_w8_msa( p_dst, *p_dst_stride, - p_dst, *p_dst_stride, - pWeight, i_h4w ); + mc_weight_w8_msa( p_dst, *p_dst_stride, + p_dst, *p_dst_stride, + pWeight, i_h4w ); for( i_cnt = i_h4w; i_cnt < i_height; i_cnt++ ) { uint64_t temp0; @@ -3473,9 +3350,9 @@ } else if( 4 == i_width ) { - x264_mc_weight_w4_msa( p_dst, *p_dst_stride, - p_dst, *p_dst_stride, - pWeight, i_h4w ); + mc_weight_w4_msa( p_dst, 
*p_dst_stride, + p_dst, *p_dst_stride, + pWeight, i_h4w ); for( i_cnt = i_h4w; i_cnt < i_height; i_cnt++ ) { uint32_t temp0; @@ -3537,8 +3414,8 @@ if( 16 == i_width || 12 == i_width ) { - x264_mc_weight_w16_msa( p_dst, *p_dst_stride, p_src1, i_src_stride, - pWeight, i_h4w ); + mc_weight_w16_msa( p_dst, *p_dst_stride, p_src1, i_src_stride, + pWeight, i_h4w ); p_src1 = src1_org + i_h4w * i_src_stride; for( i_cnt = i_h4w; i_cnt < i_height; i_cnt++ ) @@ -3591,8 +3468,8 @@ } else if( 20 == i_width ) { - x264_mc_weight_w20_msa( p_dst, *p_dst_stride, p_src1, i_src_stride, - pWeight, i_h4w ); + mc_weight_w20_msa( p_dst, *p_dst_stride, p_src1, i_src_stride, + pWeight, i_h4w ); p_src1 = src1_org + i_h4w * i_src_stride; for( i_cnt = i_h4w; i_cnt < i_height; i_cnt++ ) @@ -3662,8 +3539,8 @@ } else if( 8 == i_width ) { - x264_mc_weight_w8_msa( p_dst, *p_dst_stride, p_src1, i_src_stride, - pWeight, i_h4w ); + mc_weight_w8_msa( p_dst, *p_dst_stride, p_src1, i_src_stride, + pWeight, i_h4w ); p_src1 = src1_org + i_h4w * i_src_stride; for( i_cnt = i_h4w; i_cnt < i_height; i_cnt++ ) @@ -3707,8 +3584,8 @@ } else if( 4 == i_width ) { - x264_mc_weight_w4_msa( p_dst, *p_dst_stride, p_src1, i_src_stride, - pWeight, i_h4w ); + mc_weight_w4_msa( p_dst, *p_dst_stride, p_src1, i_src_stride, + pWeight, i_h4w ); p_src1 = src1_org + i_h4w * i_src_stride; for( i_cnt = i_h4w; i_cnt < i_height; i_cnt++ ) @@ -3761,6 +3638,16 @@ return p_src1; } } + +static weight_fn_t mc_weight_wtab_msa[6] = +{ + mc_weight_w4_msa, + mc_weight_w4_msa, + mc_weight_w8_msa, + mc_weight_w16_msa, + mc_weight_w16_msa, + mc_weight_w20_msa, +}; #endif // !HIGH_BIT_DEPTH void x264_mc_init_mips( int32_t cpu, x264_mc_functions_t *pf ) @@ -3768,42 +3655,42 @@ #if !HIGH_BIT_DEPTH if( cpu & X264_CPU_MSA ) { - pf->mc_luma = x264_mc_luma_msa; - pf->mc_chroma = x264_mc_chroma_msa; - pf->get_ref = x264_get_ref_msa; - - pf->avg[PIXEL_16x16]= x264_pixel_avg_16x16_msa; - pf->avg[PIXEL_16x8] = x264_pixel_avg_16x8_msa; - pf->avg[PIXEL_8x16] 
= x264_pixel_avg_8x16_msa; - pf->avg[PIXEL_8x8] = x264_pixel_avg_8x8_msa; - pf->avg[PIXEL_8x4] = x264_pixel_avg_8x4_msa; - pf->avg[PIXEL_4x16] = x264_pixel_avg_4x16_msa; - pf->avg[PIXEL_4x8] = x264_pixel_avg_4x8_msa; - pf->avg[PIXEL_4x4] = x264_pixel_avg_4x4_msa; - pf->avg[PIXEL_4x2] = x264_pixel_avg_4x2_msa; - - pf->weight = x264_mc_weight_wtab_msa; - pf->offsetadd = x264_mc_weight_wtab_msa; - pf->offsetsub = x264_mc_weight_wtab_msa; - - pf->copy_16x16_unaligned = x264_mc_copy_w16_msa; - pf->copy[PIXEL_16x16] = x264_mc_copy_w16_msa; - pf->copy[PIXEL_8x8] = x264_mc_copy_w8_msa; - pf->copy[PIXEL_4x4] = x264_mc_copy_w4_msa; - - pf->store_interleave_chroma = x264_store_interleave_chroma_msa; - pf->load_deinterleave_chroma_fenc = x264_load_deinterleave_chroma_fenc_msa; - pf->load_deinterleave_chroma_fdec = x264_load_deinterleave_chroma_fdec_msa; - - pf->plane_copy_interleave = x264_plane_copy_interleave_msa; - pf->plane_copy_deinterleave = x264_plane_copy_deinterleave_msa; - pf->plane_copy_deinterleave_rgb = x264_plane_copy_deinterleave_rgb_msa; + pf->mc_luma = mc_luma_msa; + pf->mc_chroma = mc_chroma_msa; + pf->get_ref = get_ref_msa; + + pf->avg[PIXEL_16x16]= pixel_avg_16x16_msa; + pf->avg[PIXEL_16x8] = pixel_avg_16x8_msa; + pf->avg[PIXEL_8x16] = pixel_avg_8x16_msa; + pf->avg[PIXEL_8x8] = pixel_avg_8x8_msa; + pf->avg[PIXEL_8x4] = pixel_avg_8x4_msa; + pf->avg[PIXEL_4x16] = pixel_avg_4x16_msa; + pf->avg[PIXEL_4x8] = pixel_avg_4x8_msa; + pf->avg[PIXEL_4x4] = pixel_avg_4x4_msa; + pf->avg[PIXEL_4x2] = pixel_avg_4x2_msa; + + pf->weight = mc_weight_wtab_msa; + pf->offsetadd = mc_weight_wtab_msa; + pf->offsetsub = mc_weight_wtab_msa; + + pf->copy_16x16_unaligned = mc_copy_w16_msa; + pf->copy[PIXEL_16x16] = mc_copy_w16_msa; + pf->copy[PIXEL_8x8] = mc_copy_w8_msa; + pf->copy[PIXEL_4x4] = mc_copy_w4_msa; + + pf->store_interleave_chroma = store_interleave_chroma_msa; + pf->load_deinterleave_chroma_fenc = load_deinterleave_chroma_fenc_msa; + pf->load_deinterleave_chroma_fdec = 
load_deinterleave_chroma_fdec_msa; + + pf->plane_copy_interleave = plane_copy_interleave_msa; + pf->plane_copy_deinterleave = plane_copy_deinterleave_msa; + pf->plane_copy_deinterleave_rgb = plane_copy_deinterleave_rgb_msa; - pf->hpel_filter = x264_hpel_filter_msa; + pf->hpel_filter = hpel_filter_msa; pf->memcpy_aligned = memcpy; - pf->memzero_aligned = x264_memzero_aligned_msa; - pf->frame_init_lowres_core = x264_frame_init_lowres_core_msa; + pf->memzero_aligned = memzero_aligned_msa; + pf->frame_init_lowres_core = frame_init_lowres_core_msa; } #endif // !HIGH_BIT_DEPTH } diff -Nru x264-0.152.2854+gite9a5903/common/mips/mc.h x264-0.158.2988+git-20191101.7817004/common/mips/mc.h --- x264-0.152.2854+gite9a5903/common/mips/mc.h 2017-12-31 12:50:51.000000000 +0000 +++ x264-0.158.2988+git-20191101.7817004/common/mips/mc.h 2019-11-09 05:16:29.000000000 +0000 @@ -1,7 +1,7 @@ /***************************************************************************** * mc.h: msa motion compensation ***************************************************************************** - * Copyright (C) 2015-2017 x264 project + * Copyright (C) 2015-2019 x264 project * * Authors: Neha Rana * @@ -26,6 +26,7 @@ #ifndef X264_MIPS_MC_H #define X264_MIPS_MC_H +#define x264_mc_init_mips x264_template(mc_init_mips) void x264_mc_init_mips( int cpu, x264_mc_functions_t *pf ); #endif diff -Nru x264-0.152.2854+gite9a5903/common/mips/pixel-c.c x264-0.158.2988+git-20191101.7817004/common/mips/pixel-c.c --- x264-0.152.2854+gite9a5903/common/mips/pixel-c.c 2017-12-31 12:50:50.000000000 +0000 +++ x264-0.158.2988+git-20191101.7817004/common/mips/pixel-c.c 2019-11-09 05:16:29.000000000 +0000 @@ -1,7 +1,7 @@ /***************************************************************************** * pixel-c.c: msa pixel metrics ***************************************************************************** - * Copyright (C) 2015-2017 x264 project + * Copyright (C) 2015-2019 x264 project * * Authors: Mandar Sahastrabuddhe * diff 
-Nru x264-0.152.2854+gite9a5903/common/mips/pixel.h x264-0.158.2988+git-20191101.7817004/common/mips/pixel.h --- x264-0.152.2854+gite9a5903/common/mips/pixel.h 2017-12-31 12:50:51.000000000 +0000 +++ x264-0.158.2988+git-20191101.7817004/common/mips/pixel.h 2019-11-09 05:16:29.000000000 +0000 @@ -1,7 +1,7 @@ /***************************************************************************** * pixel.h: msa pixel metrics ***************************************************************************** - * Copyright (C) 2015-2017 x264 project + * Copyright (C) 2015-2019 x264 project * * Authors: Mandar Sahastrabuddhe * @@ -23,146 +23,204 @@ * For more information, contact us at licensing@x264.com. *****************************************************************************/ -#ifndef X264_MIPS_SAD_H -#define X264_MIPS_SAD_H +#ifndef X264_MIPS_PIXEL_H +#define X264_MIPS_PIXEL_H +#define x264_pixel_sad_16x16_msa x264_template(pixel_sad_16x16_msa) int32_t x264_pixel_sad_16x16_msa( uint8_t *p_src, intptr_t i_src_stride, uint8_t *p_ref, intptr_t i_ref_stride ); +#define x264_pixel_sad_16x8_msa x264_template(pixel_sad_16x8_msa) int32_t x264_pixel_sad_16x8_msa( uint8_t *p_src, intptr_t i_src_stride, uint8_t *p_ref, intptr_t i_ref_stride ); +#define x264_pixel_sad_8x16_msa x264_template(pixel_sad_8x16_msa) int32_t x264_pixel_sad_8x16_msa( uint8_t *p_src, intptr_t i_src_stride, uint8_t *p_ref, intptr_t i_ref_stride ); +#define x264_pixel_sad_8x8_msa x264_template(pixel_sad_8x8_msa) int32_t x264_pixel_sad_8x8_msa( uint8_t *p_src, intptr_t i_src_stride, uint8_t *p_ref, intptr_t i_ref_stride ); +#define x264_pixel_sad_8x4_msa x264_template(pixel_sad_8x4_msa) int32_t x264_pixel_sad_8x4_msa( uint8_t *p_src, intptr_t i_src_stride, uint8_t *p_ref, intptr_t i_ref_stride ); +#define x264_pixel_sad_4x16_msa x264_template(pixel_sad_4x16_msa) int32_t x264_pixel_sad_4x16_msa( uint8_t *p_src, intptr_t i_src_stride, uint8_t *p_ref, intptr_t i_ref_stride ); +#define x264_pixel_sad_4x8_msa 
x264_template(pixel_sad_4x8_msa) int32_t x264_pixel_sad_4x8_msa( uint8_t *p_src, intptr_t i_src_stride, uint8_t *p_ref, intptr_t i_ref_stride ); +#define x264_pixel_sad_4x4_msa x264_template(pixel_sad_4x4_msa) int32_t x264_pixel_sad_4x4_msa( uint8_t *p_src, intptr_t i_src_stride, uint8_t *p_ref, intptr_t i_ref_stride ); +#define x264_pixel_sad_x4_16x16_msa x264_template(pixel_sad_x4_16x16_msa) void x264_pixel_sad_x4_16x16_msa( uint8_t *p_src, uint8_t *p_ref0, uint8_t *p_ref1, uint8_t *p_ref2, uint8_t *p_ref3, intptr_t i_ref_stride, int32_t p_sad_array[4] ); +#define x264_pixel_sad_x4_16x8_msa x264_template(pixel_sad_x4_16x8_msa) void x264_pixel_sad_x4_16x8_msa( uint8_t *p_src, uint8_t *p_ref0, uint8_t *p_ref1, uint8_t *p_ref2, uint8_t *p_ref3, intptr_t i_ref_stride, int32_t p_sad_array[4] ); +#define x264_pixel_sad_x4_8x16_msa x264_template(pixel_sad_x4_8x16_msa) void x264_pixel_sad_x4_8x16_msa( uint8_t *p_src, uint8_t *p_ref0, uint8_t *p_ref1, uint8_t *p_ref2, uint8_t *p_ref3, intptr_t i_ref_stride, int32_t p_sad_array[4] ); +#define x264_pixel_sad_x4_8x8_msa x264_template(pixel_sad_x4_8x8_msa) void x264_pixel_sad_x4_8x8_msa( uint8_t *p_src, uint8_t *p_ref0, uint8_t *p_ref1, uint8_t *p_ref2, uint8_t *p_ref3, intptr_t i_ref_stride, int32_t p_sad_array[4] ); +#define x264_pixel_sad_x4_8x4_msa x264_template(pixel_sad_x4_8x4_msa) void x264_pixel_sad_x4_8x4_msa( uint8_t *p_src, uint8_t *p_ref0, uint8_t *p_ref1, uint8_t *p_ref2, uint8_t *p_ref3, intptr_t i_ref_stride, int32_t p_sad_array[4] ); +#define x264_pixel_sad_x4_4x8_msa x264_template(pixel_sad_x4_4x8_msa) void x264_pixel_sad_x4_4x8_msa( uint8_t *p_src, uint8_t *p_ref0, uint8_t *p_ref1, uint8_t *p_ref2, uint8_t *p_ref3, intptr_t i_ref_stride, int32_t p_sad_array[4] ); +#define x264_pixel_sad_x4_4x4_msa x264_template(pixel_sad_x4_4x4_msa) void x264_pixel_sad_x4_4x4_msa( uint8_t *p_src, uint8_t *p_ref0, uint8_t *p_ref1, uint8_t *p_ref2, uint8_t *p_ref3, intptr_t i_ref_stride, int32_t p_sad_array[4] ); +#define 
x264_pixel_sad_x3_16x16_msa x264_template(pixel_sad_x3_16x16_msa) void x264_pixel_sad_x3_16x16_msa( uint8_t *p_src, uint8_t *p_ref0, uint8_t *p_ref1, uint8_t *p_ref2, intptr_t i_ref_stride, int32_t p_sad_array[3] ); +#define x264_pixel_sad_x3_16x8_msa x264_template(pixel_sad_x3_16x8_msa) void x264_pixel_sad_x3_16x8_msa( uint8_t *p_src, uint8_t *p_ref0, uint8_t *p_ref1, uint8_t *p_ref2, intptr_t i_ref_stride, int32_t p_sad_array[3] ); +#define x264_pixel_sad_x3_8x16_msa x264_template(pixel_sad_x3_8x16_msa) void x264_pixel_sad_x3_8x16_msa( uint8_t *p_src, uint8_t *p_ref0, uint8_t *p_ref1, uint8_t *p_ref2, intptr_t i_ref_stride, int32_t p_sad_array[3] ); +#define x264_pixel_sad_x3_8x8_msa x264_template(pixel_sad_x3_8x8_msa) void x264_pixel_sad_x3_8x8_msa( uint8_t *p_src, uint8_t *p_ref0, uint8_t *p_ref1, uint8_t *p_ref2, intptr_t i_ref_stride, int32_t p_sad_array[3] ); +#define x264_pixel_sad_x3_8x4_msa x264_template(pixel_sad_x3_8x4_msa) void x264_pixel_sad_x3_8x4_msa( uint8_t *p_src, uint8_t *p_ref0, uint8_t *p_ref1, uint8_t *p_ref2, intptr_t i_ref_stride, int32_t p_sad_array[3] ); +#define x264_pixel_sad_x3_4x8_msa x264_template(pixel_sad_x3_4x8_msa) void x264_pixel_sad_x3_4x8_msa( uint8_t *p_src, uint8_t *p_ref0, uint8_t *p_ref1, uint8_t *p_ref2, intptr_t i_ref_stride, int32_t p_sad_array[3] ); +#define x264_pixel_sad_x3_4x4_msa x264_template(pixel_sad_x3_4x4_msa) void x264_pixel_sad_x3_4x4_msa( uint8_t *p_src, uint8_t *p_ref0, uint8_t *p_ref1, uint8_t *p_ref2, intptr_t i_ref_stride, int32_t p_sad_array[3] ); +#define x264_pixel_ssd_16x16_msa x264_template(pixel_ssd_16x16_msa) int32_t x264_pixel_ssd_16x16_msa( uint8_t *p_src, intptr_t i_src_stride, uint8_t *p_ref, intptr_t i_ref_stride ); +#define x264_pixel_ssd_16x8_msa x264_template(pixel_ssd_16x8_msa) int32_t x264_pixel_ssd_16x8_msa( uint8_t *p_src, intptr_t i_src_stride, uint8_t *p_ref, intptr_t i_ref_stride ); +#define x264_pixel_ssd_8x16_msa x264_template(pixel_ssd_8x16_msa) int32_t x264_pixel_ssd_8x16_msa( 
uint8_t *p_src, intptr_t i_src_stride, uint8_t *p_ref, intptr_t i_ref_stride ); +#define x264_pixel_ssd_8x8_msa x264_template(pixel_ssd_8x8_msa) int32_t x264_pixel_ssd_8x8_msa( uint8_t *p_src, intptr_t i_src_stride, uint8_t *p_ref, intptr_t i_ref_stride ); +#define x264_pixel_ssd_8x4_msa x264_template(pixel_ssd_8x4_msa) int32_t x264_pixel_ssd_8x4_msa( uint8_t *p_src, intptr_t i_src_stride, uint8_t *p_ref, intptr_t i_ref_stride ); +#define x264_pixel_ssd_4x16_msa x264_template(pixel_ssd_4x16_msa) int32_t x264_pixel_ssd_4x16_msa( uint8_t *p_src, intptr_t i_src_stride, uint8_t *p_ref, intptr_t i_ref_stride ); +#define x264_pixel_ssd_4x8_msa x264_template(pixel_ssd_4x8_msa) int32_t x264_pixel_ssd_4x8_msa( uint8_t *p_src, intptr_t i_src_stride, uint8_t *p_ref, intptr_t i_ref_stride ); +#define x264_pixel_ssd_4x4_msa x264_template(pixel_ssd_4x4_msa) int32_t x264_pixel_ssd_4x4_msa( uint8_t *p_src, intptr_t i_src_stride, uint8_t *p_ref, intptr_t i_ref_stride ); +#define x264_intra_sad_x3_4x4_msa x264_template(intra_sad_x3_4x4_msa) void x264_intra_sad_x3_4x4_msa( uint8_t *p_enc, uint8_t *p_dec, int32_t p_sad_array[3] ); +#define x264_intra_sad_x3_16x16_msa x264_template(intra_sad_x3_16x16_msa) void x264_intra_sad_x3_16x16_msa( uint8_t *p_enc, uint8_t *p_dec, int32_t p_sad_array[3] ); +#define x264_intra_sad_x3_8x8_msa x264_template(intra_sad_x3_8x8_msa) void x264_intra_sad_x3_8x8_msa( uint8_t *p_enc, uint8_t p_edge[36], int32_t p_sad_array[3] ); +#define x264_intra_sad_x3_8x8c_msa x264_template(intra_sad_x3_8x8c_msa) void x264_intra_sad_x3_8x8c_msa( uint8_t *p_enc, uint8_t *p_dec, int32_t p_sad_array[3] ); +#define x264_ssim_4x4x2_core_msa x264_template(ssim_4x4x2_core_msa) void x264_ssim_4x4x2_core_msa( const uint8_t *p_pix1, intptr_t i_stride1, const uint8_t *p_pix2, intptr_t i_stride2, int32_t i_sums[2][4] ); +#define x264_pixel_hadamard_ac_8x8_msa x264_template(pixel_hadamard_ac_8x8_msa) uint64_t x264_pixel_hadamard_ac_8x8_msa( uint8_t *p_pix, intptr_t i_stride ); 
+#define x264_pixel_hadamard_ac_8x16_msa x264_template(pixel_hadamard_ac_8x16_msa) uint64_t x264_pixel_hadamard_ac_8x16_msa( uint8_t *p_pix, intptr_t i_stride ); +#define x264_pixel_hadamard_ac_16x8_msa x264_template(pixel_hadamard_ac_16x8_msa) uint64_t x264_pixel_hadamard_ac_16x8_msa( uint8_t *p_pix, intptr_t i_stride ); +#define x264_pixel_hadamard_ac_16x16_msa x264_template(pixel_hadamard_ac_16x16_msa) uint64_t x264_pixel_hadamard_ac_16x16_msa( uint8_t *p_pix, intptr_t i_stride ); +#define x264_pixel_satd_4x4_msa x264_template(pixel_satd_4x4_msa) int32_t x264_pixel_satd_4x4_msa( uint8_t *p_pix1, intptr_t i_stride, uint8_t *p_pix2, intptr_t i_stride2 ); +#define x264_pixel_satd_4x8_msa x264_template(pixel_satd_4x8_msa) int32_t x264_pixel_satd_4x8_msa( uint8_t *p_pix1, intptr_t i_stride, uint8_t *p_pix2, intptr_t i_stride2 ); +#define x264_pixel_satd_4x16_msa x264_template(pixel_satd_4x16_msa) int32_t x264_pixel_satd_4x16_msa( uint8_t *p_pix1, intptr_t i_stride, uint8_t *p_pix2, intptr_t i_stride2 ); +#define x264_pixel_satd_8x4_msa x264_template(pixel_satd_8x4_msa) int32_t x264_pixel_satd_8x4_msa( uint8_t *p_pix1, intptr_t i_stride, uint8_t *p_pix2, intptr_t i_stride2 ); +#define x264_pixel_satd_8x8_msa x264_template(pixel_satd_8x8_msa) int32_t x264_pixel_satd_8x8_msa( uint8_t *p_pix1, intptr_t i_stride, uint8_t *p_pix2, intptr_t i_stride2 ); +#define x264_pixel_satd_8x16_msa x264_template(pixel_satd_8x16_msa) int32_t x264_pixel_satd_8x16_msa( uint8_t *p_pix1, intptr_t i_stride, uint8_t *p_pix2, intptr_t i_stride2 ); +#define x264_pixel_satd_16x8_msa x264_template(pixel_satd_16x8_msa) int32_t x264_pixel_satd_16x8_msa( uint8_t *p_pix1, intptr_t i_stride, uint8_t *p_pix2, intptr_t i_stride2 ); +#define x264_pixel_satd_16x16_msa x264_template(pixel_satd_16x16_msa) int32_t x264_pixel_satd_16x16_msa( uint8_t *p_pix1, intptr_t i_stride, uint8_t *p_pix2, intptr_t i_stride2 ); +#define x264_pixel_sa8d_8x8_msa x264_template(pixel_sa8d_8x8_msa) int32_t 
x264_pixel_sa8d_8x8_msa( uint8_t *p_pix1, intptr_t i_stride, uint8_t *p_pix2, intptr_t i_stride2 ); +#define x264_pixel_sa8d_16x16_msa x264_template(pixel_sa8d_16x16_msa) int32_t x264_pixel_sa8d_16x16_msa( uint8_t *p_pix1, intptr_t i_stride, uint8_t *p_pix2, intptr_t i_stride2 ); +#define x264_intra_satd_x3_4x4_msa x264_template(intra_satd_x3_4x4_msa) void x264_intra_satd_x3_4x4_msa( uint8_t *p_enc, uint8_t *p_dec, int32_t p_sad_array[3] ); +#define x264_intra_satd_x3_16x16_msa x264_template(intra_satd_x3_16x16_msa) void x264_intra_satd_x3_16x16_msa( uint8_t *p_enc, uint8_t *p_dec, int32_t p_sad_array[3] ); +#define x264_intra_sa8d_x3_8x8_msa x264_template(intra_sa8d_x3_8x8_msa) void x264_intra_sa8d_x3_8x8_msa( uint8_t *p_enc, uint8_t p_edge[36], int32_t p_sad_array[3] ); +#define x264_intra_satd_x3_8x8c_msa x264_template(intra_satd_x3_8x8c_msa) void x264_intra_satd_x3_8x8c_msa( uint8_t *p_enc, uint8_t *p_dec, int32_t p_sad_array[3] ); +#define x264_pixel_var_16x16_msa x264_template(pixel_var_16x16_msa) uint64_t x264_pixel_var_16x16_msa( uint8_t *p_pix, intptr_t i_stride ); +#define x264_pixel_var_8x16_msa x264_template(pixel_var_8x16_msa) uint64_t x264_pixel_var_8x16_msa( uint8_t *p_pix, intptr_t i_stride ); +#define x264_pixel_var_8x8_msa x264_template(pixel_var_8x8_msa) uint64_t x264_pixel_var_8x8_msa( uint8_t *p_pix, intptr_t i_stride ); +#define x264_pixel_var2_8x16_msa x264_template(pixel_var2_8x16_msa) int32_t x264_pixel_var2_8x16_msa( uint8_t *p_pix1, intptr_t i_stride1, uint8_t *p_pix2, intptr_t i_stride2, int32_t *p_ssd ); +#define x264_pixel_var2_8x8_msa x264_template(pixel_var2_8x8_msa) int32_t x264_pixel_var2_8x8_msa( uint8_t *p_pix1, intptr_t i_stride1, uint8_t *p_pix2, intptr_t i_stride2, int32_t *p_ssd ); diff -Nru x264-0.152.2854+gite9a5903/common/mips/predict-c.c x264-0.158.2988+git-20191101.7817004/common/mips/predict-c.c --- x264-0.152.2854+gite9a5903/common/mips/predict-c.c 2017-12-31 12:50:51.000000000 +0000 +++ 
x264-0.158.2988+git-20191101.7817004/common/mips/predict-c.c 2019-11-09 05:16:29.000000000 +0000 @@ -1,7 +1,7 @@ /***************************************************************************** * predict-c.c: msa intra prediction ***************************************************************************** - * Copyright (C) 2015-2017 x264 project + * Copyright (C) 2015-2019 x264 project * * Authors: Mandar Sahastrabuddhe * @@ -25,6 +25,7 @@ #include "common/common.h" #include "macros.h" +#include "predict.h" #if !HIGH_BIT_DEPTH static void intra_predict_vert_4x4_msa( uint8_t *p_src, uint8_t *p_dst, diff -Nru x264-0.152.2854+gite9a5903/common/mips/predict.h x264-0.158.2988+git-20191101.7817004/common/mips/predict.h --- x264-0.152.2854+gite9a5903/common/mips/predict.h 2017-12-31 12:50:51.000000000 +0000 +++ x264-0.158.2988+git-20191101.7817004/common/mips/predict.h 2019-11-09 05:16:29.000000000 +0000 @@ -1,7 +1,7 @@ /***************************************************************************** * predict.h: msa intra prediction ***************************************************************************** - * Copyright (C) 2015-2017 x264 project + * Copyright (C) 2015-2019 x264 project * * Authors: Rishikesh More * @@ -26,23 +26,41 @@ #ifndef X264_MIPS_PREDICT_H #define X264_MIPS_PREDICT_H +#define x264_intra_predict_dc_16x16_msa x264_template(intra_predict_dc_16x16_msa) void x264_intra_predict_dc_16x16_msa( uint8_t *p_src ); +#define x264_intra_predict_dc_left_16x16_msa x264_template(intra_predict_dc_left_16x16_msa) void x264_intra_predict_dc_left_16x16_msa( uint8_t *p_src ); +#define x264_intra_predict_dc_top_16x16_msa x264_template(intra_predict_dc_top_16x16_msa) void x264_intra_predict_dc_top_16x16_msa( uint8_t *p_src ); +#define x264_intra_predict_dc_128_16x16_msa x264_template(intra_predict_dc_128_16x16_msa) void x264_intra_predict_dc_128_16x16_msa( uint8_t *p_src ); +#define x264_intra_predict_hor_16x16_msa x264_template(intra_predict_hor_16x16_msa) void 
x264_intra_predict_hor_16x16_msa( uint8_t *p_src ); +#define x264_intra_predict_vert_16x16_msa x264_template(intra_predict_vert_16x16_msa) void x264_intra_predict_vert_16x16_msa( uint8_t *p_src ); +#define x264_intra_predict_plane_16x16_msa x264_template(intra_predict_plane_16x16_msa) void x264_intra_predict_plane_16x16_msa( uint8_t *p_src ); +#define x264_intra_predict_dc_4blk_8x8_msa x264_template(intra_predict_dc_4blk_8x8_msa) void x264_intra_predict_dc_4blk_8x8_msa( uint8_t *p_src ); +#define x264_intra_predict_hor_8x8_msa x264_template(intra_predict_hor_8x8_msa) void x264_intra_predict_hor_8x8_msa( uint8_t *p_src ); +#define x264_intra_predict_vert_8x8_msa x264_template(intra_predict_vert_8x8_msa) void x264_intra_predict_vert_8x8_msa( uint8_t *p_src ); +#define x264_intra_predict_plane_8x8_msa x264_template(intra_predict_plane_8x8_msa) void x264_intra_predict_plane_8x8_msa( uint8_t *p_src ); +#define x264_intra_predict_ddl_8x8_msa x264_template(intra_predict_ddl_8x8_msa) void x264_intra_predict_ddl_8x8_msa( uint8_t *p_src, uint8_t pu_xyz[36] ); +#define x264_intra_predict_dc_8x8_msa x264_template(intra_predict_dc_8x8_msa) void x264_intra_predict_dc_8x8_msa( uint8_t *p_src, uint8_t pu_xyz[36] ); +#define x264_intra_predict_h_8x8_msa x264_template(intra_predict_h_8x8_msa) void x264_intra_predict_h_8x8_msa( uint8_t *p_src, uint8_t pu_xyz[36] ); +#define x264_intra_predict_v_8x8_msa x264_template(intra_predict_v_8x8_msa) void x264_intra_predict_v_8x8_msa( uint8_t *p_src, uint8_t pu_xyz[36] ); +#define x264_intra_predict_dc_4x4_msa x264_template(intra_predict_dc_4x4_msa) void x264_intra_predict_dc_4x4_msa( uint8_t *p_src ); +#define x264_intra_predict_hor_4x4_msa x264_template(intra_predict_hor_4x4_msa) void x264_intra_predict_hor_4x4_msa( uint8_t *p_src ); +#define x264_intra_predict_vert_4x4_msa x264_template(intra_predict_vert_4x4_msa) void x264_intra_predict_vert_4x4_msa( uint8_t *p_src ); #endif diff -Nru x264-0.152.2854+gite9a5903/common/mips/quant-c.c 
x264-0.158.2988+git-20191101.7817004/common/mips/quant-c.c --- x264-0.152.2854+gite9a5903/common/mips/quant-c.c 2017-12-31 12:50:51.000000000 +0000 +++ x264-0.158.2988+git-20191101.7817004/common/mips/quant-c.c 2019-11-09 05:16:29.000000000 +0000 @@ -1,7 +1,7 @@ /***************************************************************************** * quant-c.c: msa quantization and level-run ***************************************************************************** - * Copyright (C) 2015-2017 x264 project + * Copyright (C) 2015-2019 x264 project * * Authors: Rishikesh More * @@ -25,6 +25,7 @@ #include "common/common.h" #include "macros.h" +#include "quant.h" #if !HIGH_BIT_DEPTH static void avc_dequant_4x4_msa( int16_t *p_dct, int32_t pi_dequant_mf[6][16], diff -Nru x264-0.152.2854+gite9a5903/common/mips/quant.h x264-0.158.2988+git-20191101.7817004/common/mips/quant.h --- x264-0.152.2854+gite9a5903/common/mips/quant.h 2017-12-31 12:50:51.000000000 +0000 +++ x264-0.158.2988+git-20191101.7817004/common/mips/quant.h 2019-11-09 05:16:29.000000000 +0000 @@ -1,7 +1,7 @@ /***************************************************************************** * quant.h: msa quantization and level-run ***************************************************************************** - * Copyright (C) 2015-2017 x264 project + * Copyright (C) 2015-2019 x264 project * * Authors: Rishikesh More * @@ -26,18 +26,27 @@ #ifndef X264_MIPS_QUANT_H #define X264_MIPS_QUANT_H +#define x264_dequant_4x4_msa x264_template(dequant_4x4_msa) void x264_dequant_4x4_msa( int16_t *p_dct, int32_t pi_dequant_mf[6][16], int32_t i_qp ); +#define x264_dequant_8x8_msa x264_template(dequant_8x8_msa) void x264_dequant_8x8_msa( int16_t *p_dct, int32_t pi_dequant_mf[6][64], int32_t i_qp ); +#define x264_dequant_4x4_dc_msa x264_template(dequant_4x4_dc_msa) void x264_dequant_4x4_dc_msa( int16_t *p_dct, int32_t pi_dequant_mf[6][16], int32_t i_qp ); +#define x264_quant_4x4_msa x264_template(quant_4x4_msa) int32_t 
x264_quant_4x4_msa( int16_t *p_dct, uint16_t *p_mf, uint16_t *p_bias ); +#define x264_quant_4x4x4_msa x264_template(quant_4x4x4_msa) int32_t x264_quant_4x4x4_msa( int16_t p_dct[4][16], uint16_t pu_mf[16], uint16_t pu_bias[16] ); +#define x264_quant_8x8_msa x264_template(quant_8x8_msa) int32_t x264_quant_8x8_msa( int16_t *p_dct, uint16_t *p_mf, uint16_t *p_bias ); +#define x264_quant_4x4_dc_msa x264_template(quant_4x4_dc_msa) int32_t x264_quant_4x4_dc_msa( int16_t *p_dct, int32_t i_mf, int32_t i_bias ); +#define x264_coeff_last64_msa x264_template(coeff_last64_msa) int32_t x264_coeff_last64_msa( int16_t *p_src ); +#define x264_coeff_last16_msa x264_template(coeff_last16_msa) int32_t x264_coeff_last16_msa( int16_t *p_src ); #endif diff -Nru x264-0.152.2854+gite9a5903/common/mvpred.c x264-0.158.2988+git-20191101.7817004/common/mvpred.c --- x264-0.152.2854+gite9a5903/common/mvpred.c 2017-12-31 12:50:51.000000000 +0000 +++ x264-0.158.2988+git-20191101.7817004/common/mvpred.c 2019-11-09 05:16:29.000000000 +0000 @@ -1,7 +1,7 @@ /***************************************************************************** * mvpred.c: motion vector prediction ***************************************************************************** - * Copyright (C) 2003-2017 x264 project + * Copyright (C) 2003-2019 x264 project * * Authors: Loren Merritt * Fiona Glaser @@ -180,7 +180,7 @@ x264_mb_predict_mv_16x16( h, 0, 0, mv ); } -static int x264_mb_predict_mv_direct16x16_temporal( x264_t *h ) +static int mb_predict_mv_direct16x16_temporal( x264_t *h ) { int mb_x = h->mb.i_mb_x; int mb_y = h->mb.i_mb_y; @@ -286,7 +286,7 @@ return 1; } -static ALWAYS_INLINE int x264_mb_predict_mv_direct16x16_spatial( x264_t *h, int b_interlaced ) +static ALWAYS_INLINE int mb_predict_mv_direct16x16_spatial( x264_t *h, int b_interlaced ) { int8_t ref[2]; ALIGNED_ARRAY_8( int16_t, mv,[2],[2] ); @@ -441,14 +441,14 @@ } -static int x264_mb_predict_mv_direct16x16_spatial_interlaced( x264_t *h ) +static int 
mb_predict_mv_direct16x16_spatial_interlaced( x264_t *h ) { - return x264_mb_predict_mv_direct16x16_spatial( h, 1 ); + return mb_predict_mv_direct16x16_spatial( h, 1 ); } -static int x264_mb_predict_mv_direct16x16_spatial_progressive( x264_t *h ) +static int mb_predict_mv_direct16x16_spatial_progressive( x264_t *h ) { - return x264_mb_predict_mv_direct16x16_spatial( h, 0 ); + return mb_predict_mv_direct16x16_spatial( h, 0 ); } int x264_mb_predict_mv_direct16x16( x264_t *h, int *b_changed ) @@ -459,12 +459,12 @@ else if( h->sh.b_direct_spatial_mv_pred ) { if( SLICE_MBAFF ) - b_available = x264_mb_predict_mv_direct16x16_spatial_interlaced( h ); + b_available = mb_predict_mv_direct16x16_spatial_interlaced( h ); else - b_available = x264_mb_predict_mv_direct16x16_spatial_progressive( h ); + b_available = mb_predict_mv_direct16x16_spatial_progressive( h ); } else - b_available = x264_mb_predict_mv_direct16x16_temporal( h ); + b_available = mb_predict_mv_direct16x16_temporal( h ); if( b_changed != NULL && b_available ) { diff -Nru x264-0.152.2854+gite9a5903/common/opencl.c x264-0.158.2988+git-20191101.7817004/common/opencl.c --- x264-0.152.2854+gite9a5903/common/opencl.c 2017-12-31 12:50:51.000000000 +0000 +++ x264-0.158.2988+git-20191101.7817004/common/opencl.c 2019-11-09 05:16:29.000000000 +0000 @@ -1,7 +1,7 @@ /***************************************************************************** * opencl.c: OpenCL initialization and kernel compilation ***************************************************************************** - * Copyright (C) 2012-2017 x264 project + * Copyright (C) 2012-2019 x264 project * * Authors: Steve Borho * Anton Mitrofanov @@ -115,11 +115,11 @@ /* Requires full include path in case of out-of-tree builds */ #include "common/oclobj.h" -static int x264_detect_switchable_graphics( void ); +static int detect_switchable_graphics( void ); /* Try to load the cached compiled program binary, verify the device context is * still valid before reuse */ -static 
cl_program x264_opencl_cache_load( x264_t *h, const char *dev_name, const char *dev_vendor, const char *driver_version ) +static cl_program opencl_cache_load( x264_t *h, const char *dev_name, const char *dev_vendor, const char *driver_version ) { /* try to load cached program binary */ FILE *fp = x264_fopen( h->param.psz_clbin_file, "rb" ); @@ -168,7 +168,7 @@ /* Save the compiled program binary to a file for later reuse. Device context * is also saved in the cache file so we do not reuse stale binaries */ -static void x264_opencl_cache_save( x264_t *h, cl_program program, const char *dev_name, const char *dev_vendor, const char *driver_version ) +static void opencl_cache_save( x264_t *h, cl_program program, const char *dev_name, const char *dev_vendor, const char *driver_version ) { FILE *fp = x264_fopen( h->param.psz_clbin_file, "wb" ); if( !fp ) @@ -216,7 +216,7 @@ * the Makefile. It defines a x264_opencl_source byte array which we will pass * to clCreateProgramWithSource(). We also attempt to use a cache file for the * compiled binary, stored in the current working folder. */ -static cl_program x264_opencl_compile( x264_t *h ) +static cl_program opencl_compile( x264_t *h ) { x264_opencl_function_t *ocl = h->opencl.ocl; cl_program program = NULL; @@ -239,7 +239,7 @@ if( vectorize ) { /* Disable OpenCL on Intel/AMD switchable graphics devices */ - if( x264_detect_switchable_graphics() ) + if( detect_switchable_graphics() ) { x264_log( h, X264_LOG_INFO, "OpenCL acceleration disabled, switchable graphics detected\n" ); return NULL; @@ -257,7 +257,7 @@ x264_log( h, X264_LOG_INFO, "OpenCL acceleration enabled with %s %s %s\n", dev_vendor, dev_name, h->opencl.b_device_AMD_SI ? 
"(SI)" : "" ); - program = x264_opencl_cache_load( h, dev_name, dev_vendor, driver_version ); + program = opencl_cache_load( h, dev_name, dev_vendor, driver_version ); if( !program ) { /* clCreateProgramWithSource() requires a pointer variable, you cannot just use &x264_opencl_source */ @@ -277,7 +277,7 @@ status = ocl->clBuildProgram( program, 1, &h->opencl.device, buildopts, NULL, NULL ); if( status == CL_SUCCESS ) { - x264_opencl_cache_save( h, program, dev_name, dev_vendor, driver_version ); + opencl_cache_save( h, program, dev_name, dev_vendor, driver_version ); return program; } @@ -322,7 +322,7 @@ return NULL; } -static int x264_opencl_lookahead_alloc( x264_t *h ) +static int opencl_lookahead_alloc( x264_t *h ) { if( !h->param.rc.i_lookahead ) return -1; @@ -360,11 +360,11 @@ x264_opencl_function_t *ocl = h->opencl.ocl; cl_int status; - h->opencl.lookahead_program = x264_opencl_compile( h ); + h->opencl.lookahead_program = opencl_compile( h ); if( !h->opencl.lookahead_program ) goto fail; - for( int i = 0; i < ARRAY_SIZE(kernelnames); i++ ) + for( int i = 0; i < ARRAY_ELEMS(kernelnames); i++ ) { *kernels[i] = ocl->clCreateKernel( h->opencl.lookahead_program, kernelnames[i], &status ); if( status != CL_SUCCESS ) @@ -394,7 +394,7 @@ return -1; } -static void CL_CALLBACK x264_opencl_error_notify( const char *errinfo, const void *private_info, size_t cb, void *user_data ) +static void CL_CALLBACK opencl_error_notify( const char *errinfo, const void *private_info, size_t cb, void *user_data ) { /* Any error notification can be assumed to be fatal to the OpenCL context. * We need to stop using it immediately to prevent further damage. 
*/ @@ -470,7 +470,7 @@ if( context ) ocl->clReleaseContext( context ); - context = ocl->clCreateContext( NULL, 1, &h->opencl.device, (void*)x264_opencl_error_notify, (void*)h, &status ); + context = ocl->clCreateContext( NULL, 1, &h->opencl.device, (void*)opencl_error_notify, (void*)h, &status ); if( status != CL_SUCCESS || !context ) continue; @@ -540,7 +540,7 @@ if( ret ) x264_log( h, X264_LOG_WARNING, "OpenCL: Unable to find a compatible device\n" ); else - ret = x264_opencl_lookahead_alloc( h ); + ret = opencl_lookahead_alloc( h ); fail: if( context ) @@ -551,7 +551,7 @@ return ret; } -static void x264_opencl_lookahead_free( x264_t *h ) +static void opencl_lookahead_free( x264_t *h ) { x264_opencl_function_t *ocl = h->opencl.ocl; @@ -600,7 +600,7 @@ if( h->opencl.queue ) ocl->clFinish( h->opencl.queue ); - x264_opencl_lookahead_free( h ); + opencl_lookahead_free( h ); if( h->opencl.queue ) { @@ -663,7 +663,7 @@ return x264_malloc( iSize ); } -static int x264_detect_switchable_graphics( void ) +static int detect_switchable_graphics( void ) { void *hDLL; ADL_MAIN_CONTROL_CREATE ADL_Main_Control_Create; diff -Nru x264-0.152.2854+gite9a5903/common/opencl.h x264-0.158.2988+git-20191101.7817004/common/opencl.h --- x264-0.152.2854+gite9a5903/common/opencl.h 2017-12-31 12:50:51.000000000 +0000 +++ x264-0.158.2988+git-20191101.7817004/common/opencl.h 2019-11-09 05:16:29.000000000 +0000 @@ -1,7 +1,7 @@ /***************************************************************************** * opencl.h: OpenCL structures and defines ***************************************************************************** - * Copyright (C) 2012-2017 x264 project + * Copyright (C) 2012-2019 x264 project * * Authors: Steve Borho * Anton Mitrofanov @@ -793,12 +793,17 @@ typedef struct x264_frame x264_frame; +#define x264_opencl_load_library x264_template(opencl_load_library) x264_opencl_function_t *x264_opencl_load_library( void ); +#define x264_opencl_close_library 
x264_template(opencl_close_library) void x264_opencl_close_library( x264_opencl_function_t *ocl ); +#define x264_opencl_lookahead_init x264_template(opencl_lookahead_init) int x264_opencl_lookahead_init( x264_t *h ); +#define x264_opencl_lookahead_delete x264_template(opencl_lookahead_delete) void x264_opencl_lookahead_delete( x264_t *h ); +#define x264_opencl_frame_delete x264_template(opencl_frame_delete) void x264_opencl_frame_delete( x264_frame *frame ); #endif diff -Nru x264-0.152.2854+gite9a5903/common/osdep.c x264-0.158.2988+git-20191101.7817004/common/osdep.c --- x264-0.152.2854+gite9a5903/common/osdep.c 2017-12-31 12:50:51.000000000 +0000 +++ x264-0.158.2988+git-20191101.7817004/common/osdep.c 2019-11-09 05:16:29.000000000 +0000 @@ -1,7 +1,7 @@ /***************************************************************************** * osdep.c: platform-specific code ***************************************************************************** - * Copyright (C) 2003-2017 x264 project + * Copyright (C) 2003-2019 x264 project * * Authors: Steven Walters * Laurent Aimar @@ -25,12 +25,7 @@ * For more information, contact us at licensing@x264.com. 
*****************************************************************************/ -#include "common.h" - -#ifdef _WIN32 -#include -#include -#endif +#include "osdep.h" #if SYS_WINDOWS #include @@ -51,6 +46,10 @@ struct timeb tb; ftime( &tb ); return ((int64_t)tb.time * 1000 + (int64_t)tb.millitm) * 1000; +#elif HAVE_CLOCK_GETTIME + struct timespec ts; + clock_gettime( CLOCK_MONOTONIC, &ts ); + return (int64_t)ts.tv_sec * 1000000 + (int64_t)ts.tv_nsec / 1000; #else struct timeval tv_date; gettimeofday( &tv_date, NULL ); @@ -60,9 +59,9 @@ #if HAVE_WIN32THREAD || PTW32_STATIC_LIB /* state of the threading library being initialized */ -static volatile LONG x264_threading_is_init = 0; +static volatile LONG threading_is_init = 0; -static void x264_threading_destroy( void ) +static void threading_destroy( void ) { #if PTW32_STATIC_LIB pthread_win32_thread_detach_np(); @@ -72,11 +71,8 @@ #endif } -int x264_threading_init( void ) +static int threading_init( void ) { - /* if already init, then do nothing */ - if( InterlockedCompareExchange( &x264_threading_is_init, 1, 0 ) ) - return 0; #if PTW32_STATIC_LIB /* if static pthread-win32 is already initialized, then do nothing */ if( ptw32_processInitialized ) @@ -88,119 +84,25 @@ return -1; #endif /* register cleanup to run at process termination */ - atexit( x264_threading_destroy ); - + atexit( threading_destroy ); return 0; } -#endif - -#ifdef _WIN32 -/* Functions for dealing with Unicode on Windows. 
*/ -FILE *x264_fopen( const char *filename, const char *mode ) -{ - wchar_t filename_utf16[MAX_PATH]; - wchar_t mode_utf16[16]; - if( utf8_to_utf16( filename, filename_utf16 ) && utf8_to_utf16( mode, mode_utf16 ) ) - return _wfopen( filename_utf16, mode_utf16 ); - return NULL; -} -int x264_rename( const char *oldname, const char *newname ) +int x264_threading_init( void ) { - wchar_t oldname_utf16[MAX_PATH]; - wchar_t newname_utf16[MAX_PATH]; - if( utf8_to_utf16( oldname, oldname_utf16 ) && utf8_to_utf16( newname, newname_utf16 ) ) + LONG state; + while( (state = InterlockedCompareExchange( &threading_is_init, -1, 0 )) != 0 ) { - /* POSIX says that rename() removes the destination, but Win32 doesn't. */ - _wunlink( newname_utf16 ); - return _wrename( oldname_utf16, newname_utf16 ); + /* if already init, then do nothing */ + if( state > 0 ) + return 0; } - return -1; -} - -int x264_stat( const char *path, x264_struct_stat *buf ) -{ - wchar_t path_utf16[MAX_PATH]; - if( utf8_to_utf16( path, path_utf16 ) ) - return _wstati64( path_utf16, buf ); - return -1; -} - -#if !HAVE_WINRT -int x264_vfprintf( FILE *stream, const char *format, va_list arg ) -{ - HANDLE console = NULL; - DWORD mode; - - if( stream == stdout ) - console = GetStdHandle( STD_OUTPUT_HANDLE ); - else if( stream == stderr ) - console = GetStdHandle( STD_ERROR_HANDLE ); - - /* Only attempt to convert to UTF-16 when writing to a non-redirected console screen buffer. */ - if( GetConsoleMode( console, &mode ) ) + if( threading_init() < 0 ) { - char buf[4096]; - wchar_t buf_utf16[4096]; - va_list arg2; - - va_copy( arg2, arg ); - int length = vsnprintf( buf, sizeof(buf), format, arg2 ); - va_end( arg2 ); - - if( length > 0 && length < sizeof(buf) ) - { - /* WriteConsoleW is the most reliable way to output Unicode to a console. 
*/ - int length_utf16 = MultiByteToWideChar( CP_UTF8, 0, buf, length, buf_utf16, sizeof(buf_utf16)/sizeof(wchar_t) ); - DWORD written; - WriteConsoleW( console, buf_utf16, length_utf16, &written, NULL ); - return length; - } + InterlockedExchange( &threading_is_init, 0 ); + return -1; } - return vfprintf( stream, format, arg ); -} - -int x264_is_pipe( const char *path ) -{ - wchar_t path_utf16[MAX_PATH]; - if( utf8_to_utf16( path, path_utf16 ) ) - return WaitNamedPipeW( path_utf16, 0 ); + InterlockedExchange( &threading_is_init, 1 ); return 0; } #endif - -#if defined(_MSC_VER) && _MSC_VER < 1900 -/* MSVC pre-VS2015 has broken snprintf/vsnprintf implementations which are incompatible with C99. */ -int x264_snprintf( char *s, size_t n, const char *fmt, ... ) -{ - va_list arg; - va_start( arg, fmt ); - int length = x264_vsnprintf( s, n, fmt, arg ); - va_end( arg ); - return length; -} - -int x264_vsnprintf( char *s, size_t n, const char *fmt, va_list arg ) -{ - int length = -1; - - if( n ) - { - va_list arg2; - va_copy( arg2, arg ); - length = _vsnprintf( s, n, fmt, arg2 ); - va_end( arg2 ); - - /* _(v)snprintf adds a null-terminator only if the length is less than the buffer size. */ - if( length < 0 || length >= n ) - s[n-1] = '\0'; - } - - /* _(v)snprintf returns a negative number if the length is greater than the buffer size. 
*/ - if( length < 0 ) - return _vscprintf( fmt, arg ); - - return length; -} -#endif -#endif diff -Nru x264-0.152.2854+gite9a5903/common/osdep.h x264-0.158.2988+git-20191101.7817004/common/osdep.h --- x264-0.152.2854+gite9a5903/common/osdep.h 2017-12-31 12:50:51.000000000 +0000 +++ x264-0.158.2988+git-20191101.7817004/common/osdep.h 2019-11-09 05:16:29.000000000 +0000 @@ -1,7 +1,7 @@ /***************************************************************************** * osdep.h: platform-specific code ***************************************************************************** - * Copyright (C) 2007-2017 x264 project + * Copyright (C) 2007-2019 x264 project * * Authors: Loren Merritt * Laurent Aimar @@ -43,6 +43,13 @@ #include #endif +#ifdef _WIN32 +#include +#include +#endif + +#include "x264.h" + #if !HAVE_LOG2F #define log2f(x) (logf(x)/0.693147180559945f) #define log2(x) (log(x)/0.693147180559945) @@ -54,12 +61,6 @@ #define strncasecmp _strnicmp #define strtok_r strtok_s #define S_ISREG(x) (((x) & S_IFMT) == S_IFREG) -#if _MSC_VER < 1900 -int x264_snprintf( char *s, size_t n, const char *fmt, ... ); -int x264_vsnprintf( char *s, size_t n, const char *fmt, va_list arg ); -#define snprintf x264_snprintf -#define vsnprintf x264_vsnprintf -#endif #else #include #endif @@ -72,18 +73,85 @@ #define isfinite finite #endif -#ifdef _WIN32 -#ifndef strtok_r +#if !HAVE_STRTOK_R && !defined(strtok_r) #define strtok_r(str,delim,save) strtok(str,delim) #endif +#if defined(_MSC_VER) && _MSC_VER < 1900 +/* MSVC pre-VS2015 has broken snprintf/vsnprintf implementations which are incompatible with C99. */ +static inline int x264_vsnprintf( char *s, size_t n, const char *fmt, va_list arg ) +{ + int length = -1; + + if( n ) + { + va_list arg2; + va_copy( arg2, arg ); + length = _vsnprintf( s, n, fmt, arg2 ); + va_end( arg2 ); + + /* _(v)snprintf adds a null-terminator only if the length is less than the buffer size. 
*/ + if( length < 0 || length >= n ) + s[n-1] = '\0'; + } + + /* _(v)snprintf returns a negative number if the length is greater than the buffer size. */ + if( length < 0 ) + return _vscprintf( fmt, arg ); + + return length; +} + +static inline int x264_snprintf( char *s, size_t n, const char *fmt, ... ) +{ + va_list arg; + va_start( arg, fmt ); + int length = x264_vsnprintf( s, n, fmt, arg ); + va_end( arg ); + return length; +} + +#define snprintf x264_snprintf +#define vsnprintf x264_vsnprintf +#endif + +#ifdef _WIN32 #define utf8_to_utf16( utf8, utf16 )\ MultiByteToWideChar( CP_UTF8, MB_ERR_INVALID_CHARS, utf8, -1, utf16, sizeof(utf16)/sizeof(wchar_t) ) -FILE *x264_fopen( const char *filename, const char *mode ); -int x264_rename( const char *oldname, const char *newname ); + +/* Functions for dealing with Unicode on Windows. */ +static inline FILE *x264_fopen( const char *filename, const char *mode ) +{ + wchar_t filename_utf16[MAX_PATH]; + wchar_t mode_utf16[16]; + if( utf8_to_utf16( filename, filename_utf16 ) && utf8_to_utf16( mode, mode_utf16 ) ) + return _wfopen( filename_utf16, mode_utf16 ); + return NULL; +} + +static inline int x264_rename( const char *oldname, const char *newname ) +{ + wchar_t oldname_utf16[MAX_PATH]; + wchar_t newname_utf16[MAX_PATH]; + if( utf8_to_utf16( oldname, oldname_utf16 ) && utf8_to_utf16( newname, newname_utf16 ) ) + { + /* POSIX says that rename() removes the destination, but Win32 doesn't. 
*/ + _wunlink( newname_utf16 ); + return _wrename( oldname_utf16, newname_utf16 ); + } + return -1; +} + #define x264_struct_stat struct _stati64 #define x264_fstat _fstati64 -int x264_stat( const char *path, x264_struct_stat *buf ); + +static inline int x264_stat( const char *path, x264_struct_stat *buf ) +{ + wchar_t path_utf16[MAX_PATH]; + if( utf8_to_utf16( path, path_utf16 ) ) + return _wstati64( path_utf16, buf ); + return -1; +} #else #define x264_fopen fopen #define x264_rename rename @@ -93,16 +161,57 @@ #endif /* mdate: return the current date in microsecond */ -int64_t x264_mdate( void ); +X264_API int64_t x264_mdate( void ); #if defined(_WIN32) && !HAVE_WINRT -int x264_vfprintf( FILE *stream, const char *format, va_list arg ); -int x264_is_pipe( const char *path ); +static inline int x264_vfprintf( FILE *stream, const char *format, va_list arg ) +{ + HANDLE console = NULL; + DWORD mode; + + if( stream == stdout ) + console = GetStdHandle( STD_OUTPUT_HANDLE ); + else if( stream == stderr ) + console = GetStdHandle( STD_ERROR_HANDLE ); + + /* Only attempt to convert to UTF-16 when writing to a non-redirected console screen buffer. */ + if( GetConsoleMode( console, &mode ) ) + { + char buf[4096]; + wchar_t buf_utf16[4096]; + va_list arg2; + + va_copy( arg2, arg ); + int length = vsnprintf( buf, sizeof(buf), format, arg2 ); + va_end( arg2 ); + + if( length > 0 && length < sizeof(buf) ) + { + /* WriteConsoleW is the most reliable way to output Unicode to a console. 
*/ + int length_utf16 = MultiByteToWideChar( CP_UTF8, 0, buf, length, buf_utf16, sizeof(buf_utf16)/sizeof(wchar_t) ); + DWORD written; + WriteConsoleW( console, buf_utf16, length_utf16, &written, NULL ); + return length; + } + } + return vfprintf( stream, format, arg ); +} + +static inline int x264_is_pipe( const char *path ) +{ + wchar_t path_utf16[MAX_PATH]; + if( utf8_to_utf16( path, path_utf16 ) ) + return WaitNamedPipeW( path_utf16, 0 ); + return 0; +} #else #define x264_vfprintf vfprintf #define x264_is_pipe(x) 0 #endif +#define x264_glue3_expand(x,y,z) x##_##y##_##z +#define x264_glue3(x,y,z) x264_glue3_expand(x,y,z) + #ifdef _MSC_VER #define DECLARE_ALIGNED( var, n ) __declspec(align(n)) var #else @@ -160,6 +269,12 @@ #define ALIGNED_ARRAY_64 ALIGNED_ARRAY_16 #endif +#if STACK_ALIGNMENT > 16 || (ARCH_X86 && STACK_ALIGNMENT > 4) +#define REALIGN_STACK __attribute__((force_align_arg_pointer)) +#else +#define REALIGN_STACK +#endif + #if defined(__GNUC__) && (__GNUC__ > 3 || __GNUC__ == 3 && __GNUC_MINOR__ > 0) #define UNUSED __attribute__((unused)) #define ALWAYS_INLINE __attribute__((always_inline)) inline @@ -244,7 +359,7 @@ #endif #if HAVE_WIN32THREAD || PTW32_STATIC_LIB -int x264_threading_init( void ); +X264_API int x264_threading_init( void ); #else #define x264_threading_init() 0 #endif @@ -319,7 +434,7 @@ #endif /* For values with 4 bits or less. 
*/ -static int ALWAYS_INLINE x264_ctz_4bit( uint32_t x ) +static ALWAYS_INLINE int x264_ctz_4bit( uint32_t x ) { static uint8_t lut[16] = {4,0,1,0,2,0,1,0,3,0,1,0,2,0,1,0}; return lut[x]; @@ -329,7 +444,7 @@ #define x264_clz(x) __builtin_clz(x) #define x264_ctz(x) __builtin_ctz(x) #else -static int ALWAYS_INLINE x264_clz( uint32_t x ) +static ALWAYS_INLINE int x264_clz( uint32_t x ) { static uint8_t lut[16] = {4,3,2,2,1,1,1,1,0,0,0,0,0,0,0,0}; int y, z = (((x >> 16) - 1) >> 27) & 16; @@ -341,7 +456,7 @@ return z + lut[x]; } -static int ALWAYS_INLINE x264_ctz( uint32_t x ) +static ALWAYS_INLINE int x264_ctz( uint32_t x ) { static uint8_t lut[16] = {4,0,1,0,2,0,1,0,3,0,1,0,2,0,1,0}; int y, z = (((x & 0xffff) - 1) >> 27) & 16; diff -Nru x264-0.152.2854+gite9a5903/common/pixel.c x264-0.158.2988+git-20191101.7817004/common/pixel.c --- x264-0.152.2854+gite9a5903/common/pixel.c 2017-12-31 12:50:51.000000000 +0000 +++ x264-0.158.2988+git-20191101.7817004/common/pixel.c 2019-11-09 05:16:29.000000000 +0000 @@ -1,7 +1,7 @@ /***************************************************************************** * pixel.c: pixel metrics ***************************************************************************** - * Copyright (C) 2003-2017 x264 project + * Copyright (C) 2003-2019 x264 project * * Authors: Loren Merritt * Laurent Aimar @@ -31,18 +31,18 @@ # include "x86/pixel.h" # include "x86/predict.h" #endif -#if ARCH_PPC +#if HAVE_ALTIVEC # include "ppc/pixel.h" #endif -#if ARCH_ARM +#if HAVE_ARMV6 # include "arm/pixel.h" # include "arm/predict.h" #endif -#if ARCH_AARCH64 +#if HAVE_AARCH64 # include "aarch64/pixel.h" # include "aarch64/predict.h" #endif -#if ARCH_MIPS +#if HAVE_MSA # include "mips/pixel.h" #endif @@ -194,9 +194,9 @@ return sum + ((uint64_t)sqr << 32); \ } -PIXEL_VAR_C( x264_pixel_var_16x16, 16, 16 ) -PIXEL_VAR_C( x264_pixel_var_8x16, 8, 16 ) -PIXEL_VAR_C( x264_pixel_var_8x8, 8, 8 ) +PIXEL_VAR_C( pixel_var_16x16, 16, 16 ) +PIXEL_VAR_C( pixel_var_8x16, 8, 16 ) 
+PIXEL_VAR_C( pixel_var_8x8, 8, 8 ) /**************************************************************************** * pixel_var2_wxh @@ -225,8 +225,8 @@ sqr_v - ((int64_t)sum_v * sum_v >> shift); \ } -PIXEL_VAR2_C( x264_pixel_var2_8x16, 16, 7 ) -PIXEL_VAR2_C( x264_pixel_var2_8x8, 8, 6 ) +PIXEL_VAR2_C( pixel_var2_8x16, 16, 7 ) +PIXEL_VAR2_C( pixel_var2_8x8, 8, 6 ) #if BIT_DEPTH > 8 typedef uint32_t sum_t; @@ -503,17 +503,18 @@ SATD_X_DECL7( _sse4 ) SATD_X_DECL7( _avx ) SATD_X_DECL7( _xop ) +SATD_X_DECL7( _avx512 ) #endif // !HIGH_BIT_DEPTH #endif #if !HIGH_BIT_DEPTH -#if HAVE_ARMV6 || ARCH_AARCH64 +#if HAVE_ARMV6 || HAVE_AARCH64 SATD_X_DECL7( _neon ) #endif #endif // !HIGH_BIT_DEPTH #define INTRA_MBCMP_8x8( mbcmp, cpu, cpu2 )\ -void x264_intra_##mbcmp##_x3_8x8##cpu( pixel *fenc, pixel edge[36], int res[3] )\ +static void intra_##mbcmp##_x3_8x8##cpu( pixel *fenc, pixel edge[36], int res[3] )\ {\ ALIGNED_ARRAY_16( pixel, pix, [8*FDEC_STRIDE] );\ x264_predict_8x8_v##cpu2( pix, edge );\ @@ -531,13 +532,13 @@ INTRA_MBCMP_8x8( sad, _mmx2, _c ) INTRA_MBCMP_8x8(sa8d, _sse2, _sse2 ) #endif -#if !HIGH_BIT_DEPTH && (HAVE_ARMV6 || ARCH_AARCH64) +#if !HIGH_BIT_DEPTH && (HAVE_ARMV6 || HAVE_AARCH64) INTRA_MBCMP_8x8( sad, _neon, _neon ) INTRA_MBCMP_8x8(sa8d, _neon, _neon ) #endif #define INTRA_MBCMP( mbcmp, size, pred1, pred2, pred3, chroma, cpu, cpu2 )\ -void x264_intra_##mbcmp##_x3_##size##chroma##cpu( pixel *fenc, pixel *fdec, int res[3] )\ +static void intra_##mbcmp##_x3_##size##chroma##cpu( pixel *fenc, pixel *fdec, int res[3] )\ {\ x264_predict_##size##chroma##_##pred1##cpu2( fdec );\ res[0] = x264_pixel_##mbcmp##_##size##cpu( fdec, FDEC_STRIDE, fenc, FENC_STRIDE );\ @@ -601,7 +602,7 @@ INTRA_MBCMP( sad, 16x16, v, h, dc, , _neon, _neon ) INTRA_MBCMP(satd, 16x16, v, h, dc, , _neon, _neon ) #endif -#if !HIGH_BIT_DEPTH && ARCH_AARCH64 +#if !HIGH_BIT_DEPTH && HAVE_AARCH64 INTRA_MBCMP( sad, 4x4, v, h, dc, , _neon, _neon ) INTRA_MBCMP(satd, 4x4, v, h, dc, , _neon, _neon ) 
INTRA_MBCMP( sad, 8x8, dc, h, v, c, _neon, _neon ) @@ -613,7 +614,7 @@ #endif // No C implementation of intra_satd_x9. See checkasm for its behavior, -// or see x264_mb_analyse_intra for the entirely different algorithm we +// or see mb_analyse_intra for the entirely different algorithm we // use when lacking an asm implementation of it. @@ -851,11 +852,11 @@ pixf->sa8d[PIXEL_16x16] = x264_pixel_sa8d_16x16; pixf->sa8d[PIXEL_8x8] = x264_pixel_sa8d_8x8; - pixf->var[PIXEL_16x16] = x264_pixel_var_16x16; - pixf->var[PIXEL_8x16] = x264_pixel_var_8x16; - pixf->var[PIXEL_8x8] = x264_pixel_var_8x8; - pixf->var2[PIXEL_8x16] = x264_pixel_var2_8x16; - pixf->var2[PIXEL_8x8] = x264_pixel_var2_8x8; + pixf->var[PIXEL_16x16] = pixel_var_16x16; + pixf->var[PIXEL_8x16] = pixel_var_8x16; + pixf->var[PIXEL_8x8] = pixel_var_8x8; + pixf->var2[PIXEL_8x16] = pixel_var2_8x16; + pixf->var2[PIXEL_8x8] = pixel_var2_8x8; pixf->ssd_nv12_core = pixel_ssd_nv12_core; pixf->ssim_4x4x2_core = ssim_4x4x2_core; @@ -863,16 +864,16 @@ pixf->vsad = pixel_vsad; pixf->asd8 = pixel_asd8; - pixf->intra_sad_x3_4x4 = x264_intra_sad_x3_4x4; - pixf->intra_satd_x3_4x4 = x264_intra_satd_x3_4x4; - pixf->intra_sad_x3_8x8 = x264_intra_sad_x3_8x8; - pixf->intra_sa8d_x3_8x8 = x264_intra_sa8d_x3_8x8; - pixf->intra_sad_x3_8x8c = x264_intra_sad_x3_8x8c; - pixf->intra_satd_x3_8x8c = x264_intra_satd_x3_8x8c; - pixf->intra_sad_x3_8x16c = x264_intra_sad_x3_8x16c; - pixf->intra_satd_x3_8x16c = x264_intra_satd_x3_8x16c; - pixf->intra_sad_x3_16x16 = x264_intra_sad_x3_16x16; - pixf->intra_satd_x3_16x16 = x264_intra_satd_x3_16x16; + pixf->intra_sad_x3_4x4 = intra_sad_x3_4x4; + pixf->intra_satd_x3_4x4 = intra_satd_x3_4x4; + pixf->intra_sad_x3_8x8 = intra_sad_x3_8x8; + pixf->intra_sa8d_x3_8x8 = intra_sa8d_x3_8x8; + pixf->intra_sad_x3_8x8c = intra_sad_x3_8x8c; + pixf->intra_satd_x3_8x8c = intra_satd_x3_8x8c; + pixf->intra_sad_x3_8x16c = intra_sad_x3_8x16c; + pixf->intra_satd_x3_8x16c = intra_satd_x3_8x16c; + pixf->intra_sad_x3_16x16 = 
intra_sad_x3_16x16; + pixf->intra_satd_x3_16x16 = intra_satd_x3_16x16; #if HIGH_BIT_DEPTH #if HAVE_MMX @@ -889,14 +890,14 @@ INIT8( ssd, _mmx2 ); INIT_ADS( _mmx2 ); - pixf->intra_sad_x3_4x4 = x264_intra_sad_x3_4x4_mmx2; + pixf->intra_sad_x3_4x4 = intra_sad_x3_4x4_mmx2; pixf->intra_satd_x3_4x4 = x264_intra_satd_x3_4x4_mmx2; - pixf->intra_sad_x3_8x8 = x264_intra_sad_x3_8x8_mmx2; - pixf->intra_sad_x3_8x8c = x264_intra_sad_x3_8x8c_mmx2; + pixf->intra_sad_x3_8x8 = intra_sad_x3_8x8_mmx2; + pixf->intra_sad_x3_8x8c = intra_sad_x3_8x8c_mmx2; pixf->intra_satd_x3_8x8c = x264_intra_satd_x3_8x8c_mmx2; - pixf->intra_sad_x3_8x16c = x264_intra_sad_x3_8x16c_mmx2; - pixf->intra_satd_x3_8x16c = x264_intra_satd_x3_8x16c_mmx2; - pixf->intra_sad_x3_16x16 = x264_intra_sad_x3_16x16_mmx2; + pixf->intra_sad_x3_8x16c = intra_sad_x3_8x16c_mmx2; + pixf->intra_satd_x3_8x16c = intra_satd_x3_8x16c_mmx2; + pixf->intra_sad_x3_16x16 = intra_sad_x3_16x16_mmx2; pixf->intra_satd_x3_16x16 = x264_intra_satd_x3_16x16_mmx2; } if( cpu&X264_CPU_SSE2 ) @@ -906,21 +907,22 @@ INIT6( satd, _sse2 ); pixf->satd[PIXEL_4x16] = x264_pixel_satd_4x16_sse2; - pixf->sa8d[PIXEL_16x16] = x264_pixel_sa8d_16x16_sse2; - pixf->sa8d[PIXEL_8x8] = x264_pixel_sa8d_8x8_sse2; -#if ARCH_X86_64 - pixf->intra_sa8d_x3_8x8 = x264_intra_sa8d_x3_8x8_sse2; - pixf->sa8d_satd[PIXEL_16x16] = x264_pixel_sa8d_satd_16x16_sse2; -#endif - pixf->intra_sad_x3_4x4 = x264_intra_sad_x3_4x4_sse2; - pixf->ssd_nv12_core = x264_pixel_ssd_nv12_core_sse2; + pixf->ssd_nv12_core = x264_pixel_ssd_nv12_core_sse2; pixf->ssim_4x4x2_core = x264_pixel_ssim_4x4x2_core_sse2; pixf->ssim_end4 = x264_pixel_ssim_end4_sse2; pixf->var[PIXEL_16x16] = x264_pixel_var_16x16_sse2; pixf->var[PIXEL_8x8] = x264_pixel_var_8x8_sse2; pixf->var2[PIXEL_8x8] = x264_pixel_var2_8x8_sse2; pixf->var2[PIXEL_8x16] = x264_pixel_var2_8x16_sse2; - pixf->intra_sad_x3_8x8 = x264_intra_sad_x3_8x8_sse2; + + pixf->sa8d[PIXEL_16x16] = x264_pixel_sa8d_16x16_sse2; + pixf->sa8d[PIXEL_8x8] = 
x264_pixel_sa8d_8x8_sse2; +#if ARCH_X86_64 + pixf->sa8d_satd[PIXEL_16x16] = x264_pixel_sa8d_satd_16x16_sse2; +#endif + pixf->intra_sad_x3_4x4 = x264_intra_sad_x3_4x4_sse2; + pixf->intra_sad_x3_8x8 = x264_intra_sad_x3_8x8_sse2; + pixf->intra_sa8d_x3_8x8 = intra_sa8d_x3_8x8_sse2; } if( (cpu&X264_CPU_SSE2) && !(cpu&X264_CPU_SSE2_IS_SLOW) ) { @@ -936,10 +938,10 @@ pixf->vsad = x264_pixel_vsad_sse2; pixf->asd8 = x264_pixel_asd8_sse2; pixf->intra_sad_x3_8x8 = x264_intra_sad_x3_8x8_sse2; - pixf->intra_sad_x3_8x8c = x264_intra_sad_x3_8x8c_sse2; - pixf->intra_sad_x3_8x16c = x264_intra_sad_x3_8x16c_sse2; - pixf->intra_satd_x3_8x16c = x264_intra_satd_x3_8x16c_sse2; - pixf->intra_sad_x3_16x16 = x264_intra_sad_x3_16x16_sse2; + pixf->intra_sad_x3_8x8c = intra_sad_x3_8x8c_sse2; + pixf->intra_sad_x3_8x16c = intra_sad_x3_8x16c_sse2; + pixf->intra_satd_x3_8x16c = intra_satd_x3_8x16c_sse2; + pixf->intra_sad_x3_16x16 = intra_sad_x3_16x16_sse2; } if( cpu&X264_CPU_SSE2_IS_FAST ) { @@ -971,7 +973,6 @@ } pixf->vsad = x264_pixel_vsad_ssse3; pixf->asd8 = x264_pixel_asd8_ssse3; - pixf->intra_sad_x3_4x4 = x264_intra_sad_x3_4x4_ssse3; pixf->sa8d[PIXEL_16x16]= x264_pixel_sa8d_16x16_ssse3; pixf->sa8d[PIXEL_8x8] = x264_pixel_sa8d_8x8_ssse3; #if ARCH_X86_64 @@ -979,10 +980,10 @@ #endif pixf->intra_sad_x3_4x4 = x264_intra_sad_x3_4x4_ssse3; pixf->intra_sad_x3_8x8 = x264_intra_sad_x3_8x8_ssse3; - pixf->intra_sad_x3_8x8c = x264_intra_sad_x3_8x8c_ssse3; - pixf->intra_sad_x3_8x16c = x264_intra_sad_x3_8x16c_ssse3; - pixf->intra_satd_x3_8x16c = x264_intra_satd_x3_8x16c_ssse3; - pixf->intra_sad_x3_16x16 = x264_intra_sad_x3_16x16_ssse3; + pixf->intra_sad_x3_8x8c = intra_sad_x3_8x8c_ssse3; + pixf->intra_sad_x3_8x16c = intra_sad_x3_8x16c_ssse3; + pixf->intra_satd_x3_8x16c = intra_satd_x3_8x16c_ssse3; + pixf->intra_sad_x3_16x16 = intra_sad_x3_16x16_ssse3; } if( cpu&X264_CPU_SSE4 ) { @@ -997,7 +998,7 @@ #if ARCH_X86_64 pixf->sa8d_satd[PIXEL_16x16] = x264_pixel_sa8d_satd_16x16_sse4; #endif - 
pixf->intra_satd_x3_8x16c = x264_intra_satd_x3_8x16c_sse4; + pixf->intra_satd_x3_8x16c = intra_satd_x3_8x16c_sse4; } if( cpu&X264_CPU_AVX ) { @@ -1022,7 +1023,7 @@ #if ARCH_X86_64 pixf->sa8d_satd[PIXEL_16x16] = x264_pixel_sa8d_satd_16x16_avx; #endif - pixf->intra_satd_x3_8x16c = x264_intra_satd_x3_8x16c_avx; + pixf->intra_satd_x3_8x16c = intra_satd_x3_8x16c_avx; } if( cpu&X264_CPU_XOP ) { @@ -1108,8 +1109,8 @@ #endif pixf->intra_satd_x3_16x16 = x264_intra_satd_x3_16x16_mmx2; pixf->intra_sad_x3_16x16 = x264_intra_sad_x3_16x16_mmx2; - pixf->intra_satd_x3_8x16c = x264_intra_satd_x3_8x16c_mmx2; - pixf->intra_sad_x3_8x16c = x264_intra_sad_x3_8x16c_mmx2; + pixf->intra_satd_x3_8x16c = intra_satd_x3_8x16c_mmx2; + pixf->intra_sad_x3_8x16c = intra_sad_x3_8x16c_mmx2; pixf->intra_satd_x3_8x8c = x264_intra_satd_x3_8x8c_mmx2; pixf->intra_sad_x3_8x8c = x264_intra_sad_x3_8x8c_mmx2; pixf->intra_sad_x3_8x8 = x264_intra_sad_x3_8x8_mmx2; @@ -1151,8 +1152,8 @@ pixf->var[PIXEL_8x8] = x264_pixel_var_8x8_sse2; pixf->var[PIXEL_8x16] = x264_pixel_var_8x16_sse2; pixf->intra_sad_x3_16x16 = x264_intra_sad_x3_16x16_sse2; - pixf->intra_satd_x3_8x16c = x264_intra_satd_x3_8x16c_sse2; - pixf->intra_sad_x3_8x16c = x264_intra_sad_x3_8x16c_sse2; + pixf->intra_satd_x3_8x16c = intra_satd_x3_8x16c_sse2; + pixf->intra_sad_x3_8x16c = intra_sad_x3_8x16c_sse2; if( cpu&X264_CPU_CACHELINE_64 ) { INIT2( ssd, _sse2); /* faster for width 16 on p4 */ @@ -1231,7 +1232,7 @@ pixf->intra_satd_x3_16x16 = x264_intra_satd_x3_16x16_ssse3; if( !(cpu&X264_CPU_SLOW_PSHUFB) ) pixf->intra_sad_x3_16x16 = x264_intra_sad_x3_16x16_ssse3; - pixf->intra_satd_x3_8x16c = x264_intra_satd_x3_8x16c_ssse3; + pixf->intra_satd_x3_8x16c = intra_satd_x3_8x16c_ssse3; pixf->intra_satd_x3_8x8c = x264_intra_satd_x3_8x8c_ssse3; pixf->intra_sad_x3_8x8c = x264_intra_sad_x3_8x8c_ssse3; pixf->var2[PIXEL_8x8] = x264_pixel_var2_8x8_ssse3; @@ -1271,7 +1272,7 @@ } pixf->sa8d[PIXEL_16x16]= x264_pixel_sa8d_16x16_sse4; pixf->sa8d[PIXEL_8x8] = 
x264_pixel_sa8d_8x8_sse4; - pixf->intra_satd_x3_8x16c = x264_intra_satd_x3_8x16c_sse4; + pixf->intra_satd_x3_8x16c = intra_satd_x3_8x16c_sse4; #if ARCH_X86_64 pixf->sa8d_satd[PIXEL_16x16] = x264_pixel_sa8d_satd_16x16_sse4; #endif @@ -1301,7 +1302,7 @@ INIT5( ssd, _avx ); pixf->sa8d[PIXEL_16x16]= x264_pixel_sa8d_16x16_avx; pixf->sa8d[PIXEL_8x8] = x264_pixel_sa8d_8x8_avx; - pixf->intra_satd_x3_8x16c = x264_intra_satd_x3_8x16c_avx; + pixf->intra_satd_x3_8x16c = intra_satd_x3_8x16c_avx; pixf->ssd_nv12_core = x264_pixel_ssd_nv12_core_avx; pixf->var[PIXEL_16x16] = x264_pixel_var_16x16_avx; pixf->var[PIXEL_8x16] = x264_pixel_var_8x16_avx; @@ -1326,7 +1327,7 @@ INIT5( ssd, _xop ); pixf->sa8d[PIXEL_16x16]= x264_pixel_sa8d_16x16_xop; pixf->sa8d[PIXEL_8x8] = x264_pixel_sa8d_8x8_xop; - pixf->intra_satd_x3_8x16c = x264_intra_satd_x3_8x16c_xop; + pixf->intra_satd_x3_8x16c = intra_satd_x3_8x16c_xop; pixf->ssd_nv12_core = x264_pixel_ssd_nv12_core_xop; #if ARCH_X86_64 pixf->sa8d_satd[PIXEL_16x16] = x264_pixel_sa8d_satd_16x16_xop; @@ -1363,6 +1364,8 @@ INIT7( sad_x3, _avx512 ); INIT7( sad_x4, _avx512 ); INIT8( satd, _avx512 ); + INIT7( satd_x3, _avx512 ); + INIT7( satd_x4, _avx512 ); pixf->sa8d[PIXEL_8x8] = x264_pixel_sa8d_8x8_avx512; pixf->var[PIXEL_8x8] = x264_pixel_var_8x8_avx512; pixf->var[PIXEL_8x16] = x264_pixel_var_8x16_avx512; @@ -1402,16 +1405,16 @@ pixf->vsad = x264_pixel_vsad_neon; pixf->asd8 = x264_pixel_asd8_neon; - pixf->intra_sad_x3_4x4 = x264_intra_sad_x3_4x4_neon; - pixf->intra_satd_x3_4x4 = x264_intra_satd_x3_4x4_neon; - pixf->intra_sad_x3_8x8 = x264_intra_sad_x3_8x8_neon; - pixf->intra_sa8d_x3_8x8 = x264_intra_sa8d_x3_8x8_neon; - pixf->intra_sad_x3_8x8c = x264_intra_sad_x3_8x8c_neon; - pixf->intra_satd_x3_8x8c = x264_intra_satd_x3_8x8c_neon; - pixf->intra_sad_x3_8x16c = x264_intra_sad_x3_8x16c_neon; - pixf->intra_satd_x3_8x16c = x264_intra_satd_x3_8x16c_neon; - pixf->intra_sad_x3_16x16 = x264_intra_sad_x3_16x16_neon; - pixf->intra_satd_x3_16x16 = 
x264_intra_satd_x3_16x16_neon; + pixf->intra_sad_x3_4x4 = intra_sad_x3_4x4_neon; + pixf->intra_satd_x3_4x4 = intra_satd_x3_4x4_neon; + pixf->intra_sad_x3_8x8 = intra_sad_x3_8x8_neon; + pixf->intra_sa8d_x3_8x8 = intra_sa8d_x3_8x8_neon; + pixf->intra_sad_x3_8x8c = intra_sad_x3_8x8c_neon; + pixf->intra_satd_x3_8x8c = intra_satd_x3_8x8c_neon; + pixf->intra_sad_x3_8x16c = intra_sad_x3_8x16c_neon; + pixf->intra_satd_x3_8x16c = intra_satd_x3_8x16c_neon; + pixf->intra_sad_x3_16x16 = intra_sad_x3_16x16_neon; + pixf->intra_satd_x3_16x16 = intra_satd_x3_16x16_neon; pixf->ssd_nv12_core = x264_pixel_ssd_nv12_core_neon; pixf->ssim_4x4x2_core = x264_pixel_ssim_4x4x2_core_neon; @@ -1431,7 +1434,7 @@ } #endif -#if ARCH_AARCH64 +#if HAVE_AARCH64 if( cpu&X264_CPU_NEON ) { INIT8( sad, _neon ); @@ -1457,22 +1460,22 @@ pixf->vsad = x264_pixel_vsad_neon; pixf->asd8 = x264_pixel_asd8_neon; - pixf->intra_sad_x3_4x4 = x264_intra_sad_x3_4x4_neon; - pixf->intra_satd_x3_4x4 = x264_intra_satd_x3_4x4_neon; - pixf->intra_sad_x3_8x8 = x264_intra_sad_x3_8x8_neon; - pixf->intra_sa8d_x3_8x8 = x264_intra_sa8d_x3_8x8_neon; - pixf->intra_sad_x3_8x8c = x264_intra_sad_x3_8x8c_neon; - pixf->intra_satd_x3_8x8c = x264_intra_satd_x3_8x8c_neon; - pixf->intra_sad_x3_8x16c = x264_intra_sad_x3_8x16c_neon; - pixf->intra_satd_x3_8x16c = x264_intra_satd_x3_8x16c_neon; - pixf->intra_sad_x3_16x16 = x264_intra_sad_x3_16x16_neon; - pixf->intra_satd_x3_16x16 = x264_intra_satd_x3_16x16_neon; + pixf->intra_sad_x3_4x4 = intra_sad_x3_4x4_neon; + pixf->intra_satd_x3_4x4 = intra_satd_x3_4x4_neon; + pixf->intra_sad_x3_8x8 = intra_sad_x3_8x8_neon; + pixf->intra_sa8d_x3_8x8 = intra_sa8d_x3_8x8_neon; + pixf->intra_sad_x3_8x8c = intra_sad_x3_8x8c_neon; + pixf->intra_satd_x3_8x8c = intra_satd_x3_8x8c_neon; + pixf->intra_sad_x3_8x16c = intra_sad_x3_8x16c_neon; + pixf->intra_satd_x3_8x16c = intra_satd_x3_8x16c_neon; + pixf->intra_sad_x3_16x16 = intra_sad_x3_16x16_neon; + pixf->intra_satd_x3_16x16 = intra_satd_x3_16x16_neon; 
pixf->ssd_nv12_core = x264_pixel_ssd_nv12_core_neon; pixf->ssim_4x4x2_core = x264_pixel_ssim_4x4x2_core_neon; pixf->ssim_end4 = x264_pixel_ssim_end4_neon; } -#endif // ARCH_AARCH64 +#endif // HAVE_AARCH64 #if HAVE_MSA if( cpu&X264_CPU_MSA ) diff -Nru x264-0.152.2854+gite9a5903/common/pixel.h x264-0.158.2988+git-20191101.7817004/common/pixel.h --- x264-0.152.2854+gite9a5903/common/pixel.h 2017-12-31 12:50:51.000000000 +0000 +++ x264-0.158.2988+git-20191101.7817004/common/pixel.h 2019-11-09 05:16:29.000000000 +0000 @@ -1,7 +1,7 @@ /***************************************************************************** * pixel.c: pixel metrics ***************************************************************************** - * Copyright (C) 2004-2017 x264 project + * Copyright (C) 2004-2019 x264 project * * Authors: Loren Merritt * Fiona Glaser @@ -143,13 +143,18 @@ int (*intra_sad_x9_8x8) ( pixel *fenc, pixel *fdec, pixel edge[36], uint16_t *bitcosts, uint16_t *satds ); } x264_pixel_function_t; +#define x264_pixel_init x264_template(pixel_init) void x264_pixel_init( int cpu, x264_pixel_function_t *pixf ); +#define x264_pixel_ssd_nv12 x264_template(pixel_ssd_nv12) void x264_pixel_ssd_nv12 ( x264_pixel_function_t *pf, pixel *pix1, intptr_t i_pix1, pixel *pix2, intptr_t i_pix2, int i_width, int i_height, uint64_t *ssd_u, uint64_t *ssd_v ); +#define x264_pixel_ssd_wxh x264_template(pixel_ssd_wxh) uint64_t x264_pixel_ssd_wxh( x264_pixel_function_t *pf, pixel *pix1, intptr_t i_pix1, pixel *pix2, intptr_t i_pix2, int i_width, int i_height ); +#define x264_pixel_ssim_wxh x264_template(pixel_ssim_wxh) float x264_pixel_ssim_wxh ( x264_pixel_function_t *pf, pixel *pix1, intptr_t i_pix1, pixel *pix2, intptr_t i_pix2, int i_width, int i_height, void *buf, int *cnt ); +#define x264_field_vsad x264_template(field_vsad) int x264_field_vsad( x264_t *h, int mb_x, int mb_y ); #endif diff -Nru x264-0.152.2854+gite9a5903/common/ppc/dct.c x264-0.158.2988+git-20191101.7817004/common/ppc/dct.c --- 
x264-0.152.2854+gite9a5903/common/ppc/dct.c 2017-12-31 12:50:51.000000000 +0000 +++ x264-0.158.2988+git-20191101.7817004/common/ppc/dct.c 2019-11-09 05:16:29.000000000 +0000 @@ -1,7 +1,7 @@ /***************************************************************************** * dct.c: ppc transform and zigzag ***************************************************************************** - * Copyright (C) 2003-2017 x264 project + * Copyright (C) 2003-2019 x264 project * * Authors: Guillaume Poirier * Eric Petit @@ -26,6 +26,7 @@ #include "common/common.h" #include "ppccommon.h" +#include "dct.h" #if !HIGH_BIT_DEPTH #define VEC_DCT(a0,a1,a2,a3,b0,b1,b2,b3) \ @@ -136,11 +137,11 @@ void x264_sub8x8_dct_dc_altivec( int16_t dct[4], uint8_t *pix1, uint8_t *pix2 ) { - vec_s16_t diff[2]; + vec_s16_t diff[2], tmp; vec_s32_t sum[2]; vec_s32_t zero32 = vec_splat_s32(0); - vec_u8_t mask = { 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, - 0x18, 0x19, 0x1A, 0x1B, 0x1C, 0x1D, 0x1E, 0x1F }; + vec_u8_t mask = { 0x00, 0x01, 0x00, 0x01, 0x04, 0x05, 0x04, 0x05, + 0x02, 0x03, 0x02, 0x03, 0x06, 0x07, 0x06, 0x07 }; pix_diff( &pix1[0], &pix2[0], diff, 0 ); pix_diff( &pix1[4*FENC_STRIDE], &pix2[4*FDEC_STRIDE], diff, 1 ); @@ -151,20 +152,19 @@ sum[0] = vec_sum4s( diff[0], zero32 ); diff[0] = vec_packs( sum[0], zero32 ); - diff[1] = vec_vsx_ld( 0, dct ); - diff[0] = vec_perm( diff[0], diff[1], mask ); - - vec_vsx_st( diff[0], 0, dct ); - - /* 2x2 DC transform */ - int d0 = dct[0] + dct[1]; - int d1 = dct[2] + dct[3]; - int d2 = dct[0] - dct[1]; - int d3 = dct[2] - dct[3]; - dct[0] = d0 + d1; - dct[1] = d0 - d1; - dct[2] = d2 + d3; - dct[3] = d2 - d3; + diff[0] = vec_perm( diff[0], diff[0], mask ); // 0 0 2 2 1 1 3 3 + tmp = xxpermdi( diff[0], diff[0], 2 ); // 1 1 3 3 0 0 2 2 + diff[1] = vec_add( diff[0], tmp ); // 0+1 0+1 2+3 2+3 + diff[0] = vec_sub( diff[0], tmp ); // 0-1 0-1 2-3 2-3 + tmp = vec_mergeh( diff[1], diff[0] ); // 0+1 0-1 0+1 0-1 2+3 2-3 2+3 2-3 + diff[0] = xxpermdi( tmp, tmp, 2 ); // 
2+3 2-3 2+3 2-3 + diff[1] = vec_add( tmp, diff[0] ); // 0+1+2+3 0-1+2+3 + diff[0] = vec_sub( tmp, diff[0] ); // 0+1-2-3 0-1-2+3 + diff[0] = vec_mergeh( diff[1], diff[0] ); + + diff[1] = vec_ld( 0, dct ); + diff[0] = xxpermdi( diff[0], diff[1], 0 ); + vec_st( diff[0], 0, dct ); } /* DCT8_1D unrolled by 8 in Altivec */ @@ -293,29 +293,125 @@ vec_vsx_st( dcvsum8, 0, dest ); \ } -static void idct8_dc_altivec( uint8_t *dst, vec_s16_t dcv ) +void x264_add8x8_idct_dc_altivec( uint8_t *p_dst, int16_t dct[4] ) { + vec_s16_t dcv0, dcv1; + vec_s16_t v32 = vec_sl( vec_splat_s16( 8 ), vec_splat_u16( 2 ) ); + vec_u16_t v6 = vec_splat_u16( 6 ); + vec_s16_t dctv = vec_ld( 0, dct ); + vec_u8_t dstv0, dstv1, dstv2, dstv3, dstv4, dstv5, dstv6, dstv7; + vec_s16_t dcvsum0, dcvsum1, dcvsum2, dcvsum3, dcvsum4, dcvsum5, dcvsum6, dcvsum7; + vec_u8_t dcvsum8_0, dcvsum8_1, dcvsum8_2, dcvsum8_3, dcvsum8_4, dcvsum8_5, dcvsum8_6, dcvsum8_7; LOAD_ZERO; - ALTIVEC_STORE8_DC_SUM_CLIP( &dst[0*FDEC_STRIDE], dcv ); - ALTIVEC_STORE8_DC_SUM_CLIP( &dst[1*FDEC_STRIDE], dcv ); - ALTIVEC_STORE8_DC_SUM_CLIP( &dst[2*FDEC_STRIDE], dcv ); - ALTIVEC_STORE8_DC_SUM_CLIP( &dst[3*FDEC_STRIDE], dcv ); -} -void x264_add8x8_idct_dc_altivec( uint8_t *p_dst, int16_t dct[4] ) + dctv = vec_sra( vec_add( dctv, v32 ), v6 ); + dcv1 = (vec_s16_t)vec_mergeh( dctv, dctv ); + dcv0 = (vec_s16_t)vec_mergeh( (vec_s32_t)dcv1, (vec_s32_t)dcv1 ); + dcv1 = (vec_s16_t)vec_mergel( (vec_s32_t)dcv1, (vec_s32_t)dcv1 ); + + dstv0 = vec_vsx_ld( 0, p_dst ); + dstv4 = vec_vsx_ld( 0, p_dst + 4*FDEC_STRIDE ); + dstv1 = vec_vsx_ld( 0, p_dst + 1*FDEC_STRIDE ); + dstv5 = vec_vsx_ld( 0, p_dst + 4*FDEC_STRIDE + 1*FDEC_STRIDE ); + dstv2 = vec_vsx_ld( 0, p_dst + 2*FDEC_STRIDE); + dstv6 = vec_vsx_ld( 0, p_dst + 4*FDEC_STRIDE + 2*FDEC_STRIDE ); + dstv3 = vec_vsx_ld( 0, p_dst + 3*FDEC_STRIDE); + dstv7 = vec_vsx_ld( 0, p_dst + 4*FDEC_STRIDE + 3*FDEC_STRIDE ); + + vec_s16_t s0 = vec_u8_to_s16_h( dstv0 ); + vec_s16_t s1 = vec_u8_to_s16_h( dstv4 ); + vec_s16_t 
s2 = vec_u8_to_s16_h( dstv1 ); + vec_s16_t s3 = vec_u8_to_s16_h( dstv5 ); + vec_s16_t s4 = vec_u8_to_s16_h( dstv2 ); + vec_s16_t s5 = vec_u8_to_s16_h( dstv6 ); + vec_s16_t s6 = vec_u8_to_s16_h( dstv3 ); + vec_s16_t s7 = vec_u8_to_s16_h( dstv7 ); + dcvsum0 = vec_adds( dcv0, s0 ); + dcvsum4 = vec_adds( dcv1, s1 ); + dcvsum1 = vec_adds( dcv0, s2 ); + dcvsum5 = vec_adds( dcv1, s3 ); + dcvsum2 = vec_adds( dcv0, s4 ); + dcvsum6 = vec_adds( dcv1, s5 ); + dcvsum3 = vec_adds( dcv0, s6 ); + dcvsum7 = vec_adds( dcv1, s7 ); + dcvsum8_0 = vec_packsu( dcvsum0, vec_u8_to_s16_l( dstv0 ) ); + dcvsum8_1 = vec_packsu( dcvsum1, vec_u8_to_s16_l( dstv1 ) ); + dcvsum8_2 = vec_packsu( dcvsum2, vec_u8_to_s16_l( dstv2 ) ); + dcvsum8_3 = vec_packsu( dcvsum3, vec_u8_to_s16_l( dstv3 ) ); + dcvsum8_4 = vec_packsu( dcvsum4, vec_u8_to_s16_l( dstv4 ) ); + dcvsum8_5 = vec_packsu( dcvsum5, vec_u8_to_s16_l( dstv5 ) ); + dcvsum8_6 = vec_packsu( dcvsum6, vec_u8_to_s16_l( dstv6 ) ); + dcvsum8_7 = vec_packsu( dcvsum7, vec_u8_to_s16_l( dstv7 ) ); + + vec_vsx_st( dcvsum8_0, 0, p_dst ); + vec_vsx_st( dcvsum8_4, 0, p_dst + 4*FDEC_STRIDE ); + vec_vsx_st( dcvsum8_1, 0, p_dst + 1*FDEC_STRIDE ); + vec_vsx_st( dcvsum8_5, 0, p_dst + 4*FDEC_STRIDE + 1*FDEC_STRIDE ); + vec_vsx_st( dcvsum8_2, 0, p_dst + 2*FDEC_STRIDE ); + vec_vsx_st( dcvsum8_6, 0, p_dst + 4*FDEC_STRIDE + 2*FDEC_STRIDE ); + vec_vsx_st( dcvsum8_3, 0, p_dst + 3*FDEC_STRIDE ); + vec_vsx_st( dcvsum8_7, 0, p_dst + 4*FDEC_STRIDE + 3*FDEC_STRIDE ); +} + +#define LOAD16 \ + dstv0 = vec_ld( 0, p_dst ); \ + dstv1 = vec_ld( 0, p_dst + 1*FDEC_STRIDE ); \ + dstv2 = vec_ld( 0, p_dst + 2*FDEC_STRIDE ); \ + dstv3 = vec_ld( 0, p_dst + 3*FDEC_STRIDE ); + +#define SUM16 \ + dcvsum0 = vec_adds( dcv0, vec_u8_to_s16_h( dstv0 ) ); \ + dcvsum4 = vec_adds( dcv1, vec_u8_to_s16_l( dstv0 ) ); \ + dcvsum1 = vec_adds( dcv0, vec_u8_to_s16_h( dstv1 ) ); \ + dcvsum5 = vec_adds( dcv1, vec_u8_to_s16_l( dstv1 ) ); \ + dcvsum2 = vec_adds( dcv0, vec_u8_to_s16_h( dstv2 ) ); \ + dcvsum6 = 
vec_adds( dcv1, vec_u8_to_s16_l( dstv2 ) ); \ + dcvsum3 = vec_adds( dcv0, vec_u8_to_s16_h( dstv3 ) ); \ + dcvsum7 = vec_adds( dcv1, vec_u8_to_s16_l( dstv3 ) ); \ + dcvsum8_0 = vec_packsu( dcvsum0, dcvsum4 ); \ + dcvsum8_1 = vec_packsu( dcvsum1, dcvsum5 ); \ + dcvsum8_2 = vec_packsu( dcvsum2, dcvsum6 ); \ + dcvsum8_3 = vec_packsu( dcvsum3, dcvsum7 ); + +#define STORE16 \ + vec_st( dcvsum8_0, 0, p_dst ); \ + vec_st( dcvsum8_1, 0, p_dst + 1*FDEC_STRIDE ); \ + vec_st( dcvsum8_2, 0, p_dst + 2*FDEC_STRIDE ); \ + vec_st( dcvsum8_3, 0, p_dst + 3*FDEC_STRIDE ); + +void x264_add16x16_idct_dc_altivec( uint8_t *p_dst, int16_t dct[16] ) { - vec_s16_t dcv; + vec_s16_t dcv0, dcv1; vec_s16_t v32 = vec_sl( vec_splat_s16( 8 ), vec_splat_u16( 2 ) ); vec_u16_t v6 = vec_splat_u16( 6 ); - vec_s16_t dctv = vec_vsx_ld( 0, dct ); + vec_u8_t dstv0, dstv1, dstv2, dstv3; + vec_s16_t dcvsum0, dcvsum1, dcvsum2, dcvsum3, dcvsum4, dcvsum5, dcvsum6, dcvsum7; + vec_u8_t dcvsum8_0, dcvsum8_1, dcvsum8_2, dcvsum8_3; + LOAD_ZERO; - dctv = vec_sra( vec_add( dctv, v32 ), v6 ); - dcv = (vec_s16_t)vec_mergeh( (vec_s32_t)vec_splat( dctv, 0 ), (vec_s32_t)vec_splat( dctv, 1 ) ); - dcv = (vec_s16_t)vec_mergeh( (vec_s32_t)dcv, (vec_s32_t)dcv ); - idct8_dc_altivec( &p_dst[0], dcv ); - dcv = (vec_s16_t)vec_mergeh( (vec_s32_t)vec_splat( dctv, 2 ), (vec_s32_t)vec_splat( dctv, 3 ) ); - dcv = (vec_s16_t)vec_mergeh( (vec_s32_t)dcv, (vec_s32_t)dcv ); - idct8_dc_altivec( &p_dst[4*FDEC_STRIDE+0], dcv ); + for( int i = 0; i < 2; i++ ) + { + vec_s16_t dctv = vec_ld( 0, dct ); + + dctv = vec_sra( vec_add( dctv, v32 ), v6 ); + dcv1 = (vec_s16_t)vec_mergeh( dctv, dctv ); + dcv0 = (vec_s16_t)vec_mergeh( (vec_s32_t)dcv1, (vec_s32_t)dcv1 ); + dcv1 = (vec_s16_t)vec_mergel( (vec_s32_t)dcv1, (vec_s32_t)dcv1 ); + LOAD16; + SUM16; + STORE16; + + p_dst += 4*FDEC_STRIDE; + dcv1 = (vec_s16_t)vec_mergel( dctv, dctv ); + dcv0 = (vec_s16_t)vec_mergeh( (vec_s32_t)dcv1, (vec_s32_t)dcv1 ); + dcv1 = (vec_s16_t)vec_mergel( (vec_s32_t)dcv1, 
(vec_s32_t)dcv1 ); + LOAD16; + SUM16; + STORE16; + + dct += 8; + p_dst += 4*FDEC_STRIDE; + } } #define IDCT_1D_ALTIVEC(s0, s1, s2, s3, d0, d1, d2, d3) \ @@ -640,7 +736,7 @@ { vec_s16_t tmpv[8]; vec_s16_t merge[2]; - vec_s16_t permv[2]; + vec_s16_t permv[3]; vec_s16_t orv[4]; vec_s16_t src0v = vec_ld( 0*16, src ); vec_s16_t src1v = vec_ld( 1*16, src ); diff -Nru x264-0.152.2854+gite9a5903/common/ppc/dct.h x264-0.158.2988+git-20191101.7817004/common/ppc/dct.h --- x264-0.152.2854+gite9a5903/common/ppc/dct.h 2017-12-31 12:50:51.000000000 +0000 +++ x264-0.158.2988+git-20191101.7817004/common/ppc/dct.h 2019-11-09 05:16:29.000000000 +0000 @@ -1,7 +1,7 @@ /***************************************************************************** * dct.h: ppc transform and zigzag ***************************************************************************** - * Copyright (C) 2003-2017 x264 project + * Copyright (C) 2003-2019 x264 project * * Authors: Eric Petit * Guillaume Poirier @@ -27,26 +27,44 @@ #ifndef X264_PPC_DCT_H #define X264_PPC_DCT_H +#define x264_sub4x4_dct_altivec x264_template(sub4x4_dct_altivec) void x264_sub4x4_dct_altivec( int16_t dct[16], uint8_t *pix1, uint8_t *pix2 ); +#define x264_sub8x8_dct_altivec x264_template(sub8x8_dct_altivec) void x264_sub8x8_dct_altivec( int16_t dct[4][16], uint8_t *pix1, uint8_t *pix2 ); +#define x264_sub16x16_dct_altivec x264_template(sub16x16_dct_altivec) void x264_sub16x16_dct_altivec( int16_t dct[16][16], uint8_t *pix1, uint8_t *pix2 ); +#define x264_add8x8_idct_dc_altivec x264_template(add8x8_idct_dc_altivec) void x264_add8x8_idct_dc_altivec( uint8_t *p_dst, int16_t dct[4] ); +#define x264_add16x16_idct_dc_altivec x264_template(add16x16_idct_dc_altivec) +void x264_add16x16_idct_dc_altivec( uint8_t *p_dst, int16_t dct[16] ); +#define x264_add4x4_idct_altivec x264_template(add4x4_idct_altivec) void x264_add4x4_idct_altivec( uint8_t *p_dst, int16_t dct[16] ); +#define x264_add8x8_idct_altivec x264_template(add8x8_idct_altivec) void 
x264_add8x8_idct_altivec( uint8_t *p_dst, int16_t dct[4][16] ); +#define x264_add16x16_idct_altivec x264_template(add16x16_idct_altivec) void x264_add16x16_idct_altivec( uint8_t *p_dst, int16_t dct[16][16] ); +#define x264_sub8x8_dct_dc_altivec x264_template(sub8x8_dct_dc_altivec) void x264_sub8x8_dct_dc_altivec( int16_t dct[4], uint8_t *pix1, uint8_t *pix2 ); +#define x264_sub8x8_dct8_altivec x264_template(sub8x8_dct8_altivec) void x264_sub8x8_dct8_altivec( int16_t dct[64], uint8_t *pix1, uint8_t *pix2 ); +#define x264_sub16x16_dct8_altivec x264_template(sub16x16_dct8_altivec) void x264_sub16x16_dct8_altivec( int16_t dct[4][64], uint8_t *pix1, uint8_t *pix2 ); +#define x264_add8x8_idct8_altivec x264_template(add8x8_idct8_altivec) void x264_add8x8_idct8_altivec( uint8_t *dst, int16_t dct[64] ); +#define x264_add16x16_idct8_altivec x264_template(add16x16_idct8_altivec) void x264_add16x16_idct8_altivec( uint8_t *dst, int16_t dct[4][64] ); +#define x264_zigzag_scan_4x4_frame_altivec x264_template(zigzag_scan_4x4_frame_altivec) void x264_zigzag_scan_4x4_frame_altivec( int16_t level[16], int16_t dct[16] ); +#define x264_zigzag_scan_4x4_field_altivec x264_template(zigzag_scan_4x4_field_altivec) void x264_zigzag_scan_4x4_field_altivec( int16_t level[16], int16_t dct[16] ); +#define x264_zigzag_scan_8x8_frame_altivec x264_template(zigzag_scan_8x8_frame_altivec) void x264_zigzag_scan_8x8_frame_altivec( int16_t level[64], int16_t dct[64] ); +#define x264_zigzag_interleave_8x8_cavlc_altivec x264_template(zigzag_interleave_8x8_cavlc_altivec) void x264_zigzag_interleave_8x8_cavlc_altivec( int16_t *dst, int16_t *src, uint8_t *nnz ); #endif diff -Nru x264-0.152.2854+gite9a5903/common/ppc/deblock.c x264-0.158.2988+git-20191101.7817004/common/ppc/deblock.c --- x264-0.152.2854+gite9a5903/common/ppc/deblock.c 2017-12-31 12:50:51.000000000 +0000 +++ x264-0.158.2988+git-20191101.7817004/common/ppc/deblock.c 2019-11-09 05:16:29.000000000 +0000 @@ -1,7 +1,7 @@ 
/***************************************************************************** * deblock.c: ppc deblocking ***************************************************************************** - * Copyright (C) 2007-2017 x264 project + * Copyright (C) 2007-2019 x264 project * * Authors: Guillaume Poirier * @@ -25,6 +25,7 @@ #include "common/common.h" #include "ppccommon.h" +#include "deblock.h" #if !HIGH_BIT_DEPTH #define transpose4x16(r0, r1, r2, r3) \ @@ -140,11 +141,7 @@ // out: o = |x-y| < a static inline vec_u8_t diff_lt_altivec( register vec_u8_t x, register vec_u8_t y, register vec_u8_t a ) { - register vec_u8_t diff = vec_subs(x, y); - register vec_u8_t diffneg = vec_subs(y, x); - register vec_u8_t o = vec_or(diff, diffneg); /* |x-y| */ - o = (vec_u8_t)vec_cmplt(o, a); - return o; + return (vec_u8_t)vec_cmplt(vec_absd(x, y), a); } static inline vec_u8_t h264_deblock_mask( register vec_u8_t p0, register vec_u8_t p1, register vec_u8_t q0, diff -Nru x264-0.152.2854+gite9a5903/common/ppc/deblock.h x264-0.158.2988+git-20191101.7817004/common/ppc/deblock.h --- x264-0.152.2854+gite9a5903/common/ppc/deblock.h 1970-01-01 00:00:00.000000000 +0000 +++ x264-0.158.2988+git-20191101.7817004/common/ppc/deblock.h 2019-11-09 05:16:29.000000000 +0000 @@ -0,0 +1,34 @@ +/***************************************************************************** + * deblock.h: ppc deblocking + ***************************************************************************** + * Copyright (C) 2017-2019 x264 project + * + * Authors: Anton Mitrofanov + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02111, USA. + * + * This program is also available under a commercial proprietary license. + * For more information, contact us at licensing@x264.com. + *****************************************************************************/ + +#ifndef X264_PPC_DEBLOCK_H +#define X264_PPC_DEBLOCK_H + +#define x264_deblock_v_luma_altivec x264_template(deblock_v_luma_altivec) +void x264_deblock_v_luma_altivec( uint8_t *pix, intptr_t stride, int alpha, int beta, int8_t *tc0 ); +#define x264_deblock_h_luma_altivec x264_template(deblock_h_luma_altivec) +void x264_deblock_h_luma_altivec( uint8_t *pix, intptr_t stride, int alpha, int beta, int8_t *tc0 ); + +#endif diff -Nru x264-0.152.2854+gite9a5903/common/ppc/mc.c x264-0.158.2988+git-20191101.7817004/common/ppc/mc.c --- x264-0.152.2854+gite9a5903/common/ppc/mc.c 2017-12-31 12:50:51.000000000 +0000 +++ x264-0.158.2988+git-20191101.7817004/common/ppc/mc.c 2019-11-09 05:16:29.000000000 +0000 @@ -1,7 +1,7 @@ /***************************************************************************** * mc.c: ppc motion compensation ***************************************************************************** - * Copyright (C) 2003-2017 x264 project + * Copyright (C) 2003-2019 x264 project * * Authors: Eric Petit * Guillaume Poirier @@ -25,33 +25,40 @@ *****************************************************************************/ #include "common/common.h" -#include "mc.h" #include "ppccommon.h" +#include "mc.h" #if !HIGH_BIT_DEPTH typedef void (*pf_mc_t)( uint8_t *src, intptr_t i_src, uint8_t *dst, intptr_t i_dst, int i_height ); -static inline void x264_pixel_avg2_w4_altivec( uint8_t *dst, intptr_t i_dst, - uint8_t *src1, intptr_t i_src1, - uint8_t *src2, int i_height ) +static inline void 
pixel_avg2_w4_altivec( uint8_t *dst, intptr_t i_dst, + uint8_t *src1, intptr_t i_src1, + uint8_t *src2, int i_height ) { for( int y = 0; y < i_height; y++ ) { +#ifndef __POWER9_VECTOR__ for( int x = 0; x < 4; x++ ) dst[x] = ( src1[x] + src2[x] + 1 ) >> 1; +#else + vec_u8_t s1 = vec_vsx_ld( 0, src1 ); + vec_u8_t s2 = vec_vsx_ld( 0, src2 ); + vec_u8_t avg = vec_avg( s1, s2 ); + + vec_xst_len( avg, dst, 4 ); +#endif dst += i_dst; src1 += i_src1; src2 += i_src1; } } -static inline void x264_pixel_avg2_w8_altivec( uint8_t *dst, intptr_t i_dst, - uint8_t *src1, intptr_t i_src1, - uint8_t *src2, int i_height ) +static inline void pixel_avg2_w8_altivec( uint8_t *dst, intptr_t i_dst, + uint8_t *src1, intptr_t i_src1, + uint8_t *src2, int i_height ) { vec_u8_t src1v, src2v; - PREP_STORE8; for( int y = 0; y < i_height; y++ ) { @@ -67,9 +74,9 @@ } } -static inline void x264_pixel_avg2_w16_altivec( uint8_t *dst, intptr_t i_dst, - uint8_t *src1, intptr_t i_src1, - uint8_t *src2, int i_height ) +static inline void pixel_avg2_w16_altivec( uint8_t *dst, intptr_t i_dst, + uint8_t *src1, intptr_t i_src1, + uint8_t *src2, int i_height ) { vec_u8_t src1v, src2v; @@ -86,16 +93,22 @@ } } -static inline void x264_pixel_avg2_w20_altivec( uint8_t *dst, intptr_t i_dst, - uint8_t *src1, intptr_t i_src1, - uint8_t *src2, int i_height ) +static inline void pixel_avg2_w20_altivec( uint8_t *dst, intptr_t i_dst, + uint8_t *src1, intptr_t i_src1, + uint8_t *src2, int i_height ) { - x264_pixel_avg2_w16_altivec(dst, i_dst, src1, i_src1, src2, i_height); - x264_pixel_avg2_w4_altivec(dst+16, i_dst, src1+16, i_src1, src2+16, i_height); + pixel_avg2_w16_altivec(dst, i_dst, src1, i_src1, src2, i_height); + pixel_avg2_w4_altivec(dst+16, i_dst, src1+16, i_src1, src2+16, i_height); } /* mc_copy: plain c */ +#ifndef __POWER9_VECTOR__ +#define tiny_copy( d, s, l ) memcpy( d, s, l ) +#else +#define tiny_copy( d, s, l ) vec_xst_len( vec_vsx_ld( 0, s ), d, l ) +#endif + #define MC_COPY( name, a ) \ static void 
name( uint8_t *dst, intptr_t i_dst, \ uint8_t *src, intptr_t i_src, int i_height ) \ @@ -108,11 +121,11 @@ dst += i_dst; \ } \ } -MC_COPY( x264_mc_copy_w4_altivec, 4 ) -MC_COPY( x264_mc_copy_w8_altivec, 8 ) +MC_COPY( mc_copy_w4_altivec, 4 ) +MC_COPY( mc_copy_w8_altivec, 8 ) -static void x264_mc_copy_w16_altivec( uint8_t *dst, intptr_t i_dst, - uint8_t *src, intptr_t i_src, int i_height ) +static void mc_copy_w16_altivec( uint8_t *dst, intptr_t i_dst, + uint8_t *src, intptr_t i_src, int i_height ) { vec_u8_t cpyV; @@ -127,8 +140,8 @@ } -static void x264_mc_copy_w16_aligned_altivec( uint8_t *dst, intptr_t i_dst, - uint8_t *src, intptr_t i_src, int i_height ) +static void mc_copy_w16_aligned_altivec( uint8_t *dst, intptr_t i_dst, + uint8_t *src, intptr_t i_src, int i_height ) { for( int y = 0; y < i_height; ++y ) { @@ -140,6 +153,7 @@ } } +#define x264_plane_copy_swap_core_altivec x264_template(plane_copy_swap_core_altivec) void x264_plane_copy_swap_core_altivec( uint8_t *dst, intptr_t i_dst, uint8_t *src, intptr_t i_src, int w, int h ) { @@ -155,6 +169,7 @@ } } +#define x264_plane_copy_interleave_core_altivec x264_template(plane_copy_interleave_core_altivec) void x264_plane_copy_interleave_core_altivec( uint8_t *dst, intptr_t i_dst, uint8_t *srcu, intptr_t i_srcu, uint8_t *srcv, intptr_t i_srcv, int w, int h ) @@ -208,6 +223,20 @@ } } +static void load_deinterleave_chroma_fenc_altivec( uint8_t *dst, uint8_t *src, intptr_t i_src, int height ) +{ + const vec_u8_t mask = { 0x00, 0x02, 0x04, 0x06, 0x08, 0x0A, 0x0C, 0x0E, 0x01, 0x03, 0x05, 0x07, 0x09, 0x0B, 0x0D, 0x0F }; + + for( int y = 0; y < height; y += 2, dst += 2*FENC_STRIDE, src += 2*i_src ) + { + vec_u8_t src0 = vec_ld( 0, src ); + vec_u8_t src1 = vec_ld( i_src, src ); + + vec_st( vec_perm( src0, src0, mask ), 0*FENC_STRIDE, dst ); + vec_st( vec_perm( src1, src1, mask ), 1*FENC_STRIDE, dst ); + } +} + #if HAVE_VSX void x264_plane_copy_deinterleave_rgb_altivec( uint8_t *dsta, intptr_t i_dsta, uint8_t *dstb, 
intptr_t i_dstb, @@ -287,14 +316,14 @@ switch( i_width ) { case 4: - x264_pixel_avg2_w4_altivec( dst, i_dst_stride, src1, i_src_stride, src2, i_height ); + pixel_avg2_w4_altivec( dst, i_dst_stride, src1, i_src_stride, src2, i_height ); break; case 8: - x264_pixel_avg2_w8_altivec( dst, i_dst_stride, src1, i_src_stride, src2, i_height ); + pixel_avg2_w8_altivec( dst, i_dst_stride, src1, i_src_stride, src2, i_height ); break; case 16: default: - x264_pixel_avg2_w16_altivec( dst, i_dst_stride, src1, i_src_stride, src2, i_height ); + pixel_avg2_w16_altivec( dst, i_dst_stride, src1, i_src_stride, src2, i_height ); } if( weight->weightfn ) weight->weightfn[i_width>>2]( dst, i_dst_stride, dst, i_dst_stride, weight, i_height ); @@ -306,13 +335,13 @@ switch( i_width ) { case 4: - x264_mc_copy_w4_altivec( dst, i_dst_stride, src1, i_src_stride, i_height ); + mc_copy_w4_altivec( dst, i_dst_stride, src1, i_src_stride, i_height ); break; case 8: - x264_mc_copy_w8_altivec( dst, i_dst_stride, src1, i_src_stride, i_height ); + mc_copy_w8_altivec( dst, i_dst_stride, src1, i_src_stride, i_height ); break; case 16: - x264_mc_copy_w16_altivec( dst, i_dst_stride, src1, i_src_stride, i_height ); + mc_copy_w16_altivec( dst, i_dst_stride, src1, i_src_stride, i_height ); break; } } @@ -334,18 +363,18 @@ switch( i_width ) { case 4: - x264_pixel_avg2_w4_altivec( dst, *i_dst_stride, src1, i_src_stride, src2, i_height ); + pixel_avg2_w4_altivec( dst, *i_dst_stride, src1, i_src_stride, src2, i_height ); break; case 8: - x264_pixel_avg2_w8_altivec( dst, *i_dst_stride, src1, i_src_stride, src2, i_height ); + pixel_avg2_w8_altivec( dst, *i_dst_stride, src1, i_src_stride, src2, i_height ); break; case 12: case 16: default: - x264_pixel_avg2_w16_altivec( dst, *i_dst_stride, src1, i_src_stride, src2, i_height ); + pixel_avg2_w16_altivec( dst, *i_dst_stride, src1, i_src_stride, src2, i_height ); break; case 20: - x264_pixel_avg2_w20_altivec( dst, *i_dst_stride, src1, i_src_stride, src2, i_height ); + 
pixel_avg2_w20_altivec( dst, *i_dst_stride, src1, i_src_stride, src2, i_height ); break; } if( weight->weightfn ) @@ -400,6 +429,14 @@ #define VSLD(a,b,n) vec_sld(b,a,16-n) #endif +#ifndef __POWER9_VECTOR__ +#define STORE4_ALIGNED(d, s) vec_ste( (vec_u32_t)s, 0, (uint32_t*) d ) +#define STORE2_UNALIGNED(d, s) vec_ste( vec_splat( (vec_u16_t)s, 0 ), 0, (uint16_t*)d ) +#else +#define STORE4_ALIGNED(d, s) vec_xst_len( (vec_u8_t)s, d, 4 ) +#define STORE2_UNALIGNED(d, s) vec_xst_len( (vec_u8_t)s, d, 2 ) +#endif + static void mc_chroma_4xh_altivec( uint8_t *dstu, uint8_t *dstv, intptr_t i_dst_stride, uint8_t *src, intptr_t i_src_stride, int mvx, int mvy, int i_height ) @@ -460,8 +497,8 @@ dstuv = (vec_u8_t)vec_perm( dstv16, dstv16, perm0v ); dstvv = (vec_u8_t)vec_perm( dstv16, dstv16, perm1v ); - vec_ste( (vec_u32_t)dstuv, 0, (uint32_t*) dstu ); - vec_ste( (vec_u32_t)dstvv, 0, (uint32_t*) dstv ); + STORE4_ALIGNED( dstu, dstuv ); + STORE4_ALIGNED( dstv, dstvv ); srcp += i_src_stride; dstu += i_dst_stride; @@ -482,8 +519,8 @@ dstuv = (vec_u8_t)vec_perm( dstv16, dstv16, perm0v ); dstvv = (vec_u8_t)vec_perm( dstv16, dstv16, perm1v ); - vec_ste( (vec_u32_t)dstuv, 0, (uint32_t*) dstu ); - vec_ste( (vec_u32_t)dstvv, 0, (uint32_t*) dstv ); + STORE4_ALIGNED( dstu, dstuv ); + STORE4_ALIGNED( dstv, dstvv ); srcp += i_src_stride; dstu += i_dst_stride; @@ -509,7 +546,6 @@ srcp = &src[i_src_stride]; LOAD_ZERO; - PREP_STORE8; vec_u16_t coeff0v, coeff1v, coeff2v, coeff3v; vec_u8_t src0v_8, src1v_8, src2v_8, src3v_8; vec_u8_t dstuv, dstvv; @@ -788,20 +824,13 @@ vec_u16_t twov, fourv, fivev, sixv; vec_s16_t sixteenv, thirtytwov; - vec_u16_u temp_u; - temp_u.s[0]=2; - twov = vec_splat( temp_u.v, 0 ); - temp_u.s[0]=4; - fourv = vec_splat( temp_u.v, 0 ); - temp_u.s[0]=5; - fivev = vec_splat( temp_u.v, 0 ); - temp_u.s[0]=6; - sixv = vec_splat( temp_u.v, 0 ); - temp_u.s[0]=16; - sixteenv = (vec_s16_t)vec_splat( temp_u.v, 0 ); - temp_u.s[0]=32; - thirtytwov = (vec_s16_t)vec_splat( temp_u.v, 0 ); 
+ twov = vec_splats( (uint16_t)2 ); + fourv = vec_splats( (uint16_t)4 ); + fivev = vec_splats( (uint16_t)5 ); + sixv = vec_splats( (uint16_t)6 ); + sixteenv = vec_splats( (int16_t)16 ); + thirtytwov = vec_splats( (int16_t)32 ); for( int y = 0; y < i_height; y++ ) { @@ -952,18 +981,14 @@ hv = vec_perm(avgleftv, avgrightv, inverse_bridge_shuffle_1); #endif - vec_ste((vec_u32_t)lv,16*x,(uint32_t*)dst0); - vec_ste((vec_u32_t)lv,16*x+4,(uint32_t*)dst0); - vec_ste((vec_u32_t)hv,16*x,(uint32_t*)dsth); - vec_ste((vec_u32_t)hv,16*x+4,(uint32_t*)dsth); + VEC_STORE8( lv, dst0 + 16 * x ); + VEC_STORE8( hv, dsth + 16 * x ); lv = vec_sld(lv, lv, 8); hv = vec_sld(hv, hv, 8); - vec_ste((vec_u32_t)lv,16*x,(uint32_t*)dstv); - vec_ste((vec_u32_t)lv,16*x+4,(uint32_t*)dstv); - vec_ste((vec_u32_t)hv,16*x,(uint32_t*)dstc); - vec_ste((vec_u32_t)hv,16*x+4,(uint32_t*)dstc); + VEC_STORE8( lv, dstv + 16 * x ); + VEC_STORE8( hv, dstc + 16 * x ); } src0 += src_stride*2; @@ -981,23 +1006,16 @@ vec_u8_t srcv; vec_s16_t weightv; vec_s16_t scalev, offsetv, denomv, roundv; - vec_s16_u loadv; int denom = weight->i_denom; - loadv.s[0] = weight->i_scale; - scalev = vec_splat( loadv.v, 0 ); - - loadv.s[0] = weight->i_offset; - offsetv = vec_splat( loadv.v, 0 ); + scalev = vec_splats( (int16_t)weight->i_scale ); + offsetv = vec_splats( (int16_t)weight->i_offset ); if( denom >= 1 ) { - loadv.s[0] = denom; - denomv = vec_splat( loadv.v, 0 ); - - loadv.s[0] = 1<<(denom - 1); - roundv = vec_splat( loadv.v, 0 ); + denomv = vec_splats( (int16_t)denom ); + roundv = vec_splats( (int16_t)(1 << (denom - 1)) ); for( int y = 0; y < i_height; y++, dst += i_dst, src += i_src ) { @@ -1009,7 +1027,7 @@ weightv = vec_add( weightv, offsetv ); srcv = vec_packsu( weightv, zero_s16v ); - vec_ste( vec_splat( (vec_u16_t)srcv, 0 ), 0, (uint16_t*)dst ); + STORE2_UNALIGNED( dst, srcv ); } } else @@ -1022,7 +1040,7 @@ weightv = vec_mladd( weightv, scalev, offsetv ); srcv = vec_packsu( weightv, zero_s16v ); - vec_ste( vec_splat( 
(vec_u16_t)srcv, 0 ), 0, (uint16_t*)dst ); + STORE2_UNALIGNED( dst, srcv ); } } } @@ -1033,23 +1051,16 @@ vec_u8_t srcv; vec_s16_t weightv; vec_s16_t scalev, offsetv, denomv, roundv; - vec_s16_u loadv; int denom = weight->i_denom; - loadv.s[0] = weight->i_scale; - scalev = vec_splat( loadv.v, 0 ); - - loadv.s[0] = weight->i_offset; - offsetv = vec_splat( loadv.v, 0 ); + scalev = vec_splats( (int16_t)weight->i_scale ); + offsetv = vec_splats( (int16_t)weight->i_offset ); if( denom >= 1 ) { - loadv.s[0] = denom; - denomv = vec_splat( loadv.v, 0 ); - - loadv.s[0] = 1<<(denom - 1); - roundv = vec_splat( loadv.v, 0 ); + denomv = vec_splats( (int16_t)denom ); + roundv = vec_splats( (int16_t)(1 << (denom - 1)) ); for( int y = 0; y < i_height; y++, dst += i_dst, src += i_src ) { @@ -1082,27 +1093,19 @@ const x264_weight_t *weight, int i_height ) { LOAD_ZERO; - PREP_STORE8; vec_u8_t srcv; vec_s16_t weightv; vec_s16_t scalev, offsetv, denomv, roundv; - vec_s16_u loadv; int denom = weight->i_denom; - loadv.s[0] = weight->i_scale; - scalev = vec_splat( loadv.v, 0 ); - - loadv.s[0] = weight->i_offset; - offsetv = vec_splat( loadv.v, 0 ); + scalev = vec_splats( (int16_t)weight->i_scale ); + offsetv = vec_splats( (int16_t)weight->i_offset ); if( denom >= 1 ) { - loadv.s[0] = denom; - denomv = vec_splat( loadv.v, 0 ); - - loadv.s[0] = 1<<(denom - 1); - roundv = vec_splat( loadv.v, 0 ); + denomv = vec_splats( (int16_t)denom ); + roundv = vec_splats( (int16_t)(1 << (denom - 1)) ); for( int y = 0; y < i_height; y++, dst += i_dst, src += i_src ) { @@ -1138,23 +1141,16 @@ vec_u8_t srcv; vec_s16_t weight_lv, weight_hv; vec_s16_t scalev, offsetv, denomv, roundv; - vec_s16_u loadv; int denom = weight->i_denom; - loadv.s[0] = weight->i_scale; - scalev = vec_splat( loadv.v, 0 ); - - loadv.s[0] = weight->i_offset; - offsetv = vec_splat( loadv.v, 0 ); + scalev = vec_splats( (int16_t)weight->i_scale ); + offsetv = vec_splats( (int16_t)weight->i_offset ); if( denom >= 1 ) { - loadv.s[0] = 
denom; - denomv = vec_splat( loadv.v, 0 ); - - loadv.s[0] = 1<<(denom - 1); - roundv = vec_splat( loadv.v, 0 ); + denomv = vec_splats( (int16_t)denom ); + roundv = vec_splats( (int16_t)(1 << (denom - 1)) ); for( int y = 0; y < i_height; y++, dst += i_dst, src += i_src ) { @@ -1196,15 +1192,11 @@ vec_u8_t srcv, srcv2; vec_s16_t weight_lv, weight_hv, weight_3v; vec_s16_t scalev, offsetv, denomv, roundv; - vec_s16_u loadv; int denom = weight->i_denom; - loadv.s[0] = weight->i_scale; - scalev = vec_splat( loadv.v, 0 ); - - loadv.s[0] = weight->i_offset; - offsetv = vec_splat( loadv.v, 0 ); + scalev = vec_splats( (int16_t)weight->i_scale ); + offsetv = vec_splats( (int16_t)weight->i_offset ); if( denom >= 1 ) { @@ -1216,11 +1208,8 @@ { round, round, round, round, 0, 0, 0, 0 }, }; - loadv.s[0] = denom; - denomv = vec_splat( loadv.v, 0 ); - - loadv.s[0] = round; - roundv = vec_splat( loadv.v, 0 ); + denomv = vec_splats( (int16_t)denom ); + roundv = vec_splats( (int16_t)(1 << (denom - 1)) ); for( int y = 0; y < i_height; y++, dst += i_dst, src += i_src ) { @@ -1274,7 +1263,7 @@ } } -static weight_fn_t x264_mc_weight_wtab_altivec[6] = +static weight_fn_t mc_weight_wtab_altivec[6] = { mc_weight_w2_altivec, mc_weight_w4_altivec, @@ -1384,18 +1373,19 @@ pf->get_ref = get_ref_altivec; pf->mc_chroma = mc_chroma_altivec; - pf->copy_16x16_unaligned = x264_mc_copy_w16_altivec; - pf->copy[PIXEL_16x16] = x264_mc_copy_w16_aligned_altivec; + pf->copy_16x16_unaligned = mc_copy_w16_altivec; + pf->copy[PIXEL_16x16] = mc_copy_w16_aligned_altivec; pf->hpel_filter = x264_hpel_filter_altivec; pf->frame_init_lowres_core = frame_init_lowres_core_altivec; - pf->weight = x264_mc_weight_wtab_altivec; + pf->weight = mc_weight_wtab_altivec; - pf->plane_copy_swap = x264_plane_copy_swap_altivec; - pf->plane_copy_interleave = x264_plane_copy_interleave_altivec; + pf->plane_copy_swap = plane_copy_swap_altivec; + pf->plane_copy_interleave = plane_copy_interleave_altivec; pf->store_interleave_chroma = 
x264_store_interleave_chroma_altivec; pf->plane_copy_deinterleave = x264_plane_copy_deinterleave_altivec; + pf->load_deinterleave_chroma_fenc = load_deinterleave_chroma_fenc_altivec; #if HAVE_VSX pf->plane_copy_deinterleave_rgb = x264_plane_copy_deinterleave_rgb_altivec; #endif // HAVE_VSX diff -Nru x264-0.152.2854+gite9a5903/common/ppc/mc.h x264-0.158.2988+git-20191101.7817004/common/ppc/mc.h --- x264-0.152.2854+gite9a5903/common/ppc/mc.h 2017-12-31 12:50:51.000000000 +0000 +++ x264-0.158.2988+git-20191101.7817004/common/ppc/mc.h 2019-11-09 05:16:29.000000000 +0000 @@ -1,7 +1,7 @@ /***************************************************************************** * mc.h: ppc motion compensation ***************************************************************************** - * Copyright (C) 2003-2017 x264 project + * Copyright (C) 2003-2019 x264 project * * Authors: Eric Petit * @@ -26,6 +26,7 @@ #ifndef X264_PPC_MC_H #define X264_PPC_MC_H +#define x264_mc_init_altivec x264_template(mc_init_altivec) void x264_mc_init_altivec( x264_mc_functions_t *pf ); #endif diff -Nru x264-0.152.2854+gite9a5903/common/ppc/pixel.c x264-0.158.2988+git-20191101.7817004/common/ppc/pixel.c --- x264-0.152.2854+gite9a5903/common/ppc/pixel.c 2017-12-31 12:50:51.000000000 +0000 +++ x264-0.158.2988+git-20191101.7817004/common/ppc/pixel.c 2019-11-09 05:16:29.000000000 +0000 @@ -1,7 +1,7 @@ /***************************************************************************** * pixel.c: ppc pixel metrics ***************************************************************************** - * Copyright (C) 2003-2017 x264 project + * Copyright (C) 2003-2019 x264 project * * Authors: Eric Petit * Guillaume Poirier @@ -26,6 +26,7 @@ #include "common/common.h" #include "ppccommon.h" +#include "pixel.h" #if !HIGH_BIT_DEPTH /*********************************************************************** @@ -46,8 +47,7 @@ pix1v = vec_vsx_ld( 0, pix1 ); \ pix2v = vec_vsx_ld( 0, pix2 ); \ sumv = (vec_s32_t) vec_sum4s( \ - vec_sub( 
vec_max( pix1v, pix2v ), \ - vec_min( pix1v, pix2v ) ), \ + vec_absd( pix1v, pix2v ), \ (vec_u32_t) sumv ); \ pix1 += i_pix1; \ pix2 += i_pix2; \ @@ -114,6 +114,23 @@ VEC_ABS( a ); \ c = vec_sum4s( a, b ) +static ALWAYS_INLINE vec_s32_t add_abs_4( vec_s16_t a, vec_s16_t b, + vec_s16_t c, vec_s16_t d ) +{ + vec_s16_t t0 = vec_abs( a ); + vec_s16_t t1 = vec_abs( b ); + vec_s16_t t2 = vec_abs( c ); + vec_s16_t t3 = vec_abs( d ); + + vec_s16_t s0 = vec_adds( t0, t1 ); + vec_s16_t s1 = vec_adds( t2, t3 ); + + vec_s32_t s01 = vec_sum4s( s0, vec_splat_s32( 0 ) ); + vec_s32_t s23 = vec_sum4s( s1, vec_splat_s32( 0 ) ); + + return vec_add( s01, s23 ); +} + /*********************************************************************** * SATD 4x4 **********************************************************************/ @@ -142,10 +159,7 @@ VEC_HADAMAR( diff0v, diff1v, diff2v, diff3v, temp0v, temp1v, temp2v, temp3v ); - VEC_ADD_ABS( temp0v, zero_s32v, satdv ); - VEC_ADD_ABS( temp1v, satdv, satdv ); - VEC_ADD_ABS( temp2v, satdv, satdv ); - VEC_ADD_ABS( temp3v, satdv, satdv ); + satdv = add_abs_4( temp0v, temp1v, temp2v, temp3v ); satdv = vec_sum2s( satdv, zero_s32v ); satdv = vec_splat( satdv, 1 ); @@ -177,10 +191,8 @@ diff0v, diff1v, diff2v, diff3v ); VEC_HADAMAR( diff0v, diff1v, diff2v, diff3v, temp0v, temp1v, temp2v, temp3v ); - VEC_ADD_ABS( temp0v, zero_s32v, satdv ); - VEC_ADD_ABS( temp1v, satdv, satdv ); - VEC_ADD_ABS( temp2v, satdv, satdv ); - VEC_ADD_ABS( temp3v, satdv, satdv ); + + satdv = add_abs_4( temp0v, temp1v, temp2v, temp3v ); VEC_DIFF_H( pix1, i_pix1, pix2, i_pix2, 4, diff0v ); VEC_DIFF_H( pix1, i_pix1, pix2, i_pix2, 4, diff1v ); @@ -192,10 +204,8 @@ diff0v, diff1v, diff2v, diff3v ); VEC_HADAMAR( diff0v, diff1v, diff2v, diff3v, temp0v, temp1v, temp2v, temp3v ); - VEC_ADD_ABS( temp0v, satdv, satdv ); - VEC_ADD_ABS( temp1v, satdv, satdv ); - VEC_ADD_ABS( temp2v, satdv, satdv ); - VEC_ADD_ABS( temp3v, satdv, satdv ); + + satdv = vec_add( satdv, add_abs_4( temp0v, temp1v, 
temp2v, temp3v ) ); satdv = vec_sum2s( satdv, zero_s32v ); satdv = vec_splat( satdv, 1 ); @@ -204,6 +214,36 @@ return i_satd >> 1; } +static ALWAYS_INLINE vec_s32_t add_abs_8( vec_s16_t a, vec_s16_t b, + vec_s16_t c, vec_s16_t d, + vec_s16_t e, vec_s16_t f, + vec_s16_t g, vec_s16_t h ) +{ + vec_s16_t t0 = vec_abs( a ); + vec_s16_t t1 = vec_abs( b ); + vec_s16_t t2 = vec_abs( c ); + vec_s16_t t3 = vec_abs( d ); + + vec_s16_t s0 = vec_adds( t0, t1 ); + vec_s16_t s1 = vec_adds( t2, t3 ); + + vec_s32_t s01 = vec_sum4s( s0, vec_splat_s32( 0 ) ); + vec_s32_t s23 = vec_sum4s( s1, vec_splat_s32( 0 ) ); + + vec_s16_t t4 = vec_abs( e ); + vec_s16_t t5 = vec_abs( f ); + vec_s16_t t6 = vec_abs( g ); + vec_s16_t t7 = vec_abs( h ); + + vec_s16_t s2 = vec_adds( t4, t5 ); + vec_s16_t s3 = vec_adds( t6, t7 ); + + vec_s32_t s0145 = vec_sum4s( s2, s01 ); + vec_s32_t s2367 = vec_sum4s( s3, s23 ); + + return vec_add( s0145, s2367 ); +} + /*********************************************************************** * SATD 8x4 **********************************************************************/ @@ -237,14 +277,8 @@ VEC_HADAMAR( diff4v, diff5v, diff6v, diff7v, temp4v, temp5v, temp6v, temp7v ); - VEC_ADD_ABS( temp0v, zero_s32v, satdv ); - VEC_ADD_ABS( temp1v, satdv, satdv ); - VEC_ADD_ABS( temp2v, satdv, satdv ); - VEC_ADD_ABS( temp3v, satdv, satdv ); - VEC_ADD_ABS( temp4v, satdv, satdv ); - VEC_ADD_ABS( temp5v, satdv, satdv ); - VEC_ADD_ABS( temp6v, satdv, satdv ); - VEC_ADD_ABS( temp7v, satdv, satdv ); + satdv = add_abs_8( temp0v, temp1v, temp2v, temp3v, + temp4v, temp5v, temp6v, temp7v ); satdv = vec_sum2s( satdv, zero_s32v ); satdv = vec_splat( satdv, 1 ); @@ -292,14 +326,8 @@ VEC_HADAMAR( diff4v, diff5v, diff6v, diff7v, temp4v, temp5v, temp6v, temp7v ); - VEC_ADD_ABS( temp0v, zero_s32v, satdv ); - VEC_ADD_ABS( temp1v, satdv, satdv ); - VEC_ADD_ABS( temp2v, satdv, satdv ); - VEC_ADD_ABS( temp3v, satdv, satdv ); - VEC_ADD_ABS( temp4v, satdv, satdv ); - VEC_ADD_ABS( temp5v, satdv, satdv ); 
- VEC_ADD_ABS( temp6v, satdv, satdv ); - VEC_ADD_ABS( temp7v, satdv, satdv ); + satdv = add_abs_8( temp0v, temp1v, temp2v, temp3v, + temp4v, temp5v, temp6v, temp7v ); satdv = vec_sums( satdv, zero_s32v ); satdv = vec_splat( satdv, 3 ); @@ -343,14 +371,9 @@ temp0v, temp1v, temp2v, temp3v ); VEC_HADAMAR( diff4v, diff5v, diff6v, diff7v, temp4v, temp5v, temp6v, temp7v ); - VEC_ADD_ABS( temp0v, zero_s32v, satdv ); - VEC_ADD_ABS( temp1v, satdv, satdv ); - VEC_ADD_ABS( temp2v, satdv, satdv ); - VEC_ADD_ABS( temp3v, satdv, satdv ); - VEC_ADD_ABS( temp4v, satdv, satdv ); - VEC_ADD_ABS( temp5v, satdv, satdv ); - VEC_ADD_ABS( temp6v, satdv, satdv ); - VEC_ADD_ABS( temp7v, satdv, satdv ); + + satdv = add_abs_8( temp0v, temp1v, temp2v, temp3v, + temp4v, temp5v, temp6v, temp7v ); VEC_DIFF_H( pix1, i_pix1, pix2, i_pix2, 8, diff0v ); VEC_DIFF_H( pix1, i_pix1, pix2, i_pix2, 8, diff1v ); @@ -372,14 +395,9 @@ temp0v, temp1v, temp2v, temp3v ); VEC_HADAMAR( diff4v, diff5v, diff6v, diff7v, temp4v, temp5v, temp6v, temp7v ); - VEC_ADD_ABS( temp0v, satdv, satdv ); - VEC_ADD_ABS( temp1v, satdv, satdv ); - VEC_ADD_ABS( temp2v, satdv, satdv ); - VEC_ADD_ABS( temp3v, satdv, satdv ); - VEC_ADD_ABS( temp4v, satdv, satdv ); - VEC_ADD_ABS( temp5v, satdv, satdv ); - VEC_ADD_ABS( temp6v, satdv, satdv ); - VEC_ADD_ABS( temp7v, satdv, satdv ); + + satdv = vec_add( satdv, add_abs_8( temp0v, temp1v, temp2v, temp3v, + temp4v, temp5v, temp6v, temp7v ) ); satdv = vec_sums( satdv, zero_s32v ); satdv = vec_splat( satdv, 3 ); @@ -430,14 +448,8 @@ VEC_HADAMAR( diffh4v, diffh5v, diffh6v, diffh7v, temp4v, temp5v, temp6v, temp7v ); - VEC_ADD_ABS( temp0v, zero_s32v, satdv ); - VEC_ADD_ABS( temp1v, satdv, satdv ); - VEC_ADD_ABS( temp2v, satdv, satdv ); - VEC_ADD_ABS( temp3v, satdv, satdv ); - VEC_ADD_ABS( temp4v, satdv, satdv ); - VEC_ADD_ABS( temp5v, satdv, satdv ); - VEC_ADD_ABS( temp6v, satdv, satdv ); - VEC_ADD_ABS( temp7v, satdv, satdv ); + satdv = add_abs_8( temp0v, temp1v, temp2v, temp3v, + temp4v, temp5v, 
temp6v, temp7v ); VEC_HADAMAR( diffl0v, diffl1v, diffl2v, diffl3v, temp0v, temp1v, temp2v, temp3v ); @@ -454,14 +466,8 @@ VEC_HADAMAR( diffl4v, diffl5v, diffl6v, diffl7v, temp4v, temp5v, temp6v, temp7v ); - VEC_ADD_ABS( temp0v, satdv, satdv ); - VEC_ADD_ABS( temp1v, satdv, satdv ); - VEC_ADD_ABS( temp2v, satdv, satdv ); - VEC_ADD_ABS( temp3v, satdv, satdv ); - VEC_ADD_ABS( temp4v, satdv, satdv ); - VEC_ADD_ABS( temp5v, satdv, satdv ); - VEC_ADD_ABS( temp6v, satdv, satdv ); - VEC_ADD_ABS( temp7v, satdv, satdv ); + satdv = vec_add( satdv, add_abs_8( temp0v, temp1v, temp2v, temp3v, + temp4v, temp5v, temp6v, temp7v ) ); satdv = vec_sums( satdv, zero_s32v ); satdv = vec_splat( satdv, 3 ); @@ -508,14 +514,10 @@ temp0v, temp1v, temp2v, temp3v ); VEC_HADAMAR( diffh4v, diffh5v, diffh6v, diffh7v, temp4v, temp5v, temp6v, temp7v ); - VEC_ADD_ABS( temp0v, zero_s32v, satdv ); - VEC_ADD_ABS( temp1v, satdv, satdv ); - VEC_ADD_ABS( temp2v, satdv, satdv ); - VEC_ADD_ABS( temp3v, satdv, satdv ); - VEC_ADD_ABS( temp4v, satdv, satdv ); - VEC_ADD_ABS( temp5v, satdv, satdv ); - VEC_ADD_ABS( temp6v, satdv, satdv ); - VEC_ADD_ABS( temp7v, satdv, satdv ); + + satdv = add_abs_8( temp0v, temp1v, temp2v, temp3v, + temp4v, temp5v, temp6v, temp7v ); + VEC_HADAMAR( diffl0v, diffl1v, diffl2v, diffl3v, temp0v, temp1v, temp2v, temp3v ); VEC_HADAMAR( diffl4v, diffl5v, diffl6v, diffl7v, @@ -528,14 +530,9 @@ temp0v, temp1v, temp2v, temp3v ); VEC_HADAMAR( diffl4v, diffl5v, diffl6v, diffl7v, temp4v, temp5v, temp6v, temp7v ); - VEC_ADD_ABS( temp0v, satdv, satdv ); - VEC_ADD_ABS( temp1v, satdv, satdv ); - VEC_ADD_ABS( temp2v, satdv, satdv ); - VEC_ADD_ABS( temp3v, satdv, satdv ); - VEC_ADD_ABS( temp4v, satdv, satdv ); - VEC_ADD_ABS( temp5v, satdv, satdv ); - VEC_ADD_ABS( temp6v, satdv, satdv ); - VEC_ADD_ABS( temp7v, satdv, satdv ); + + satdv = vec_add( satdv, add_abs_8( temp0v, temp1v, temp2v, temp3v, + temp4v, temp5v, temp6v, temp7v ) ); VEC_DIFF_HL( pix1, i_pix1, pix2, i_pix2, diffh0v, diffl0v ); 
VEC_DIFF_HL( pix1, i_pix1, pix2, i_pix2, diffh1v, diffl1v ); @@ -557,14 +554,10 @@ temp0v, temp1v, temp2v, temp3v ); VEC_HADAMAR( diffh4v, diffh5v, diffh6v, diffh7v, temp4v, temp5v, temp6v, temp7v ); - VEC_ADD_ABS( temp0v, satdv, satdv ); - VEC_ADD_ABS( temp1v, satdv, satdv ); - VEC_ADD_ABS( temp2v, satdv, satdv ); - VEC_ADD_ABS( temp3v, satdv, satdv ); - VEC_ADD_ABS( temp4v, satdv, satdv ); - VEC_ADD_ABS( temp5v, satdv, satdv ); - VEC_ADD_ABS( temp6v, satdv, satdv ); - VEC_ADD_ABS( temp7v, satdv, satdv ); + + satdv = vec_add( satdv, add_abs_8( temp0v, temp1v, temp2v, temp3v, + temp4v, temp5v, temp6v, temp7v ) ); + VEC_HADAMAR( diffl0v, diffl1v, diffl2v, diffl3v, temp0v, temp1v, temp2v, temp3v ); VEC_HADAMAR( diffl4v, diffl5v, diffl6v, diffl7v, @@ -577,14 +570,9 @@ temp0v, temp1v, temp2v, temp3v ); VEC_HADAMAR( diffl4v, diffl5v, diffl6v, diffl7v, temp4v, temp5v, temp6v, temp7v ); - VEC_ADD_ABS( temp0v, satdv, satdv ); - VEC_ADD_ABS( temp1v, satdv, satdv ); - VEC_ADD_ABS( temp2v, satdv, satdv ); - VEC_ADD_ABS( temp3v, satdv, satdv ); - VEC_ADD_ABS( temp4v, satdv, satdv ); - VEC_ADD_ABS( temp5v, satdv, satdv ); - VEC_ADD_ABS( temp6v, satdv, satdv ); - VEC_ADD_ABS( temp7v, satdv, satdv ); + + satdv = vec_add( satdv, add_abs_8( temp0v, temp1v, temp2v, temp3v, + temp4v, temp5v, temp6v, temp7v ) ); satdv = vec_sums( satdv, zero_s32v ); satdv = vec_splat( satdv, 3 ); @@ -635,10 +623,10 @@ pix3v = vec_vsx_ld( 0, pix3 ); pix3 += i_stride; - sum0v = (vec_s32_t) vec_sum4s( vec_sub( vec_max( fencv, pix0v ), vec_min( fencv, pix0v ) ), (vec_u32_t) sum0v ); - sum1v = (vec_s32_t) vec_sum4s( vec_sub( vec_max( fencv, pix1v ), vec_min( fencv, pix1v ) ), (vec_u32_t) sum1v ); - sum2v = (vec_s32_t) vec_sum4s( vec_sub( vec_max( fencv, pix2v ), vec_min( fencv, pix2v ) ), (vec_u32_t) sum2v ); - sum3v = (vec_s32_t) vec_sum4s( vec_sub( vec_max( fencv, pix3v ), vec_min( fencv, pix3v ) ), (vec_u32_t) sum3v ); + sum0v = (vec_s32_t) vec_sum4s( vec_absd( fencv, pix0v ), (vec_u32_t) sum0v ); + 
sum1v = (vec_s32_t) vec_sum4s( vec_absd( fencv, pix1v ), (vec_u32_t) sum1v ); + sum2v = (vec_s32_t) vec_sum4s( vec_absd( fencv, pix2v ), (vec_u32_t) sum2v ); + sum3v = (vec_s32_t) vec_sum4s( vec_absd( fencv, pix3v ), (vec_u32_t) sum3v ); pix0v = vec_vsx_ld( 0, pix0 ); pix0 += i_stride; @@ -655,10 +643,11 @@ pix3v = vec_vsx_ld( 0, pix3 ); pix3 += i_stride; - sum0v = (vec_s32_t) vec_sum4s( vec_sub( vec_max( fencv, pix0v ), vec_min( fencv, pix0v ) ), (vec_u32_t) sum0v ); - sum1v = (vec_s32_t) vec_sum4s( vec_sub( vec_max( fencv, pix1v ), vec_min( fencv, pix1v ) ), (vec_u32_t) sum1v ); - sum2v = (vec_s32_t) vec_sum4s( vec_sub( vec_max( fencv, pix2v ), vec_min( fencv, pix2v ) ), (vec_u32_t) sum2v ); - sum3v = (vec_s32_t) vec_sum4s( vec_sub( vec_max( fencv, pix3v ), vec_min( fencv, pix3v ) ), (vec_u32_t) sum3v ); + sum0v = (vec_s32_t) vec_sum4s( vec_absd( fencv, pix0v ), (vec_u32_t) sum0v ); + sum1v = (vec_s32_t) vec_sum4s( vec_absd( fencv, pix1v ), (vec_u32_t) sum1v ); + sum2v = (vec_s32_t) vec_sum4s( vec_absd( fencv, pix2v ), (vec_u32_t) sum2v ); + sum3v = (vec_s32_t) vec_sum4s( vec_absd( fencv, pix3v ), (vec_u32_t) sum3v ); + } sum0v = vec_sums( sum0v, zero_s32v ); @@ -712,10 +701,9 @@ pix2v = vec_vsx_ld( 0, pix2 ); pix2 += i_stride; - sum0v = (vec_s32_t) vec_sum4s( vec_sub( vec_max( fencv, pix0v ), vec_min( fencv, pix0v ) ), (vec_u32_t) sum0v ); - sum1v = (vec_s32_t) vec_sum4s( vec_sub( vec_max( fencv, pix1v ), vec_min( fencv, pix1v ) ), (vec_u32_t) sum1v ); - sum2v = (vec_s32_t) vec_sum4s( vec_sub( vec_max( fencv, pix2v ), vec_min( fencv, pix2v ) ), (vec_u32_t) sum2v ); - + sum0v = (vec_s32_t) vec_sum4s( vec_absd( fencv, pix0v ), (vec_u32_t) sum0v ); + sum1v = (vec_s32_t) vec_sum4s( vec_absd( fencv, pix1v ), (vec_u32_t) sum1v ); + sum2v = (vec_s32_t) vec_sum4s( vec_absd( fencv, pix2v ), (vec_u32_t) sum2v ); pix0v = vec_vsx_ld( 0, pix0 ); pix0 += i_stride; @@ -729,9 +717,9 @@ pix2v = vec_vsx_ld( 0, pix2 ); pix2 += i_stride; - sum0v = (vec_s32_t) vec_sum4s( vec_sub( 
vec_max( fencv, pix0v ), vec_min( fencv, pix0v ) ), (vec_u32_t) sum0v ); - sum1v = (vec_s32_t) vec_sum4s( vec_sub( vec_max( fencv, pix1v ), vec_min( fencv, pix1v ) ), (vec_u32_t) sum1v ); - sum2v = (vec_s32_t) vec_sum4s( vec_sub( vec_max( fencv, pix2v ), vec_min( fencv, pix2v ) ), (vec_u32_t) sum2v ); + sum0v = (vec_s32_t) vec_sum4s( vec_absd( fencv, pix0v ), (vec_u32_t) sum0v ); + sum1v = (vec_s32_t) vec_sum4s( vec_absd( fencv, pix1v ), (vec_u32_t) sum1v ); + sum2v = (vec_s32_t) vec_sum4s( vec_absd( fencv, pix2v ), (vec_u32_t) sum2v ); } sum0v = vec_sums( sum0v, zero_s32v ); @@ -785,10 +773,10 @@ pix3v = vec_vsx_ld( 0, pix3 ); pix3 += i_stride; - sum0v = (vec_s32_t) vec_sum4s( vec_sub( vec_max( fencv, pix0v ), vec_min( fencv, pix0v ) ), (vec_u32_t) sum0v ); - sum1v = (vec_s32_t) vec_sum4s( vec_sub( vec_max( fencv, pix1v ), vec_min( fencv, pix1v ) ), (vec_u32_t) sum1v ); - sum2v = (vec_s32_t) vec_sum4s( vec_sub( vec_max( fencv, pix2v ), vec_min( fencv, pix2v ) ), (vec_u32_t) sum2v ); - sum3v = (vec_s32_t) vec_sum4s( vec_sub( vec_max( fencv, pix3v ), vec_min( fencv, pix3v ) ), (vec_u32_t) sum3v ); + sum0v = (vec_s32_t) vec_sum4s( vec_absd( fencv, pix0v ), (vec_u32_t) sum0v ); + sum1v = (vec_s32_t) vec_sum4s( vec_absd( fencv, pix1v ), (vec_u32_t) sum1v ); + sum2v = (vec_s32_t) vec_sum4s( vec_absd( fencv, pix2v ), (vec_u32_t) sum2v ); + sum3v = (vec_s32_t) vec_sum4s( vec_absd( fencv, pix3v ), (vec_u32_t) sum3v ); pix0v = vec_vsx_ld( 0, pix0 ); pix0 += i_stride; @@ -805,10 +793,10 @@ pix3v = vec_vsx_ld( 0, pix3 ); pix3 += i_stride; - sum0v = (vec_s32_t) vec_sum4s( vec_sub( vec_max( fencv, pix0v ), vec_min( fencv, pix0v ) ), (vec_u32_t) sum0v ); - sum1v = (vec_s32_t) vec_sum4s( vec_sub( vec_max( fencv, pix1v ), vec_min( fencv, pix1v ) ), (vec_u32_t) sum1v ); - sum2v = (vec_s32_t) vec_sum4s( vec_sub( vec_max( fencv, pix2v ), vec_min( fencv, pix2v ) ), (vec_u32_t) sum2v ); - sum3v = (vec_s32_t) vec_sum4s( vec_sub( vec_max( fencv, pix3v ), vec_min( fencv, pix3v ) ), 
(vec_u32_t) sum3v ); + sum0v = (vec_s32_t) vec_sum4s( vec_absd( fencv, pix0v ), (vec_u32_t) sum0v ); + sum1v = (vec_s32_t) vec_sum4s( vec_absd( fencv, pix1v ), (vec_u32_t) sum1v ); + sum2v = (vec_s32_t) vec_sum4s( vec_absd( fencv, pix2v ), (vec_u32_t) sum2v ); + sum3v = (vec_s32_t) vec_sum4s( vec_absd( fencv, pix3v ), (vec_u32_t) sum3v ); } sum0v = vec_sums( sum0v, zero_s32v ); @@ -832,6 +820,82 @@ scores[3] = sum3; } +#define PROCESS_PIXS \ + vec_u8_t pix0vH = vec_vsx_ld( 0, pix0 ); \ + pix0 += i_stride; \ + \ + vec_u8_t pix1vH = vec_vsx_ld( 0, pix1 ); \ + pix1 += i_stride; \ + \ + vec_u8_t fencvH = vec_vsx_ld( 0, fenc ); \ + fenc += FENC_STRIDE; \ + \ + vec_u8_t pix2vH = vec_vsx_ld( 0, pix2 ); \ + pix2 += i_stride; \ + \ + vec_u8_t pix0vL = vec_vsx_ld( 0, pix0 ); \ + pix0 += i_stride; \ + \ + vec_u8_t pix1vL = vec_vsx_ld( 0, pix1 ); \ + pix1 += i_stride; \ + \ + vec_u8_t fencvL = vec_vsx_ld( 0, fenc ); \ + fenc += FENC_STRIDE; \ + \ + vec_u8_t pix2vL = vec_vsx_ld( 0, pix2 ); \ + pix2 += i_stride; \ + \ + fencv = xxpermdi( fencvH, fencvL, 0 ); \ + pix0v = xxpermdi( pix0vH, pix0vL, 0 ); \ + pix1v = xxpermdi( pix1vH, pix1vL, 0 ); \ + pix2v = xxpermdi( pix2vH, pix2vL, 0 ); \ + \ + sum0v = (vec_s32_t) vec_sum4s( vec_absd( fencv, pix0v ), (vec_u32_t) sum0v ); \ + sum1v = (vec_s32_t) vec_sum4s( vec_absd( fencv, pix1v ), (vec_u32_t) sum1v ); \ + sum2v = (vec_s32_t) vec_sum4s( vec_absd( fencv, pix2v ), (vec_u32_t) sum2v ); + +#define PIXEL_SAD_X3_ALTIVEC( name, ly ) \ +static int name( uint8_t *fenc, uint8_t *pix0, \ + uint8_t *pix1, uint8_t *pix2, \ + intptr_t i_stride, int scores[3] ) \ +{ \ + ALIGNED_16( int sum0 ); \ + ALIGNED_16( int sum1 ); \ + ALIGNED_16( int sum2 ); \ + \ + LOAD_ZERO; \ + vec_u8_t fencv, pix0v, pix1v, pix2v; \ + vec_s32_t sum0v, sum1v, sum2v; \ + \ + sum0v = vec_splat_s32( 0 ); \ + sum1v = vec_splat_s32( 0 ); \ + sum2v = vec_splat_s32( 0 ); \ + \ + for( int y = 0; y < ly; y++ ) \ + { \ + PROCESS_PIXS \ + } \ + \ + sum0v = vec_sums( sum0v, 
zero_s32v ); \ + sum1v = vec_sums( sum1v, zero_s32v ); \ + sum2v = vec_sums( sum2v, zero_s32v ); \ + \ + sum0v = vec_splat( sum0v, 3 ); \ + sum1v = vec_splat( sum1v, 3 ); \ + sum2v = vec_splat( sum2v, 3 ); \ + \ + vec_ste( sum0v, 0, &sum0 ); \ + vec_ste( sum1v, 0, &sum1 ); \ + vec_ste( sum2v, 0, &sum2 ); \ + \ + scores[0] = sum0; \ + scores[1] = sum1; \ + scores[2] = sum2; \ +} + +PIXEL_SAD_X3_ALTIVEC( pixel_sad_x3_8x8_altivec, 4 ) +PIXEL_SAD_X3_ALTIVEC( pixel_sad_x3_8x16_altivec, 8 ) + static void pixel_sad_x3_16x8_altivec( uint8_t *fenc, uint8_t *pix0, uint8_t *pix1, uint8_t *pix2, intptr_t i_stride, int scores[3] ) @@ -862,9 +926,9 @@ pix2v = vec_vsx_ld(0, pix2); pix2 += i_stride; - sum0v = (vec_s32_t) vec_sum4s( vec_sub( vec_max( fencv, pix0v ), vec_min( fencv, pix0v ) ), (vec_u32_t) sum0v ); - sum1v = (vec_s32_t) vec_sum4s( vec_sub( vec_max( fencv, pix1v ), vec_min( fencv, pix1v ) ), (vec_u32_t) sum1v ); - sum2v = (vec_s32_t) vec_sum4s( vec_sub( vec_max( fencv, pix2v ), vec_min( fencv, pix2v ) ), (vec_u32_t) sum2v ); + sum0v = (vec_s32_t) vec_sum4s( vec_absd( fencv, pix0v ), (vec_u32_t) sum0v ); + sum1v = (vec_s32_t) vec_sum4s( vec_absd( fencv, pix1v ), (vec_u32_t) sum1v ); + sum2v = (vec_s32_t) vec_sum4s( vec_absd( fencv, pix2v ), (vec_u32_t) sum2v ); pix0v = vec_vsx_ld(0, pix0); pix0 += i_stride; @@ -878,9 +942,9 @@ pix2v = vec_vsx_ld(0, pix2); pix2 += i_stride; - sum0v = (vec_s32_t) vec_sum4s( vec_sub( vec_max( fencv, pix0v ), vec_min( fencv, pix0v ) ), (vec_u32_t) sum0v ); - sum1v = (vec_s32_t) vec_sum4s( vec_sub( vec_max( fencv, pix1v ), vec_min( fencv, pix1v ) ), (vec_u32_t) sum1v ); - sum2v = (vec_s32_t) vec_sum4s( vec_sub( vec_max( fencv, pix2v ), vec_min( fencv, pix2v ) ), (vec_u32_t) sum2v ); + sum0v = (vec_s32_t) vec_sum4s( vec_absd( fencv, pix0v ), (vec_u32_t) sum0v ); + sum1v = (vec_s32_t) vec_sum4s( vec_absd( fencv, pix1v ), (vec_u32_t) sum1v ); + sum2v = (vec_s32_t) vec_sum4s( vec_absd( fencv, pix2v ), (vec_u32_t) sum2v ); } sum0v = vec_sums( 
sum0v, zero_s32v ); @@ -900,308 +964,49 @@ scores[2] = sum2; } - -static void pixel_sad_x4_8x16_altivec( uint8_t *fenc, - uint8_t *pix0, uint8_t *pix1, - uint8_t *pix2, uint8_t *pix3, - intptr_t i_stride, int scores[4] ) -{ - ALIGNED_16( int sum0 ); - ALIGNED_16( int sum1 ); - ALIGNED_16( int sum2 ); - ALIGNED_16( int sum3 ); - - LOAD_ZERO; - vec_u8_t fencv, pix0v, pix1v, pix2v, pix3v; - vec_s32_t sum0v, sum1v, sum2v, sum3v; - - sum0v = vec_splat_s32(0); - sum1v = vec_splat_s32(0); - sum2v = vec_splat_s32(0); - sum3v = vec_splat_s32(0); - - for( int y = 0; y < 8; y++ ) - { - pix0v = vec_vsx_ld(0, pix0); - pix0 += i_stride; - - pix1v = vec_vsx_ld(0, pix1); - pix1 += i_stride; - - fencv = vec_vsx_ld(0, fenc); - fenc += FENC_STRIDE; - - pix2v = vec_vsx_ld(0, pix2); - pix2 += i_stride; - - pix3v = vec_vsx_ld(0, pix3); - pix3 += i_stride; - - sum0v = (vec_s32_t) vec_sum4s( vec_sub( vec_max( fencv, pix0v ), vec_min( fencv, pix0v ) ), (vec_u32_t) sum0v ); - sum1v = (vec_s32_t) vec_sum4s( vec_sub( vec_max( fencv, pix1v ), vec_min( fencv, pix1v ) ), (vec_u32_t) sum1v ); - sum2v = (vec_s32_t) vec_sum4s( vec_sub( vec_max( fencv, pix2v ), vec_min( fencv, pix2v ) ), (vec_u32_t) sum2v ); - sum3v = (vec_s32_t) vec_sum4s( vec_sub( vec_max( fencv, pix3v ), vec_min( fencv, pix3v ) ), (vec_u32_t) sum3v ); - - pix0v = vec_vsx_ld(0, pix0); - pix0 += i_stride; - - pix1v = vec_vsx_ld(0, pix1); - pix1 += i_stride; - - fencv = vec_vsx_ld(0, fenc); - fenc += FENC_STRIDE; - - pix2v = vec_vsx_ld(0, pix2); - pix2 += i_stride; - - pix3v = vec_vsx_ld(0, pix3); - pix3 += i_stride; - - sum0v = (vec_s32_t) vec_sum4s( vec_sub( vec_max( fencv, pix0v ), vec_min( fencv, pix0v ) ), (vec_u32_t) sum0v ); - sum1v = (vec_s32_t) vec_sum4s( vec_sub( vec_max( fencv, pix1v ), vec_min( fencv, pix1v ) ), (vec_u32_t) sum1v ); - sum2v = (vec_s32_t) vec_sum4s( vec_sub( vec_max( fencv, pix2v ), vec_min( fencv, pix2v ) ), (vec_u32_t) sum2v ); - sum3v = (vec_s32_t) vec_sum4s( vec_sub( vec_max( fencv, pix3v ), vec_min( 
fencv, pix3v ) ), (vec_u32_t) sum3v ); - } - - sum0v = vec_sum2s( sum0v, zero_s32v ); - sum1v = vec_sum2s( sum1v, zero_s32v ); - sum2v = vec_sum2s( sum2v, zero_s32v ); - sum3v = vec_sum2s( sum3v, zero_s32v ); - - sum0v = vec_splat( sum0v, 1 ); - sum1v = vec_splat( sum1v, 1 ); - sum2v = vec_splat( sum2v, 1 ); - sum3v = vec_splat( sum3v, 1 ); - - vec_ste( sum0v, 0, &sum0); - vec_ste( sum1v, 0, &sum1); - vec_ste( sum2v, 0, &sum2); - vec_ste( sum3v, 0, &sum3); - - scores[0] = sum0; - scores[1] = sum1; - scores[2] = sum2; - scores[3] = sum3; +#define PIXEL_SAD_X4_ALTIVEC( name, ly ) \ +static int name( uint8_t *fenc, \ + uint8_t *pix0, uint8_t *pix1, \ + uint8_t *pix2, uint8_t *pix3, \ + intptr_t i_stride, int scores[4] ) \ +{ \ + ALIGNED_16( int sum0 ); \ + ALIGNED_16( int sum1 ); \ + ALIGNED_16( int sum2 ); \ + \ + LOAD_ZERO; \ + vec_u8_t fencv, pix0v, pix1v, pix2v, pix3v; \ + vec_s32_t sum0v, sum1v, sum2v, sum3v; \ + \ + sum0v = vec_splat_s32( 0 ); \ + sum1v = vec_splat_s32( 0 ); \ + sum2v = vec_splat_s32( 0 ); \ + \ + for( int y = 0; y < ly; y++ ) \ + { \ + PROCESS_PIXS \ + vec_u8_t pix3vH = vec_vsx_ld( 0, pix3 ); \ + pix3 += i_stride; \ + vec_u8_t pix3vL = vec_vsx_ld( 0, pix3 ); \ + pix3 += i_stride; \ + pix3v = xxpermdi( pix3vH, pix3vL, 0 ); \ + sum3v = (vec_s32_t) vec_sum4s( vec_absd( fencv, pix3v ), (vec_u32_t) sum3v ); \ + } \ + \ + sum0v = vec_sums( sum0v, zero_s32v ); \ + sum1v = vec_sums( sum1v, zero_s32v ); \ + sum2v = vec_sums( sum2v, zero_s32v ); \ + sum3v = vec_sums( sum3v, zero_s32v ); \ + \ + vec_s32_t s01 = vec_mergel( sum0v, sum1v ); \ + vec_s32_t s23 = vec_mergel( sum2v, sum3v ); \ + vec_s32_t s = xxpermdi( s01, s23, 3 ); \ + \ + vec_vsx_st( s, 0, scores ); \ } -static void pixel_sad_x3_8x16_altivec( uint8_t *fenc, uint8_t *pix0, - uint8_t *pix1, uint8_t *pix2, - intptr_t i_stride, int scores[3] ) -{ - ALIGNED_16( int sum0 ); - ALIGNED_16( int sum1 ); - ALIGNED_16( int sum2 ); - - LOAD_ZERO; - vec_u8_t fencv, pix0v, pix1v, pix2v; - vec_s32_t sum0v, 
sum1v, sum2v; - - sum0v = vec_splat_s32(0); - sum1v = vec_splat_s32(0); - sum2v = vec_splat_s32(0); - - for( int y = 0; y < 8; y++ ) - { - pix0v = vec_vsx_ld(0, pix0); - pix0 += i_stride; - - pix1v = vec_vsx_ld(0, pix1); - pix1 += i_stride; - - fencv = vec_vsx_ld(0, fenc); - fenc += FENC_STRIDE; - - pix2v = vec_vsx_ld(0, pix2); - pix2 += i_stride; - - sum0v = (vec_s32_t) vec_sum4s( vec_sub( vec_max( fencv, pix0v ), vec_min( fencv, pix0v ) ), (vec_u32_t) sum0v ); - sum1v = (vec_s32_t) vec_sum4s( vec_sub( vec_max( fencv, pix1v ), vec_min( fencv, pix1v ) ), (vec_u32_t) sum1v ); - sum2v = (vec_s32_t) vec_sum4s( vec_sub( vec_max( fencv, pix2v ), vec_min( fencv, pix2v ) ), (vec_u32_t) sum2v ); - - pix0v = vec_vsx_ld(0, pix0); - pix0 += i_stride; - - pix1v = vec_vsx_ld(0, pix1); - pix1 += i_stride; - - fencv = vec_vsx_ld(0, fenc); - fenc += FENC_STRIDE; - - pix2v = vec_vsx_ld(0, pix2); - pix2 += i_stride; - - sum0v = (vec_s32_t) vec_sum4s( vec_sub( vec_max( fencv, pix0v ), vec_min( fencv, pix0v ) ), (vec_u32_t) sum0v ); - sum1v = (vec_s32_t) vec_sum4s( vec_sub( vec_max( fencv, pix1v ), vec_min( fencv, pix1v ) ), (vec_u32_t) sum1v ); - sum2v = (vec_s32_t) vec_sum4s( vec_sub( vec_max( fencv, pix2v ), vec_min( fencv, pix2v ) ), (vec_u32_t) sum2v ); - } - - sum0v = vec_sum2s( sum0v, zero_s32v ); - sum1v = vec_sum2s( sum1v, zero_s32v ); - sum2v = vec_sum2s( sum2v, zero_s32v ); - - sum0v = vec_splat( sum0v, 1 ); - sum1v = vec_splat( sum1v, 1 ); - sum2v = vec_splat( sum2v, 1 ); - - vec_ste( sum0v, 0, &sum0); - vec_ste( sum1v, 0, &sum1); - vec_ste( sum2v, 0, &sum2); - - scores[0] = sum0; - scores[1] = sum1; - scores[2] = sum2; -} - -static void pixel_sad_x4_8x8_altivec( uint8_t *fenc, - uint8_t *pix0, uint8_t *pix1, - uint8_t *pix2, uint8_t *pix3, - intptr_t i_stride, int scores[4] ) -{ - ALIGNED_16( int sum0 ); - ALIGNED_16( int sum1 ); - ALIGNED_16( int sum2 ); - ALIGNED_16( int sum3 ); - - LOAD_ZERO; - vec_u8_t fencv, pix0v, pix1v, pix2v, pix3v; - vec_s32_t sum0v, sum1v, 
sum2v, sum3v; - - sum0v = vec_splat_s32(0); - sum1v = vec_splat_s32(0); - sum2v = vec_splat_s32(0); - sum3v = vec_splat_s32(0); - - for( int y = 0; y < 4; y++ ) - { - pix0v = vec_vsx_ld(0, pix0); - pix0 += i_stride; - - pix1v = vec_vsx_ld(0, pix1); - pix1 += i_stride; - - fencv = vec_vsx_ld(0, fenc); - fenc += FENC_STRIDE; - - pix2v = vec_vsx_ld(0, pix2); - pix2 += i_stride; - - pix3v = vec_vsx_ld(0, pix3); - pix3 += i_stride; - - sum0v = (vec_s32_t) vec_sum4s( vec_sub( vec_max( fencv, pix0v ), vec_min( fencv, pix0v ) ), (vec_u32_t) sum0v ); - sum1v = (vec_s32_t) vec_sum4s( vec_sub( vec_max( fencv, pix1v ), vec_min( fencv, pix1v ) ), (vec_u32_t) sum1v ); - sum2v = (vec_s32_t) vec_sum4s( vec_sub( vec_max( fencv, pix2v ), vec_min( fencv, pix2v ) ), (vec_u32_t) sum2v ); - sum3v = (vec_s32_t) vec_sum4s( vec_sub( vec_max( fencv, pix3v ), vec_min( fencv, pix3v ) ), (vec_u32_t) sum3v ); - - pix0v = vec_vsx_ld(0, pix0); - pix0 += i_stride; - - pix1v = vec_vsx_ld(0, pix1); - pix1 += i_stride; - - fencv = vec_vsx_ld(0, fenc); - fenc += FENC_STRIDE; - - pix2v = vec_vsx_ld(0, pix2); - pix2 += i_stride; - - pix3v = vec_vsx_ld(0, pix3); - pix3 += i_stride; - - sum0v = (vec_s32_t) vec_sum4s( vec_sub( vec_max( fencv, pix0v ), vec_min( fencv, pix0v ) ), (vec_u32_t) sum0v ); - sum1v = (vec_s32_t) vec_sum4s( vec_sub( vec_max( fencv, pix1v ), vec_min( fencv, pix1v ) ), (vec_u32_t) sum1v ); - sum2v = (vec_s32_t) vec_sum4s( vec_sub( vec_max( fencv, pix2v ), vec_min( fencv, pix2v ) ), (vec_u32_t) sum2v ); - sum3v = (vec_s32_t) vec_sum4s( vec_sub( vec_max( fencv, pix3v ), vec_min( fencv, pix3v ) ), (vec_u32_t) sum3v ); - } - - sum0v = vec_sum2s( sum0v, zero_s32v ); - sum1v = vec_sum2s( sum1v, zero_s32v ); - sum2v = vec_sum2s( sum2v, zero_s32v ); - sum3v = vec_sum2s( sum3v, zero_s32v ); - - sum0v = vec_splat( sum0v, 1 ); - sum1v = vec_splat( sum1v, 1 ); - sum2v = vec_splat( sum2v, 1 ); - sum3v = vec_splat( sum3v, 1 ); - - vec_ste( sum0v, 0, &sum0); - vec_ste( sum1v, 0, &sum1); - vec_ste( 
sum2v, 0, &sum2); - vec_ste( sum3v, 0, &sum3); - - scores[0] = sum0; - scores[1] = sum1; - scores[2] = sum2; - scores[3] = sum3; -} - -static void pixel_sad_x3_8x8_altivec( uint8_t *fenc, uint8_t *pix0, - uint8_t *pix1, uint8_t *pix2, - intptr_t i_stride, int scores[3] ) -{ - ALIGNED_16( int sum0 ); - ALIGNED_16( int sum1 ); - ALIGNED_16( int sum2 ); - - LOAD_ZERO; - vec_u8_t fencv, pix0v, pix1v, pix2v; - vec_s32_t sum0v, sum1v, sum2v; - - sum0v = vec_splat_s32(0); - sum1v = vec_splat_s32(0); - sum2v = vec_splat_s32(0); - - for( int y = 0; y < 4; y++ ) - { - pix0v = vec_vsx_ld(0, pix0); - pix0 += i_stride; - - pix1v = vec_vsx_ld(0, pix1); - pix1 += i_stride; - - fencv = vec_vsx_ld(0, fenc); - fenc += FENC_STRIDE; - - pix2v = vec_vsx_ld(0, pix2); - pix2 += i_stride; - - sum0v = (vec_s32_t) vec_sum4s( vec_sub( vec_max( fencv, pix0v ), vec_min( fencv, pix0v ) ), (vec_u32_t) sum0v ); - sum1v = (vec_s32_t) vec_sum4s( vec_sub( vec_max( fencv, pix1v ), vec_min( fencv, pix1v ) ), (vec_u32_t) sum1v ); - sum2v = (vec_s32_t) vec_sum4s( vec_sub( vec_max( fencv, pix2v ), vec_min( fencv, pix2v ) ), (vec_u32_t) sum2v ); - - pix0v = vec_vsx_ld(0, pix0); - pix0 += i_stride; - - pix1v = vec_vsx_ld(0, pix1); - pix1 += i_stride; - - fencv = vec_vsx_ld(0, fenc); - fenc += FENC_STRIDE; - - pix2v = vec_vsx_ld(0, pix2); - pix2 += i_stride; - - sum0v = (vec_s32_t) vec_sum4s( vec_sub( vec_max( fencv, pix0v ), vec_min( fencv, pix0v ) ), (vec_u32_t) sum0v ); - sum1v = (vec_s32_t) vec_sum4s( vec_sub( vec_max( fencv, pix1v ), vec_min( fencv, pix1v ) ), (vec_u32_t) sum1v ); - sum2v = (vec_s32_t) vec_sum4s( vec_sub( vec_max( fencv, pix2v ), vec_min( fencv, pix2v ) ), (vec_u32_t) sum2v ); - } - - sum0v = vec_sum2s( sum0v, zero_s32v ); - sum1v = vec_sum2s( sum1v, zero_s32v ); - sum2v = vec_sum2s( sum2v, zero_s32v ); - - sum0v = vec_splat( sum0v, 1 ); - sum1v = vec_splat( sum1v, 1 ); - sum2v = vec_splat( sum2v, 1 ); - - vec_ste( sum0v, 0, &sum0); - vec_ste( sum1v, 0, &sum1); - vec_ste( sum2v, 0, 
&sum2); - - scores[0] = sum0; - scores[1] = sum1; - scores[2] = sum2; -} +PIXEL_SAD_X4_ALTIVEC( pixel_sad_x4_8x8_altivec, 4 ) +PIXEL_SAD_X4_ALTIVEC( pixel_sad_x4_8x16_altivec, 8 ) /*********************************************************************** * SSD routines @@ -1215,7 +1020,7 @@ LOAD_ZERO; vec_u8_t pix1vA, pix2vA, pix1vB, pix2vB; vec_u32_t sumv; - vec_u8_t maxA, minA, diffA, maxB, minB, diffB; + vec_u8_t diffA, diffB; sumv = vec_splat_u32(0); @@ -1227,25 +1032,19 @@ pix1 += i_stride_pix1; pix2 += i_stride_pix2; - maxA = vec_max(pix1vA, pix2vA); - minA = vec_min(pix1vA, pix2vA); - pix2vB = vec_vsx_ld(0, pix2); pix1vB = vec_ld(0, pix1); - diffA = vec_sub(maxA, minA); + diffA = vec_absd(pix1vA, pix2vA); sumv = vec_msum(diffA, diffA, sumv); pix1 += i_stride_pix1; pix2 += i_stride_pix2; - maxB = vec_max(pix1vB, pix2vB); - minB = vec_min(pix1vB, pix2vB); - pix2vA = vec_vsx_ld(0, pix2); pix1vA = vec_ld(0, pix1); - diffB = vec_sub(maxB, minB); + diffB = vec_absd(pix1vB, pix2vB); sumv = vec_msum(diffB, diffB, sumv); } @@ -1255,16 +1054,10 @@ pix2vB = vec_vsx_ld(0, pix2); pix1vB = vec_ld(0, pix1); - maxA = vec_max(pix1vA, pix2vA); - minA = vec_min(pix1vA, pix2vA); - - maxB = vec_max(pix1vB, pix2vB); - minB = vec_min(pix1vB, pix2vB); - - diffA = vec_sub(maxA, minA); + diffA = vec_absd(pix1vA, pix2vA); sumv = vec_msum(diffA, diffA, sumv); - diffB = vec_sub(maxB, minB); + diffB = vec_absd(pix1vB, pix2vB); sumv = vec_msum(diffB, diffB, sumv); sumv = (vec_u32_t) vec_sums((vec_s32_t) sumv, zero_s32v); @@ -1281,7 +1074,7 @@ LOAD_ZERO; vec_u8_t pix1v, pix2v; vec_u32_t sumv; - vec_u8_t maxv, minv, diffv; + vec_u8_t diffv; const vec_u32_t sel = (vec_u32_t)CV(-1,-1,0,0); @@ -1292,10 +1085,7 @@ pix1v = vec_vsx_ld(0, pix1); pix2v = vec_vsx_ld(0, pix2); - maxv = vec_max(pix1v, pix2v); - minv = vec_min(pix1v, pix2v); - - diffv = vec_sub(maxv, minv); + diffv = vec_absd( pix1v, pix2v ); sumv = vec_msum(diffv, diffv, sumv); pix1 += i_stride_pix1; @@ -1315,7 +1105,7 @@ 
/**************************************************************************** * variance ****************************************************************************/ -static uint64_t x264_pixel_var_16x16_altivec( uint8_t *pix, intptr_t i_stride ) +static uint64_t pixel_var_16x16_altivec( uint8_t *pix, intptr_t i_stride ) { ALIGNED_16(uint32_t sum_tab[4]); ALIGNED_16(uint32_t sqr_tab[4]); @@ -1342,7 +1132,7 @@ return sum + ((uint64_t)sqr<<32); } -static uint64_t x264_pixel_var_8x8_altivec( uint8_t *pix, intptr_t i_stride ) +static uint64_t pixel_var_8x8_altivec( uint8_t *pix, intptr_t i_stride ) { ALIGNED_16(uint32_t sum_tab[4]); ALIGNED_16(uint32_t sqr_tab[4]); @@ -1634,7 +1424,7 @@ 0x1C,0x0C,0x1D,0x0D, 0x1E,0x0E,0x1F,0x0F ) }; -static uint64_t x264_pixel_hadamard_ac_16x16_altivec( uint8_t *pix, intptr_t stride ) +static uint64_t pixel_hadamard_ac_16x16_altivec( uint8_t *pix, intptr_t stride ) { int idx = ((uintptr_t)pix & 8) >> 3; vec_u8_t permh = hadamard_permtab[idx]; @@ -1646,7 +1436,7 @@ return ((sum>>34)<<32) + ((uint32_t)sum>>1); } -static uint64_t x264_pixel_hadamard_ac_16x8_altivec( uint8_t *pix, intptr_t stride ) +static uint64_t pixel_hadamard_ac_16x8_altivec( uint8_t *pix, intptr_t stride ) { int idx = ((uintptr_t)pix & 8) >> 3; vec_u8_t permh = hadamard_permtab[idx]; @@ -1656,7 +1446,7 @@ return ((sum>>34)<<32) + ((uint32_t)sum>>1); } -static uint64_t x264_pixel_hadamard_ac_8x16_altivec( uint8_t *pix, intptr_t stride ) +static uint64_t pixel_hadamard_ac_8x16_altivec( uint8_t *pix, intptr_t stride ) { vec_u8_t perm = hadamard_permtab[ (((uintptr_t)pix & 8) >> 3) ]; uint64_t sum = pixel_hadamard_ac_altivec( pix, stride, perm ); @@ -1664,7 +1454,7 @@ return ((sum>>34)<<32) + ((uint32_t)sum>>1); } -static uint64_t x264_pixel_hadamard_ac_8x8_altivec( uint8_t *pix, intptr_t stride ) +static uint64_t pixel_hadamard_ac_8x8_altivec( uint8_t *pix, intptr_t stride ) { vec_u8_t perm = hadamard_permtab[ (((uintptr_t)pix & 8) >> 3) ]; uint64_t sum = 
pixel_hadamard_ac_altivec( pix, stride, perm ); @@ -1739,7 +1529,7 @@ #define INTRA_MBCMP_8x8( mbcmp )\ -void intra_##mbcmp##_x3_8x8_altivec( uint8_t *fenc, uint8_t edge[36], int res[3] )\ +static void intra_##mbcmp##_x3_8x8_altivec( uint8_t *fenc, uint8_t edge[36], int res[3] )\ {\ ALIGNED_8( uint8_t pix[8*FDEC_STRIDE] );\ x264_predict_8x8_v_c( pix, edge );\ @@ -1754,7 +1544,7 @@ INTRA_MBCMP_8x8(sa8d) #define INTRA_MBCMP( mbcmp, size, pred1, pred2, pred3, chroma )\ -void intra_##mbcmp##_x3_##size##x##size##chroma##_altivec( uint8_t *fenc, uint8_t *fdec, int res[3] )\ +static void intra_##mbcmp##_x3_##size##x##size##chroma##_altivec( uint8_t *fenc, uint8_t *fdec, int res[3] )\ {\ x264_predict_##size##x##size##chroma##_##pred1##_c( fdec );\ res[0] = pixel_##mbcmp##_##size##x##size##_altivec( fdec, FDEC_STRIDE, fenc, FENC_STRIDE );\ @@ -1832,13 +1622,13 @@ pixf->intra_sa8d_x3_8x8 = intra_sa8d_x3_8x8_altivec; - pixf->var[PIXEL_16x16] = x264_pixel_var_16x16_altivec; - pixf->var[PIXEL_8x8] = x264_pixel_var_8x8_altivec; + pixf->var[PIXEL_16x16] = pixel_var_16x16_altivec; + pixf->var[PIXEL_8x8] = pixel_var_8x8_altivec; - pixf->hadamard_ac[PIXEL_16x16] = x264_pixel_hadamard_ac_16x16_altivec; - pixf->hadamard_ac[PIXEL_16x8] = x264_pixel_hadamard_ac_16x8_altivec; - pixf->hadamard_ac[PIXEL_8x16] = x264_pixel_hadamard_ac_8x16_altivec; - pixf->hadamard_ac[PIXEL_8x8] = x264_pixel_hadamard_ac_8x8_altivec; + pixf->hadamard_ac[PIXEL_16x16] = pixel_hadamard_ac_16x16_altivec; + pixf->hadamard_ac[PIXEL_16x8] = pixel_hadamard_ac_16x8_altivec; + pixf->hadamard_ac[PIXEL_8x16] = pixel_hadamard_ac_8x16_altivec; + pixf->hadamard_ac[PIXEL_8x8] = pixel_hadamard_ac_8x8_altivec; pixf->ssim_4x4x2_core = ssim_4x4x2_core_altivec; #endif // !HIGH_BIT_DEPTH diff -Nru x264-0.152.2854+gite9a5903/common/ppc/pixel.h x264-0.158.2988+git-20191101.7817004/common/ppc/pixel.h --- x264-0.152.2854+gite9a5903/common/ppc/pixel.h 2017-12-31 12:50:51.000000000 +0000 +++ 
x264-0.158.2988+git-20191101.7817004/common/ppc/pixel.h 2019-11-09 05:16:29.000000000 +0000 @@ -1,7 +1,7 @@ /***************************************************************************** * pixel.h: ppc pixel metrics ***************************************************************************** - * Copyright (C) 2003-2017 x264 project + * Copyright (C) 2003-2019 x264 project * * Authors: Eric Petit * @@ -26,6 +26,7 @@ #ifndef X264_PPC_PIXEL_H #define X264_PPC_PIXEL_H +#define x264_pixel_init_altivec x264_template(pixel_init_altivec) void x264_pixel_init_altivec( x264_pixel_function_t *pixf ); #endif diff -Nru x264-0.152.2854+gite9a5903/common/ppc/ppccommon.h x264-0.158.2988+git-20191101.7817004/common/ppc/ppccommon.h --- x264-0.152.2854+gite9a5903/common/ppc/ppccommon.h 2017-12-31 12:50:51.000000000 +0000 +++ x264-0.158.2988+git-20191101.7817004/common/ppc/ppccommon.h 2019-11-09 05:16:29.000000000 +0000 @@ -1,7 +1,7 @@ /***************************************************************************** * ppccommon.h: ppc utility macros ***************************************************************************** - * Copyright (C) 2003-2017 x264 project + * Copyright (C) 2003-2019 x264 project * * Authors: Eric Petit * @@ -146,19 +146,14 @@ #define vec_s32_to_u16(v) vec_packsu( v, zero_s32v ) /*********************************************************************** - * PREP_STORE##n: declares required vectors to store n bytes to a - * potentially unaligned address * VEC_STORE##n: stores n bytes from vector v to address p **********************************************************************/ -#define PREP_STORE8 \ - vec_u8_t _tmp3v; \ - vec_u8_t mask = { 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, \ - 0x18, 0x19, 0x1A, 0x1B, 0x1C, 0x1D, 0x1E, 0x1F } \ - -#define VEC_STORE8( v, p ) \ - _tmp3v = vec_vsx_ld( 0, p ); \ - v = vec_perm( v, _tmp3v, mask ); \ - vec_vsx_st( v, 0, p ) +#ifndef __POWER9_VECTOR__ +#define VEC_STORE8( v, p ) \ + vec_vsx_st( vec_xxpermdi( v, 
vec_vsx_ld( 0, p ), 1 ), 0, p ) +#else +#define VEC_STORE8( v, p ) vec_xst_len( v, p, 8 ) +#endif /*********************************************************************** * VEC_TRANSPOSE_8 @@ -304,3 +299,38 @@ vec_st(vec_perm(_e, _v, _m), off, _dst); \ } while( 0 ) #endif + +#ifndef __POWER9_VECTOR__ +#define vec_absd( a, b ) vec_sub( vec_max( a, b ), vec_min( a, b ) ) +#endif + +// vec_xxpermdi is quite useful but some version of clang do not expose it +#if !HAVE_VSX || (defined(__clang__) && __clang_major__ < 6) +static const vec_u8_t xxpermdi0_perm = { 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, + 0x06, 0x07, 0x10, 0x11, 0x12, 0x13, + 0x14, 0x15, 0x16, 0x17 }; +static const vec_u8_t xxpermdi1_perm = { 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, + 0x06, 0x07, 0x18, 0x19, 0x1A, 0x1B, + 0x1C, 0x1D, 0x1E, 0x1F }; +static const vec_u8_t xxpermdi2_perm = { 0x08, 0x09, 0x0A, 0x0B, 0x0C, 0x0D, + 0x0E, 0x0F, 0x10, 0x11, 0x12, 0x13, + 0x14, 0x15, 0x16, 0x17 }; +static const vec_u8_t xxpermdi3_perm = { 0x08, 0x09, 0x0A, 0x0B, 0x0C, 0x0D, + 0x0E, 0x0F, 0x18, 0x19, 0x1A, 0x1B, + 0x1C, 0x1D, 0x1E, 0x1F }; +#define xxpermdi(a, b, c) vec_perm(a, b, xxpermdi##c##_perm) +#elif (defined(__GNUC__) && (__GNUC__ > 6 || (__GNUC__ == 6 && __GNUC_MINOR__ >= 3))) || \ + (defined(__clang__) && __clang_major__ >= 7) +#define xxpermdi(a, b, c) vec_xxpermdi(a, b, c) +#endif + +// vec_xxpermdi has its endianness bias exposed in early gcc and clang +#ifdef WORDS_BIGENDIAN +#ifndef xxpermdi +#define xxpermdi(a, b, c) vec_xxpermdi(a, b, c) +#endif +#else +#ifndef xxpermdi +#define xxpermdi(a, b, c) vec_xxpermdi(b, a, ((c >> 1) | (c & 1) << 1) ^ 3) +#endif +#endif diff -Nru x264-0.152.2854+gite9a5903/common/ppc/predict.c x264-0.158.2988+git-20191101.7817004/common/ppc/predict.c --- x264-0.152.2854+gite9a5903/common/ppc/predict.c 2017-12-31 12:50:51.000000000 +0000 +++ x264-0.158.2988+git-20191101.7817004/common/ppc/predict.c 2019-11-09 05:16:29.000000000 +0000 @@ -1,7 +1,7 @@ 
/***************************************************************************** * predict.c: ppc intra prediction ***************************************************************************** - * Copyright (C) 2007-2017 x264 project + * Copyright (C) 2007-2019 x264 project * * Authors: Guillaume Poirier * @@ -24,9 +24,9 @@ *****************************************************************************/ #include "common/common.h" +#include "ppccommon.h" #include "predict.h" #include "pixel.h" -#include "ppccommon.h" #if !HIGH_BIT_DEPTH static void predict_8x8c_p_altivec( uint8_t *src ) @@ -58,8 +58,6 @@ vec_s16_t induc_v = (vec_s16_t) CV(0, 1, 2, 3, 4, 5, 6, 7); vec_s16_t add_i0_b_0v = vec_mladd(induc_v, b_v, i00_v); - PREP_STORE8; - for( int i = 0; i < 8; ++i ) { vec_s16_t shift_0_v = vec_sra(add_i0_b_0v, val5_v); @@ -67,7 +65,6 @@ VEC_STORE8(com_sat_v, &src[0]); src += FDEC_STRIDE; add_i0_b_0v = vec_adds(add_i0_b_0v, c_v); - } } diff -Nru x264-0.152.2854+gite9a5903/common/ppc/predict.h x264-0.158.2988+git-20191101.7817004/common/ppc/predict.h --- x264-0.152.2854+gite9a5903/common/ppc/predict.h 2017-12-31 12:50:51.000000000 +0000 +++ x264-0.158.2988+git-20191101.7817004/common/ppc/predict.h 2019-11-09 05:16:29.000000000 +0000 @@ -1,7 +1,7 @@ /***************************************************************************** * predict.h: ppc intra prediction ***************************************************************************** - * Copyright (C) 2007-2017 x264 project + * Copyright (C) 2007-2019 x264 project * * Authors: Guillaume Poirier * @@ -26,7 +26,9 @@ #ifndef X264_PPC_PREDICT_H #define X264_PPC_PREDICT_H +#define x264_predict_16x16_init_altivec x264_template(predict_16x16_init_altivec) void x264_predict_16x16_init_altivec( x264_predict_t pf[7] ); +#define x264_predict_8x8c_init_altivec x264_template(predict_8x8c_init_altivec) void x264_predict_8x8c_init_altivec( x264_predict_t pf[7] ); #endif /* X264_PPC_PREDICT_H */ diff -Nru 
x264-0.152.2854+gite9a5903/common/ppc/quant.c x264-0.158.2988+git-20191101.7817004/common/ppc/quant.c --- x264-0.152.2854+gite9a5903/common/ppc/quant.c 2017-12-31 12:50:51.000000000 +0000 +++ x264-0.158.2988+git-20191101.7817004/common/ppc/quant.c 2019-11-09 05:16:29.000000000 +0000 @@ -1,7 +1,7 @@ /***************************************************************************** * quant.c: ppc quantization ***************************************************************************** - * Copyright (C) 2007-2017 x264 project + * Copyright (C) 2007-2019 x264 project * * Authors: Guillaume Poirier * @@ -39,8 +39,8 @@ biasvB = vec_ld((idx1), bias); \ mskA = vec_cmplt(temp1v, zero_s16v); \ mskB = vec_cmplt(temp2v, zero_s16v); \ - coefvA = (vec_u16_t)vec_max(vec_sub(zero_s16v, temp1v), temp1v);\ - coefvB = (vec_u16_t)vec_max(vec_sub(zero_s16v, temp2v), temp2v);\ + coefvA = (vec_u16_t)vec_abs( temp1v ); \ + coefvB = (vec_u16_t)vec_abs( temp2v ); \ coefvA = vec_adds(coefvA, biasvA); \ coefvB = vec_adds(coefvB, biasvB); \ multEvenvA = vec_mule(coefvA, mfvA); \ @@ -51,8 +51,12 @@ multOddvA = vec_sr(multOddvA, i_qbitsv); \ multEvenvB = vec_sr(multEvenvB, i_qbitsv); \ multOddvB = vec_sr(multOddvB, i_qbitsv); \ - temp1v = (vec_s16_t) vec_packs(vec_mergeh(multEvenvA, multOddvA), vec_mergel(multEvenvA, multOddvA)); \ - temp2v = (vec_s16_t) vec_packs(vec_mergeh(multEvenvB, multOddvB), vec_mergel(multEvenvB, multOddvB)); \ + temp1v = (vec_s16_t) vec_packs( multEvenvA, multOddvA ); \ + tmpv = xxpermdi( temp1v, temp1v, 2 ); \ + temp1v = vec_mergeh( temp1v, tmpv ); \ + temp2v = (vec_s16_t) vec_packs( multEvenvB, multOddvB ); \ + tmpv = xxpermdi( temp2v, temp2v, 2 ); \ + temp2v = vec_mergeh( temp2v, tmpv ); \ temp1v = vec_xor(temp1v, mskA); \ temp2v = vec_xor(temp2v, mskB); \ temp1v = vec_adds(temp1v, vec_and(mskA, one)); \ @@ -66,7 +70,7 @@ { LOAD_ZERO; vector bool short mskA; - vec_u32_t i_qbitsv; + vec_u32_t i_qbitsv = vec_splats( (uint32_t)16 ); vec_u16_t coefvA; vec_u32_t multEvenvA, 
multOddvA; vec_u16_t mfvA; @@ -80,16 +84,214 @@ vec_u16_t mfvB; vec_u16_t biasvB; - vec_s16_t temp1v, temp2v; - - vec_u32_u qbits_u; - qbits_u.s[0]=16; - i_qbitsv = vec_splat(qbits_u.v, 0); + vec_s16_t temp1v, temp2v, tmpv; QUANT_16_U( 0, 16 ); return vec_any_ne(nz, zero_s16v); } +int x264_quant_4x4x4_altivec( dctcoef dcta[4][16], udctcoef mf[16], udctcoef bias[16] ) +{ + LOAD_ZERO; + vec_u32_t i_qbitsv = vec_splats( (uint32_t)16 ); + vec_s16_t one = vec_splat_s16( 1 ); + vec_s16_t nz0, nz1, nz2, nz3; + + vector bool short mskA0; + vec_u16_t coefvA0; + vec_u32_t multEvenvA0, multOddvA0; + vec_u16_t mfvA0; + vec_u16_t biasvA0; + vector bool short mskB0; + vec_u16_t coefvB0; + vec_u32_t multEvenvB0, multOddvB0; + vec_u16_t mfvB0; + vec_u16_t biasvB0; + + vector bool short mskA1; + vec_u16_t coefvA1; + vec_u32_t multEvenvA1, multOddvA1; + vec_u16_t mfvA1; + vec_u16_t biasvA1; + vector bool short mskB1; + vec_u16_t coefvB1; + vec_u32_t multEvenvB1, multOddvB1; + vec_u16_t mfvB1; + vec_u16_t biasvB1; + + vector bool short mskA2; + vec_u16_t coefvA2; + vec_u32_t multEvenvA2, multOddvA2; + vec_u16_t mfvA2; + vec_u16_t biasvA2; + vector bool short mskB2; + vec_u16_t coefvB2; + vec_u32_t multEvenvB2, multOddvB2; + vec_u16_t mfvB2; + vec_u16_t biasvB2; + + vector bool short mskA3; + vec_u16_t coefvA3; + vec_u32_t multEvenvA3, multOddvA3; + vec_u16_t mfvA3; + vec_u16_t biasvA3; + vector bool short mskB3; + vec_u16_t coefvB3; + vec_u32_t multEvenvB3, multOddvB3; + vec_u16_t mfvB3; + vec_u16_t biasvB3; + + vec_s16_t temp1v, temp2v; + vec_s16_t tmpv0; + vec_s16_t tmpv1; + + dctcoef *dct0 = dcta[0]; + dctcoef *dct1 = dcta[1]; + dctcoef *dct2 = dcta[2]; + dctcoef *dct3 = dcta[3]; + + temp1v = vec_ld( 0, dct0 ); + temp2v = vec_ld( 16, dct0 ); + mfvA0 = vec_ld( 0, mf ); + mfvB0 = vec_ld( 16, mf ); + biasvA0 = vec_ld( 0, bias ); + biasvB0 = vec_ld( 16, bias ); + mskA0 = vec_cmplt( temp1v, zero_s16v ); + mskB0 = vec_cmplt( temp2v, zero_s16v ); + coefvA0 = (vec_u16_t)vec_abs( temp1v ); 
+ coefvB0 = (vec_u16_t)vec_abs( temp2v ); + temp1v = vec_ld( 0, dct1 ); + temp2v = vec_ld( 16, dct1 ); + mfvA1 = vec_ld( 0, mf ); + mfvB1 = vec_ld( 16, mf ); + biasvA1 = vec_ld( 0, bias ); + biasvB1 = vec_ld( 16, bias ); + mskA1 = vec_cmplt( temp1v, zero_s16v ); + mskB1 = vec_cmplt( temp2v, zero_s16v ); + coefvA1 = (vec_u16_t)vec_abs( temp1v ); + coefvB1 = (vec_u16_t)vec_abs( temp2v ); + temp1v = vec_ld( 0, dct2 ); + temp2v = vec_ld( 16, dct2 ); + mfvA2 = vec_ld( 0, mf ); + mfvB2 = vec_ld( 16, mf ); + biasvA2 = vec_ld( 0, bias ); + biasvB2 = vec_ld( 16, bias ); + mskA2 = vec_cmplt( temp1v, zero_s16v ); + mskB2 = vec_cmplt( temp2v, zero_s16v ); + coefvA2 = (vec_u16_t)vec_abs( temp1v ); + coefvB2 = (vec_u16_t)vec_abs( temp2v ); + temp1v = vec_ld( 0, dct3 ); + temp2v = vec_ld( 16, dct3 ); + mfvA3 = vec_ld( 0, mf ); + mfvB3 = vec_ld( 16, mf ); + biasvA3 = vec_ld( 0, bias ); + biasvB3 = vec_ld( 16, bias ); + mskA3 = vec_cmplt( temp1v, zero_s16v ); + mskB3 = vec_cmplt( temp2v, zero_s16v ); + coefvA3 = (vec_u16_t)vec_abs( temp1v ); + coefvB3 = (vec_u16_t)vec_abs( temp2v ); + + coefvA0 = vec_adds( coefvA0, biasvA0 ); + coefvB0 = vec_adds( coefvB0, biasvB0 ); + coefvA1 = vec_adds( coefvA1, biasvA1 ); + coefvB1 = vec_adds( coefvB1, biasvB1 ); + coefvA2 = vec_adds( coefvA2, biasvA2 ); + coefvB2 = vec_adds( coefvB2, biasvB2 ); + coefvA3 = vec_adds( coefvA3, biasvA3 ); + coefvB3 = vec_adds( coefvB3, biasvB3 ); + + multEvenvA0 = vec_mule( coefvA0, mfvA0 ); + multOddvA0 = vec_mulo( coefvA0, mfvA0 ); + multEvenvB0 = vec_mule( coefvB0, mfvB0 ); + multOddvB0 = vec_mulo( coefvB0, mfvB0 ); + multEvenvA0 = vec_sr( multEvenvA0, i_qbitsv ); + multOddvA0 = vec_sr( multOddvA0, i_qbitsv ); + multEvenvB0 = vec_sr( multEvenvB0, i_qbitsv ); + multOddvB0 = vec_sr( multOddvB0, i_qbitsv ); + temp1v = (vec_s16_t)vec_packs( multEvenvA0, multOddvA0 ); + temp2v = (vec_s16_t)vec_packs( multEvenvB0, multOddvB0 ); + tmpv0 = xxpermdi( temp1v, temp1v, 2 ); + tmpv1 = xxpermdi( temp2v, temp2v, 2 ); + temp1v 
= vec_mergeh( temp1v, tmpv0 ); + temp2v = vec_mergeh( temp2v, tmpv1 ); + temp1v = vec_xor( temp1v, mskA0 ); + temp2v = vec_xor( temp2v, mskB0 ); + temp1v = vec_adds( temp1v, vec_and( mskA0, one ) ); + temp2v = vec_adds( temp2v, vec_and( mskB0, one ) ); + vec_st( temp1v, 0, dct0 ); + vec_st( temp2v, 16, dct0 ); + nz0 = vec_or( temp1v, temp2v ); + + multEvenvA1 = vec_mule( coefvA1, mfvA1 ); + multOddvA1 = vec_mulo( coefvA1, mfvA1 ); + multEvenvB1 = vec_mule( coefvB1, mfvB1 ); + multOddvB1 = vec_mulo( coefvB1, mfvB1 ); + multEvenvA1 = vec_sr( multEvenvA1, i_qbitsv ); + multOddvA1 = vec_sr( multOddvA1, i_qbitsv ); + multEvenvB1 = vec_sr( multEvenvB1, i_qbitsv ); + multOddvB1 = vec_sr( multOddvB1, i_qbitsv ); + temp1v = (vec_s16_t)vec_packs( multEvenvA1, multOddvA1 ); + temp2v = (vec_s16_t)vec_packs( multEvenvB1, multOddvB1 ); + tmpv0 = xxpermdi( temp1v, temp1v, 2 ); + tmpv1 = xxpermdi( temp2v, temp2v, 2 ); + temp1v = vec_mergeh( temp1v, tmpv0 ); + temp2v = vec_mergeh( temp2v, tmpv1 ); + temp1v = vec_xor( temp1v, mskA1 ); + temp2v = vec_xor( temp2v, mskB1 ); + temp1v = vec_adds( temp1v, vec_and( mskA1, one ) ); + temp2v = vec_adds( temp2v, vec_and( mskB1, one ) ); + vec_st( temp1v, 0, dct1 ); + vec_st( temp2v, 16, dct1 ); + nz1 = vec_or( temp1v, temp2v ); + + multEvenvA2 = vec_mule( coefvA2, mfvA2 ); + multOddvA2 = vec_mulo( coefvA2, mfvA2 ); + multEvenvB2 = vec_mule( coefvB2, mfvB2 ); + multOddvB2 = vec_mulo( coefvB2, mfvB2 ); + multEvenvA2 = vec_sr( multEvenvA2, i_qbitsv ); + multOddvA2 = vec_sr( multOddvA2, i_qbitsv ); + multEvenvB2 = vec_sr( multEvenvB2, i_qbitsv ); + multOddvB2 = vec_sr( multOddvB2, i_qbitsv ); + temp1v = (vec_s16_t)vec_packs( multEvenvA2, multOddvA2 ); + temp2v = (vec_s16_t)vec_packs( multEvenvB2, multOddvB2 ); + tmpv0 = xxpermdi( temp1v, temp1v, 2 ); + tmpv1 = xxpermdi( temp2v, temp2v, 2 ); + temp1v = vec_mergeh( temp1v, tmpv0 ); + temp2v = vec_mergeh( temp2v, tmpv1 ); + temp1v = vec_xor( temp1v, mskA2 ); + temp2v = vec_xor( temp2v, mskB2 ); + 
temp1v = vec_adds( temp1v, vec_and( mskA2, one ) ); + temp2v = vec_adds( temp2v, vec_and( mskB2, one ) ); + vec_st( temp1v, 0, dct2 ); + vec_st( temp2v, 16, dct2 ); + nz2 = vec_or( temp1v, temp2v ); + + multEvenvA3 = vec_mule( coefvA3, mfvA3 ); + multOddvA3 = vec_mulo( coefvA3, mfvA3 ); + multEvenvB3 = vec_mule( coefvB3, mfvB3 ); + multOddvB3 = vec_mulo( coefvB3, mfvB3 ); + multEvenvA3 = vec_sr( multEvenvA3, i_qbitsv ); + multOddvA3 = vec_sr( multOddvA3, i_qbitsv ); + multEvenvB3 = vec_sr( multEvenvB3, i_qbitsv ); + multOddvB3 = vec_sr( multOddvB3, i_qbitsv ); + temp1v = (vec_s16_t)vec_packs( multEvenvA3, multOddvA3 ); + temp2v = (vec_s16_t)vec_packs( multEvenvB3, multOddvB3 ); + tmpv0 = xxpermdi( temp1v, temp1v, 2 ); + tmpv1 = xxpermdi( temp2v, temp2v, 2 ); + temp1v = vec_mergeh( temp1v, tmpv0 ); + temp2v = vec_mergeh( temp2v, tmpv1 ); + temp1v = vec_xor( temp1v, mskA3 ); + temp2v = vec_xor( temp2v, mskB3 ); + temp1v = vec_adds( temp1v, vec_and( mskA3, one ) ); + temp2v = vec_adds( temp2v, vec_and( mskB3, one ) ); + vec_st( temp1v, 0, dct3 ); + vec_st( temp2v, 16, dct3 ); + nz3 = vec_or( temp1v, temp2v ); + + return (vec_any_ne( nz0, zero_s16v ) << 0) | (vec_any_ne( nz1, zero_s16v ) << 1) | + (vec_any_ne( nz2, zero_s16v ) << 2) | (vec_any_ne( nz3, zero_s16v ) << 3); +} + // DC quant of a whole 4x4 block, unrolled 2x and "pre-scheduled" #define QUANT_16_U_DC( idx0, idx1 ) \ { \ @@ -139,17 +341,9 @@ vec_u16_t mfv; vec_u16_t biasv; - vec_u16_u mf_u; - mf_u.s[0]=mf; - mfv = vec_splat( mf_u.v, 0 ); - - vec_u32_u qbits_u; - qbits_u.s[0]=16; - i_qbitsv = vec_splat(qbits_u.v, 0); - - vec_u16_u bias_u; - bias_u.s[0]=bias; - biasv = vec_splat(bias_u.v, 0); + mfv = vec_splats( (uint16_t)mf ); + i_qbitsv = vec_splats( (uint32_t) 16 ); + biasv = vec_splats( (uint16_t)bias ); QUANT_16_U_DC( 0, 16 ); return vec_any_ne(nz, zero_s16v); @@ -184,25 +378,17 @@ vec_u32_t multEvenvA, multOddvA; vec_s16_t one = vec_splat_s16(1); vec_s16_t nz = zero_s16v; + static const vec_s16_t mask2 = 
CV(-1, -1, -1, -1, 0, 0, 0, 0); vec_s16_t temp1v, temp2v; vec_u16_t mfv; vec_u16_t biasv; - vec_u16_u mf_u; - mf_u.s[0]=mf; - mfv = vec_splat( mf_u.v, 0 ); - - vec_u32_u qbits_u; - qbits_u.s[0]=16; - i_qbitsv = vec_splat(qbits_u.v, 0); - - vec_u16_u bias_u; - bias_u.s[0]=bias; - biasv = vec_splat(bias_u.v, 0); + mfv = vec_splats( (uint16_t)mf ); + i_qbitsv = vec_splats( (uint32_t) 16 ); + biasv = vec_splats( (uint16_t)bias ); - static const vec_s16_t mask2 = CV(-1, -1, -1, -1, 0, 0, 0, 0); QUANT_4_U_DC(0); return vec_any_ne(vec_and(nz, mask2), zero_s16v); } @@ -225,11 +411,9 @@ vec_u16_t mfvB; vec_u16_t biasvB; - vec_s16_t temp1v, temp2v; + vec_s16_t temp1v, temp2v, tmpv; - vec_u32_u qbits_u; - qbits_u.s[0]=16; - i_qbitsv = vec_splat(qbits_u.v, 0); + i_qbitsv = vec_splats( (uint32_t)16 ); for( int i = 0; i < 4; i++ ) QUANT_16_U( i*2*16, i*2*16+16 ); @@ -245,8 +429,9 @@ \ multEvenvA = vec_mule(dctv, mfv); \ multOddvA = vec_mulo(dctv, mfv); \ - dctv = (vec_s16_t) vec_packs(vec_mergeh(multEvenvA, multOddvA), \ - vec_mergel(multEvenvA, multOddvA)); \ + dctv = (vec_s16_t) vec_packs( multEvenvA, multOddvA ); \ + tmpv = xxpermdi( dctv, dctv, 2 ); \ + dctv = vec_mergeh( dctv, tmpv ); \ dctv = vec_sl(dctv, i_qbitsv); \ vec_st(dctv, 8*y, dct); \ } @@ -288,7 +473,7 @@ int i_mf = i_qp%6; int i_qbits = i_qp/6 - 4; - vec_s16_t dctv; + vec_s16_t dctv, tmpv; vec_s16_t dct1v, dct2v; vec_s32_t mf1v, mf2v; vec_s16_t mfv; @@ -298,9 +483,7 @@ if( i_qbits >= 0 ) { vec_u16_t i_qbitsv; - vec_u16_u qbits_u; - qbits_u.s[0]=i_qbits; - i_qbitsv = vec_splat(qbits_u.v, 0); + i_qbitsv = vec_splats( (uint16_t) i_qbits ); for( int y = 0; y < 4; y+=2 ) DEQUANT_SHL(); @@ -310,19 +493,13 @@ const int f = 1 << (-i_qbits-1); vec_s32_t fv; - vec_u32_u f_u; - f_u.s[0]=f; - fv = (vec_s32_t)vec_splat(f_u.v, 0); + fv = vec_splats( f ); vec_u32_t i_qbitsv; - vec_u32_u qbits_u; - qbits_u.s[0]=-i_qbits; - i_qbitsv = vec_splat(qbits_u.v, 0); + i_qbitsv = vec_splats( (uint32_t)-i_qbits ); vec_u32_t sixteenv; - 
vec_u32_u sixteen_u; - sixteen_u.s[0]=16; - sixteenv = vec_splat(sixteen_u.v, 0); + sixteenv = vec_splats( (uint32_t)16 ); for( int y = 0; y < 4; y+=2 ) DEQUANT_SHR(); @@ -334,7 +511,7 @@ int i_mf = i_qp%6; int i_qbits = i_qp/6 - 6; - vec_s16_t dctv; + vec_s16_t dctv, tmpv; vec_s16_t dct1v, dct2v; vec_s32_t mf1v, mf2v; vec_s16_t mfv; @@ -344,9 +521,7 @@ if( i_qbits >= 0 ) { vec_u16_t i_qbitsv; - vec_u16_u qbits_u; - qbits_u.s[0]=i_qbits; - i_qbitsv = vec_splat(qbits_u.v, 0); + i_qbitsv = vec_splats((uint16_t)i_qbits ); for( int y = 0; y < 16; y+=2 ) DEQUANT_SHL(); @@ -356,19 +531,13 @@ const int f = 1 << (-i_qbits-1); vec_s32_t fv; - vec_u32_u f_u; - f_u.s[0]=f; - fv = (vec_s32_t)vec_splat(f_u.v, 0); + fv = vec_splats( f ); vec_u32_t i_qbitsv; - vec_u32_u qbits_u; - qbits_u.s[0]=-i_qbits; - i_qbitsv = vec_splat(qbits_u.v, 0); + i_qbitsv = vec_splats( (uint32_t)-i_qbits ); vec_u32_t sixteenv; - vec_u32_u sixteen_u; - sixteen_u.s[0]=16; - sixteenv = vec_splat(sixteen_u.v, 0); + sixteenv = vec_splats( (uint32_t)16 ); for( int y = 0; y < 16; y+=2 ) DEQUANT_SHR(); diff -Nru x264-0.152.2854+gite9a5903/common/ppc/quant.h x264-0.158.2988+git-20191101.7817004/common/ppc/quant.h --- x264-0.152.2854+gite9a5903/common/ppc/quant.h 2017-12-31 12:50:51.000000000 +0000 +++ x264-0.158.2988+git-20191101.7817004/common/ppc/quant.h 2019-11-09 05:16:29.000000000 +0000 @@ -1,7 +1,7 @@ /***************************************************************************** * quant.h: ppc quantization ***************************************************************************** - * Copyright (C) 2007-2017 x264 project + * Copyright (C) 2007-2019 x264 project * * Authors: Guillaume Poirier * @@ -26,12 +26,21 @@ #ifndef X264_PPC_QUANT_H #define X264_PPC_QUANT_H +#define x264_quant_4x4x4_altivec x264_template(quant_4x4x4_altivec) +int x264_quant_4x4x4_altivec( int16_t dct[4][16], uint16_t mf[16], uint16_t bias[16] ); +#define x264_quant_4x4_altivec x264_template(quant_4x4_altivec) int 
x264_quant_4x4_altivec( int16_t dct[16], uint16_t mf[16], uint16_t bias[16] ); +#define x264_quant_8x8_altivec x264_template(quant_8x8_altivec) int x264_quant_8x8_altivec( int16_t dct[64], uint16_t mf[64], uint16_t bias[64] ); +#define x264_quant_4x4_dc_altivec x264_template(quant_4x4_dc_altivec) int x264_quant_4x4_dc_altivec( int16_t dct[16], int mf, int bias ); +#define x264_quant_2x2_dc_altivec x264_template(quant_2x2_dc_altivec) int x264_quant_2x2_dc_altivec( int16_t dct[4], int mf, int bias ); +#define x264_dequant_4x4_altivec x264_template(dequant_4x4_altivec) void x264_dequant_4x4_altivec( int16_t dct[16], int dequant_mf[6][16], int i_qp ); +#define x264_dequant_8x8_altivec x264_template(dequant_8x8_altivec) void x264_dequant_8x8_altivec( int16_t dct[64], int dequant_mf[6][64], int i_qp ); + #endif diff -Nru x264-0.152.2854+gite9a5903/common/predict.c x264-0.158.2988+git-20191101.7817004/common/predict.c --- x264-0.152.2854+gite9a5903/common/predict.c 2017-12-31 12:50:51.000000000 +0000 +++ x264-0.158.2988+git-20191101.7817004/common/predict.c 2019-11-09 05:16:29.000000000 +0000 @@ -1,7 +1,7 @@ /***************************************************************************** * predict.c: intra prediction ***************************************************************************** - * Copyright (C) 2003-2017 x264 project + * Copyright (C) 2003-2019 x264 project * * Authors: Laurent Aimar * Loren Merritt @@ -34,16 +34,16 @@ #if HAVE_MMX # include "x86/predict.h" #endif -#if ARCH_PPC +#if HAVE_ALTIVEC # include "ppc/predict.h" #endif -#if ARCH_ARM +#if HAVE_ARMV6 # include "arm/predict.h" #endif -#if ARCH_AARCH64 +#if HAVE_AARCH64 # include "aarch64/predict.h" #endif -#if ARCH_MIPS +#if HAVE_MSA # include "mips/predict.h" #endif @@ -74,7 +74,7 @@ PREDICT_16x16_DC( dcsplat ); } -static void x264_predict_16x16_dc_left_c( pixel *src ) +static void predict_16x16_dc_left_c( pixel *src ) { int dc = 0; @@ -84,7 +84,7 @@ PREDICT_16x16_DC( dcsplat ); } -static void 
x264_predict_16x16_dc_top_c( pixel *src ) +static void predict_16x16_dc_top_c( pixel *src ) { int dc = 0; @@ -94,7 +94,7 @@ PREDICT_16x16_DC( dcsplat ); } -static void x264_predict_16x16_dc_128_c( pixel *src ) +static void predict_16x16_dc_128_c( pixel *src ) { PREDICT_16x16_DC( PIXEL_SPLAT_X4( 1 << (BIT_DEPTH-1) ) ); } @@ -161,7 +161,7 @@ * 8x8 prediction for intra chroma block (4:2:0) ****************************************************************************/ -static void x264_predict_8x8c_dc_128_c( pixel *src ) +static void predict_8x8c_dc_128_c( pixel *src ) { for( int y = 0; y < 8; y++ ) { @@ -170,7 +170,7 @@ src += FDEC_STRIDE; } } -static void x264_predict_8x8c_dc_left_c( pixel *src ) +static void predict_8x8c_dc_left_c( pixel *src ) { int dc0 = 0, dc1 = 0; @@ -196,7 +196,7 @@ } } -static void x264_predict_8x8c_dc_top_c( pixel *src ) +static void predict_8x8c_dc_top_c( pixel *src ) { int dc0 = 0, dc1 = 0; @@ -308,7 +308,7 @@ * 8x16 prediction for intra chroma block (4:2:2) ****************************************************************************/ -static void x264_predict_8x16c_dc_128_c( pixel *src ) +static void predict_8x16c_dc_128_c( pixel *src ) { for( int y = 0; y < 16; y++ ) { @@ -317,7 +317,7 @@ src += FDEC_STRIDE; } } -static void x264_predict_8x16c_dc_left_c( pixel *src ) +static void predict_8x16c_dc_left_c( pixel *src ) { for( int i = 0; i < 4; i++ ) { @@ -336,7 +336,7 @@ } } } -static void x264_predict_8x16c_dc_top_c( pixel *src ) +static void predict_8x16c_dc_top_c( pixel *src ) { int dc0 = 0, dc1 = 0; @@ -475,16 +475,16 @@ #define PREDICT_4x4_DC(v)\ SRC_X4(0,0) = SRC_X4(0,1) = SRC_X4(0,2) = SRC_X4(0,3) = v; -static void x264_predict_4x4_dc_128_c( pixel *src ) +static void predict_4x4_dc_128_c( pixel *src ) { PREDICT_4x4_DC( PIXEL_SPLAT_X4( 1 << (BIT_DEPTH-1) ) ); } -static void x264_predict_4x4_dc_left_c( pixel *src ) +static void predict_4x4_dc_left_c( pixel *src ) { pixel4 dc = PIXEL_SPLAT_X4( (SRC(-1,0) + SRC(-1,1) + SRC(-1,2) + 
SRC(-1,3) + 2) >> 2 ); PREDICT_4x4_DC( dc ); } -static void x264_predict_4x4_dc_top_c( pixel *src ) +static void predict_4x4_dc_top_c( pixel *src ) { pixel4 dc = PIXEL_SPLAT_X4( (SRC(0,-1) + SRC(1,-1) + SRC(2,-1) + SRC(3,-1) + 2) >> 2 ); PREDICT_4x4_DC( dc ); @@ -528,7 +528,7 @@ #define F1(a,b) (((a)+(b)+1)>>1) #define F2(a,b,c) (((a)+2*(b)+(c)+2)>>2) -static void x264_predict_4x4_ddl_c( pixel *src ) +static void predict_4x4_ddl_c( pixel *src ) { PREDICT_4x4_LOAD_TOP PREDICT_4x4_LOAD_TOP_RIGHT @@ -540,7 +540,7 @@ SRC(3,2)=SRC(2,3)= F2(t5,t6,t7); SRC(3,3)= F2(t6,t7,t7); } -static void x264_predict_4x4_ddr_c( pixel *src ) +static void predict_4x4_ddr_c( pixel *src ) { int lt = SRC(-1,-1); PREDICT_4x4_LOAD_LEFT @@ -554,7 +554,7 @@ SRC(0,3)= F2(l1,l2,l3); } -static void x264_predict_4x4_vr_c( pixel *src ) +static void predict_4x4_vr_c( pixel *src ) { int lt = SRC(-1,-1); PREDICT_4x4_LOAD_LEFT @@ -571,7 +571,7 @@ SRC(3,0)= F1(t2,t3); } -static void x264_predict_4x4_hd_c( pixel *src ) +static void predict_4x4_hd_c( pixel *src ) { int lt= SRC(-1,-1); PREDICT_4x4_LOAD_LEFT @@ -588,7 +588,7 @@ SRC(3,0)= F2(t2,t1,t0); } -static void x264_predict_4x4_vl_c( pixel *src ) +static void predict_4x4_vl_c( pixel *src ) { PREDICT_4x4_LOAD_TOP PREDICT_4x4_LOAD_TOP_RIGHT @@ -604,7 +604,7 @@ SRC(3,3)= F2(t4,t5,t6); } -static void x264_predict_4x4_hu_c( pixel *src ) +static void predict_4x4_hu_c( pixel *src ) { PREDICT_4x4_LOAD_LEFT SRC(0,0)= F1(l0,l1); @@ -626,7 +626,7 @@ #define PT(x) \ edge[16+x] = F2(SRC(x-1,-1), SRC(x,-1), SRC(x+1,-1)); -static void x264_predict_8x8_filter_c( pixel *src, pixel edge[36], int i_neighbor, int i_filters ) +static void predict_8x8_filter_c( pixel *src, pixel edge[36], int i_neighbor, int i_filters ) { /* edge[7..14] = l7..l0 * edge[15] = lt @@ -694,17 +694,17 @@ src += FDEC_STRIDE; \ } -static void x264_predict_8x8_dc_128_c( pixel *src, pixel edge[36] ) +static void predict_8x8_dc_128_c( pixel *src, pixel edge[36] ) { PREDICT_8x8_DC( PIXEL_SPLAT_X4( 1 << 
(BIT_DEPTH-1) ) ); } -static void x264_predict_8x8_dc_left_c( pixel *src, pixel edge[36] ) +static void predict_8x8_dc_left_c( pixel *src, pixel edge[36] ) { PREDICT_8x8_LOAD_LEFT pixel4 dc = PIXEL_SPLAT_X4( (l0+l1+l2+l3+l4+l5+l6+l7+4) >> 3 ); PREDICT_8x8_DC( dc ); } -static void x264_predict_8x8_dc_top_c( pixel *src, pixel edge[36] ) +static void predict_8x8_dc_top_c( pixel *src, pixel edge[36] ) { PREDICT_8x8_LOAD_TOP pixel4 dc = PIXEL_SPLAT_X4( (t0+t1+t2+t3+t4+t5+t6+t7+4) >> 3 ); @@ -735,7 +735,7 @@ MPIXEL_X4( src+y*FDEC_STRIDE+4 ) = top[1]; } } -static void x264_predict_8x8_ddl_c( pixel *src, pixel edge[36] ) +static void predict_8x8_ddl_c( pixel *src, pixel edge[36] ) { PREDICT_8x8_LOAD_TOP PREDICT_8x8_LOAD_TOPRIGHT @@ -755,7 +755,7 @@ SRC(6,7)=SRC(7,6)= F2(t13,t14,t15); SRC(7,7)= F2(t14,t15,t15); } -static void x264_predict_8x8_ddr_c( pixel *src, pixel edge[36] ) +static void predict_8x8_ddr_c( pixel *src, pixel edge[36] ) { PREDICT_8x8_LOAD_TOP PREDICT_8x8_LOAD_LEFT @@ -777,7 +777,7 @@ SRC(7,0)= F2(t5,t6,t7); } -static void x264_predict_8x8_vr_c( pixel *src, pixel edge[36] ) +static void predict_8x8_vr_c( pixel *src, pixel edge[36] ) { PREDICT_8x8_LOAD_TOP PREDICT_8x8_LOAD_LEFT @@ -805,7 +805,7 @@ SRC(7,1)= F2(t5,t6,t7); SRC(7,0)= F1(t6,t7); } -static void x264_predict_8x8_hd_c( pixel *src, pixel edge[36] ) +static void predict_8x8_hd_c( pixel *src, pixel edge[36] ) { PREDICT_8x8_LOAD_TOP PREDICT_8x8_LOAD_LEFT @@ -832,7 +832,7 @@ SRC_X4(4,1)= pack_pixel_2to4(p9,p10); SRC_X4(4,0)= pack_pixel_2to4(p10,p11); } -static void x264_predict_8x8_vl_c( pixel *src, pixel edge[36] ) +static void predict_8x8_vl_c( pixel *src, pixel edge[36] ) { PREDICT_8x8_LOAD_TOP PREDICT_8x8_LOAD_TOPRIGHT @@ -859,7 +859,7 @@ SRC(7,6)= F1(t10,t11); SRC(7,7)= F2(t10,t11,t12); } -static void x264_predict_8x8_hu_c( pixel *src, pixel edge[36] ) +static void predict_8x8_hu_c( pixel *src, pixel edge[36] ) { PREDICT_8x8_LOAD_LEFT int p1 = pack_pixel_1to2(F1(l0,l1), F2(l0,l1,l2)); @@ -889,9 
+889,9 @@ pf[I_PRED_16x16_H ] = x264_predict_16x16_h_c; pf[I_PRED_16x16_DC] = x264_predict_16x16_dc_c; pf[I_PRED_16x16_P ] = x264_predict_16x16_p_c; - pf[I_PRED_16x16_DC_LEFT]= x264_predict_16x16_dc_left_c; - pf[I_PRED_16x16_DC_TOP ]= x264_predict_16x16_dc_top_c; - pf[I_PRED_16x16_DC_128 ]= x264_predict_16x16_dc_128_c; + pf[I_PRED_16x16_DC_LEFT]= predict_16x16_dc_left_c; + pf[I_PRED_16x16_DC_TOP ]= predict_16x16_dc_top_c; + pf[I_PRED_16x16_DC_128 ]= predict_16x16_dc_128_c; #if HAVE_MMX x264_predict_16x16_init_mmx( cpu, pf ); @@ -906,7 +906,7 @@ x264_predict_16x16_init_arm( cpu, pf ); #endif -#if ARCH_AARCH64 +#if HAVE_AARCH64 x264_predict_16x16_init_aarch64( cpu, pf ); #endif @@ -932,9 +932,9 @@ pf[I_PRED_CHROMA_H ] = x264_predict_8x8c_h_c; pf[I_PRED_CHROMA_DC] = x264_predict_8x8c_dc_c; pf[I_PRED_CHROMA_P ] = x264_predict_8x8c_p_c; - pf[I_PRED_CHROMA_DC_LEFT]= x264_predict_8x8c_dc_left_c; - pf[I_PRED_CHROMA_DC_TOP ]= x264_predict_8x8c_dc_top_c; - pf[I_PRED_CHROMA_DC_128 ]= x264_predict_8x8c_dc_128_c; + pf[I_PRED_CHROMA_DC_LEFT]= predict_8x8c_dc_left_c; + pf[I_PRED_CHROMA_DC_TOP ]= predict_8x8c_dc_top_c; + pf[I_PRED_CHROMA_DC_128 ]= predict_8x8c_dc_128_c; #if HAVE_MMX x264_predict_8x8c_init_mmx( cpu, pf ); @@ -949,7 +949,7 @@ x264_predict_8x8c_init_arm( cpu, pf ); #endif -#if ARCH_AARCH64 +#if HAVE_AARCH64 x264_predict_8x8c_init_aarch64( cpu, pf ); #endif @@ -969,9 +969,9 @@ pf[I_PRED_CHROMA_H ] = x264_predict_8x16c_h_c; pf[I_PRED_CHROMA_DC] = x264_predict_8x16c_dc_c; pf[I_PRED_CHROMA_P ] = x264_predict_8x16c_p_c; - pf[I_PRED_CHROMA_DC_LEFT]= x264_predict_8x16c_dc_left_c; - pf[I_PRED_CHROMA_DC_TOP ]= x264_predict_8x16c_dc_top_c; - pf[I_PRED_CHROMA_DC_128 ]= x264_predict_8x16c_dc_128_c; + pf[I_PRED_CHROMA_DC_LEFT]= predict_8x16c_dc_left_c; + pf[I_PRED_CHROMA_DC_TOP ]= predict_8x16c_dc_top_c; + pf[I_PRED_CHROMA_DC_128 ]= predict_8x16c_dc_128_c; #if HAVE_MMX x264_predict_8x16c_init_mmx( cpu, pf ); @@ -981,7 +981,7 @@ x264_predict_8x16c_init_arm( cpu, pf ); #endif -#if 
ARCH_AARCH64 +#if HAVE_AARCH64 x264_predict_8x16c_init_aarch64( cpu, pf ); #endif } @@ -991,16 +991,16 @@ pf[I_PRED_8x8_V] = x264_predict_8x8_v_c; pf[I_PRED_8x8_H] = x264_predict_8x8_h_c; pf[I_PRED_8x8_DC] = x264_predict_8x8_dc_c; - pf[I_PRED_8x8_DDL] = x264_predict_8x8_ddl_c; - pf[I_PRED_8x8_DDR] = x264_predict_8x8_ddr_c; - pf[I_PRED_8x8_VR] = x264_predict_8x8_vr_c; - pf[I_PRED_8x8_HD] = x264_predict_8x8_hd_c; - pf[I_PRED_8x8_VL] = x264_predict_8x8_vl_c; - pf[I_PRED_8x8_HU] = x264_predict_8x8_hu_c; - pf[I_PRED_8x8_DC_LEFT]= x264_predict_8x8_dc_left_c; - pf[I_PRED_8x8_DC_TOP] = x264_predict_8x8_dc_top_c; - pf[I_PRED_8x8_DC_128] = x264_predict_8x8_dc_128_c; - *predict_filter = x264_predict_8x8_filter_c; + pf[I_PRED_8x8_DDL] = predict_8x8_ddl_c; + pf[I_PRED_8x8_DDR] = predict_8x8_ddr_c; + pf[I_PRED_8x8_VR] = predict_8x8_vr_c; + pf[I_PRED_8x8_HD] = predict_8x8_hd_c; + pf[I_PRED_8x8_VL] = predict_8x8_vl_c; + pf[I_PRED_8x8_HU] = predict_8x8_hu_c; + pf[I_PRED_8x8_DC_LEFT]= predict_8x8_dc_left_c; + pf[I_PRED_8x8_DC_TOP] = predict_8x8_dc_top_c; + pf[I_PRED_8x8_DC_128] = predict_8x8_dc_128_c; + *predict_filter = predict_8x8_filter_c; #if HAVE_MMX x264_predict_8x8_init_mmx( cpu, pf, predict_filter ); @@ -1010,7 +1010,7 @@ x264_predict_8x8_init_arm( cpu, pf, predict_filter ); #endif -#if ARCH_AARCH64 +#if HAVE_AARCH64 x264_predict_8x8_init_aarch64( cpu, pf, predict_filter ); #endif @@ -1029,15 +1029,15 @@ pf[I_PRED_4x4_V] = x264_predict_4x4_v_c; pf[I_PRED_4x4_H] = x264_predict_4x4_h_c; pf[I_PRED_4x4_DC] = x264_predict_4x4_dc_c; - pf[I_PRED_4x4_DDL] = x264_predict_4x4_ddl_c; - pf[I_PRED_4x4_DDR] = x264_predict_4x4_ddr_c; - pf[I_PRED_4x4_VR] = x264_predict_4x4_vr_c; - pf[I_PRED_4x4_HD] = x264_predict_4x4_hd_c; - pf[I_PRED_4x4_VL] = x264_predict_4x4_vl_c; - pf[I_PRED_4x4_HU] = x264_predict_4x4_hu_c; - pf[I_PRED_4x4_DC_LEFT]= x264_predict_4x4_dc_left_c; - pf[I_PRED_4x4_DC_TOP] = x264_predict_4x4_dc_top_c; - pf[I_PRED_4x4_DC_128] = x264_predict_4x4_dc_128_c; + pf[I_PRED_4x4_DDL] = 
predict_4x4_ddl_c; + pf[I_PRED_4x4_DDR] = predict_4x4_ddr_c; + pf[I_PRED_4x4_VR] = predict_4x4_vr_c; + pf[I_PRED_4x4_HD] = predict_4x4_hd_c; + pf[I_PRED_4x4_VL] = predict_4x4_vl_c; + pf[I_PRED_4x4_HU] = predict_4x4_hu_c; + pf[I_PRED_4x4_DC_LEFT]= predict_4x4_dc_left_c; + pf[I_PRED_4x4_DC_TOP] = predict_4x4_dc_top_c; + pf[I_PRED_4x4_DC_128] = predict_4x4_dc_128_c; #if HAVE_MMX x264_predict_4x4_init_mmx( cpu, pf ); @@ -1047,7 +1047,7 @@ x264_predict_4x4_init_arm( cpu, pf ); #endif -#if ARCH_AARCH64 +#if HAVE_AARCH64 x264_predict_4x4_init_aarch64( cpu, pf ); #endif } diff -Nru x264-0.152.2854+gite9a5903/common/predict.h x264-0.158.2988+git-20191101.7817004/common/predict.h --- x264-0.152.2854+gite9a5903/common/predict.h 2017-12-31 12:50:51.000000000 +0000 +++ x264-0.158.2988+git-20191101.7817004/common/predict.h 2019-11-09 05:16:29.000000000 +0000 @@ -1,7 +1,7 @@ /***************************************************************************** * predict.h: intra prediction ***************************************************************************** - * Copyright (C) 2003-2017 x264 project + * Copyright (C) 2003-2019 x264 project * * Authors: Loren Merritt * Laurent Aimar @@ -109,30 +109,52 @@ I_PRED_8x8_DC_128 = 11, }; +#define x264_predict_8x8_dc_c x264_template(predict_8x8_dc_c) void x264_predict_8x8_dc_c ( pixel *src, pixel edge[36] ); +#define x264_predict_8x8_h_c x264_template(predict_8x8_h_c) void x264_predict_8x8_h_c ( pixel *src, pixel edge[36] ); +#define x264_predict_8x8_v_c x264_template(predict_8x8_v_c) void x264_predict_8x8_v_c ( pixel *src, pixel edge[36] ); +#define x264_predict_4x4_dc_c x264_template(predict_4x4_dc_c) void x264_predict_4x4_dc_c ( pixel *src ); +#define x264_predict_4x4_h_c x264_template(predict_4x4_h_c) void x264_predict_4x4_h_c ( pixel *src ); +#define x264_predict_4x4_v_c x264_template(predict_4x4_v_c) void x264_predict_4x4_v_c ( pixel *src ); +#define x264_predict_16x16_dc_c x264_template(predict_16x16_dc_c) void 
x264_predict_16x16_dc_c( pixel *src ); +#define x264_predict_16x16_h_c x264_template(predict_16x16_h_c) void x264_predict_16x16_h_c ( pixel *src ); +#define x264_predict_16x16_v_c x264_template(predict_16x16_v_c) void x264_predict_16x16_v_c ( pixel *src ); +#define x264_predict_16x16_p_c x264_template(predict_16x16_p_c) void x264_predict_16x16_p_c ( pixel *src ); +#define x264_predict_8x8c_dc_c x264_template(predict_8x8c_dc_c) void x264_predict_8x8c_dc_c ( pixel *src ); +#define x264_predict_8x8c_h_c x264_template(predict_8x8c_h_c) void x264_predict_8x8c_h_c ( pixel *src ); +#define x264_predict_8x8c_v_c x264_template(predict_8x8c_v_c) void x264_predict_8x8c_v_c ( pixel *src ); +#define x264_predict_8x8c_p_c x264_template(predict_8x8c_p_c) void x264_predict_8x8c_p_c ( pixel *src ); +#define x264_predict_8x16c_dc_c x264_template(predict_8x16c_dc_c) void x264_predict_8x16c_dc_c( pixel *src ); +#define x264_predict_8x16c_h_c x264_template(predict_8x16c_h_c) void x264_predict_8x16c_h_c ( pixel *src ); +#define x264_predict_8x16c_v_c x264_template(predict_8x16c_v_c) void x264_predict_8x16c_v_c ( pixel *src ); +#define x264_predict_8x16c_p_c x264_template(predict_8x16c_p_c) void x264_predict_8x16c_p_c ( pixel *src ); +#define x264_predict_16x16_init x264_template(predict_16x16_init) void x264_predict_16x16_init ( int cpu, x264_predict_t pf[7] ); +#define x264_predict_8x8c_init x264_template(predict_8x8c_init) void x264_predict_8x8c_init ( int cpu, x264_predict_t pf[7] ); +#define x264_predict_8x16c_init x264_template(predict_8x16c_init) void x264_predict_8x16c_init ( int cpu, x264_predict_t pf[7] ); +#define x264_predict_4x4_init x264_template(predict_4x4_init) void x264_predict_4x4_init ( int cpu, x264_predict_t pf[12] ); +#define x264_predict_8x8_init x264_template(predict_8x8_init) void x264_predict_8x8_init ( int cpu, x264_predict8x8_t pf[12], x264_predict_8x8_filter_t *predict_filter ); - #endif diff -Nru x264-0.152.2854+gite9a5903/common/quant.c 
x264-0.158.2988+git-20191101.7817004/common/quant.c --- x264-0.152.2854+gite9a5903/common/quant.c 2017-12-31 12:50:51.000000000 +0000 +++ x264-0.158.2988+git-20191101.7817004/common/quant.c 2019-11-09 05:16:29.000000000 +0000 @@ -1,7 +1,7 @@ /***************************************************************************** * quant.c: quantization and level-run ***************************************************************************** - * Copyright (C) 2005-2017 x264 project + * Copyright (C) 2005-2019 x264 project * * Authors: Loren Merritt * Fiona Glaser @@ -31,16 +31,16 @@ #if HAVE_MMX #include "x86/quant.h" #endif -#if ARCH_PPC +#if HAVE_ALTIVEC # include "ppc/quant.h" #endif -#if ARCH_ARM +#if HAVE_ARMV6 # include "arm/quant.h" #endif -#if ARCH_AARCH64 +#if HAVE_AARCH64 # include "aarch64/quant.h" #endif -#if ARCH_MIPS +#if HAVE_MSA # include "mips/quant.h" #endif @@ -298,7 +298,7 @@ return optimize_chroma_dc_internal( dct, dequant_mf, 1 ); } -static void x264_denoise_dct( dctcoef *dct, uint32_t *sum, udctcoef *offset, int size ) +static void denoise_dct( dctcoef *dct, uint32_t *sum, udctcoef *offset, int size ) { for( int i = 0; i < size; i++ ) { @@ -320,19 +320,7 @@ * chroma: for the complete mb: if score < 7 -> null */ -const uint8_t x264_decimate_table4[16] = -{ - 3,2,2,1,1,1,0,0,0,0,0,0,0,0,0,0 -}; -const uint8_t x264_decimate_table8[64] = -{ - 3,3,3,3,2,2,2,2,2,2,2,2,1,1,1,1, - 1,1,1,1,1,1,1,1,0,0,0,0,0,0,0,0, - 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, - 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 -}; - -static int ALWAYS_INLINE x264_decimate_score_internal( dctcoef *dct, int i_max ) +static ALWAYS_INLINE int decimate_score_internal( dctcoef *dct, int i_max ) { const uint8_t *ds_table = (i_max == 64) ? 
x264_decimate_table8 : x264_decimate_table4; int i_score = 0; @@ -359,21 +347,21 @@ return i_score; } -static int x264_decimate_score15( dctcoef *dct ) +static int decimate_score15( dctcoef *dct ) { - return x264_decimate_score_internal( dct+1, 15 ); + return decimate_score_internal( dct+1, 15 ); } -static int x264_decimate_score16( dctcoef *dct ) +static int decimate_score16( dctcoef *dct ) { - return x264_decimate_score_internal( dct, 16 ); + return decimate_score_internal( dct, 16 ); } -static int x264_decimate_score64( dctcoef *dct ) +static int decimate_score64( dctcoef *dct ) { - return x264_decimate_score_internal( dct, 64 ); + return decimate_score_internal( dct, 64 ); } #define last(num)\ -static int x264_coeff_last##num( dctcoef *l )\ +static int coeff_last##num( dctcoef *l )\ {\ int i_last = num-1;\ while( i_last >= 0 && l[i_last] == 0 )\ @@ -388,9 +376,9 @@ last(64) #define level_run(num)\ -static int x264_coeff_level_run##num( dctcoef *dct, x264_run_level_t *runlevel )\ +static int coeff_level_run##num( dctcoef *dct, x264_run_level_t *runlevel )\ {\ - int i_last = runlevel->last = x264_coeff_last##num(dct);\ + int i_last = runlevel->last = coeff_last##num(dct);\ int i_total = 0;\ int mask = 0;\ do\ @@ -438,20 +426,20 @@ pf->optimize_chroma_2x2_dc = optimize_chroma_2x2_dc; pf->optimize_chroma_2x4_dc = optimize_chroma_2x4_dc; - pf->denoise_dct = x264_denoise_dct; - pf->decimate_score15 = x264_decimate_score15; - pf->decimate_score16 = x264_decimate_score16; - pf->decimate_score64 = x264_decimate_score64; - - pf->coeff_last4 = x264_coeff_last4; - pf->coeff_last8 = x264_coeff_last8; - pf->coeff_last[ DCT_LUMA_AC] = x264_coeff_last15; - pf->coeff_last[ DCT_LUMA_4x4] = x264_coeff_last16; - pf->coeff_last[ DCT_LUMA_8x8] = x264_coeff_last64; - pf->coeff_level_run4 = x264_coeff_level_run4; - pf->coeff_level_run8 = x264_coeff_level_run8; - pf->coeff_level_run[ DCT_LUMA_AC] = x264_coeff_level_run15; - pf->coeff_level_run[ DCT_LUMA_4x4] = x264_coeff_level_run16; + 
pf->denoise_dct = denoise_dct; + pf->decimate_score15 = decimate_score15; + pf->decimate_score16 = decimate_score16; + pf->decimate_score64 = decimate_score64; + + pf->coeff_last4 = coeff_last4; + pf->coeff_last8 = coeff_last8; + pf->coeff_last[ DCT_LUMA_AC] = coeff_last15; + pf->coeff_last[ DCT_LUMA_4x4] = coeff_last16; + pf->coeff_last[ DCT_LUMA_8x8] = coeff_last64; + pf->coeff_level_run4 = coeff_level_run4; + pf->coeff_level_run8 = coeff_level_run8; + pf->coeff_level_run[ DCT_LUMA_AC] = coeff_level_run15; + pf->coeff_level_run[ DCT_LUMA_4x4] = coeff_level_run16; #if HIGH_BIT_DEPTH #if HAVE_MMX @@ -753,6 +741,7 @@ pf->quant_2x2_dc = x264_quant_2x2_dc_altivec; pf->quant_4x4_dc = x264_quant_4x4_dc_altivec; pf->quant_4x4 = x264_quant_4x4_altivec; + pf->quant_4x4x4 = x264_quant_4x4x4_altivec; pf->quant_8x8 = x264_quant_8x8_altivec; pf->dequant_4x4 = x264_dequant_4x4_altivec; @@ -767,7 +756,7 @@ pf->coeff_last8 = x264_coeff_last8_arm; } #endif -#if HAVE_ARMV6 || ARCH_AARCH64 +#if HAVE_ARMV6 || HAVE_AARCH64 if( cpu&X264_CPU_NEON ) { pf->quant_2x2_dc = x264_quant_2x2_dc_neon; @@ -787,7 +776,7 @@ pf->decimate_score64 = x264_decimate_score64_neon; } #endif -#if ARCH_AARCH64 +#if HAVE_AARCH64 if( cpu&X264_CPU_ARMV8 ) { pf->coeff_last4 = x264_coeff_last4_aarch64; diff -Nru x264-0.152.2854+gite9a5903/common/quant.h x264-0.158.2988+git-20191101.7817004/common/quant.h --- x264-0.152.2854+gite9a5903/common/quant.h 2017-12-31 12:50:51.000000000 +0000 +++ x264-0.158.2988+git-20191101.7817004/common/quant.h 2019-11-09 05:16:29.000000000 +0000 @@ -1,7 +1,7 @@ /***************************************************************************** * quant.h: quantization and level-run ***************************************************************************** - * Copyright (C) 2005-2017 x264 project + * Copyright (C) 2005-2019 x264 project * * Authors: Loren Merritt * Fiona Glaser @@ -69,6 +69,7 @@ int (*trellis_cabac_chroma_422_dc)( TRELLIS_PARAMS ); } x264_quant_function_t; +#define 
x264_quant_init x264_template(quant_init) void x264_quant_init( x264_t *h, int cpu, x264_quant_function_t *pf ); #endif diff -Nru x264-0.152.2854+gite9a5903/common/rectangle.c x264-0.158.2988+git-20191101.7817004/common/rectangle.c --- x264-0.152.2854+gite9a5903/common/rectangle.c 2017-12-31 12:50:51.000000000 +0000 +++ x264-0.158.2988+git-20191101.7817004/common/rectangle.c 2019-11-09 05:16:29.000000000 +0000 @@ -1,7 +1,7 @@ /***************************************************************************** * rectangle.c: rectangle filling ***************************************************************************** - * Copyright (C) 2010-2017 x264 project + * Copyright (C) 2010-2019 x264 project * * Authors: Fiona Glaser * @@ -26,7 +26,7 @@ #include "common.h" #define CACHE_FUNC(name,size,width,height)\ -static void x264_macroblock_cache_##name##_##width##_##height( void *target, uint32_t val )\ +static void macroblock_cache_##name##_##width##_##height( void *target, uint32_t val )\ {\ x264_macroblock_cache_rect( target, width*size, height, size, val );\ } @@ -41,16 +41,16 @@ CACHE_FUNC(name,size,1,1)\ void (*x264_cache_##name##_func_table[10])(void *, uint32_t) =\ {\ - x264_macroblock_cache_##name##_1_1,\ - x264_macroblock_cache_##name##_2_1,\ - x264_macroblock_cache_##name##_1_2,\ - x264_macroblock_cache_##name##_2_2,\ + macroblock_cache_##name##_1_1,\ + macroblock_cache_##name##_2_1,\ + macroblock_cache_##name##_1_2,\ + macroblock_cache_##name##_2_2,\ NULL,\ - x264_macroblock_cache_##name##_4_2,\ + macroblock_cache_##name##_4_2,\ NULL,\ - x264_macroblock_cache_##name##_2_4,\ + macroblock_cache_##name##_2_4,\ NULL,\ - x264_macroblock_cache_##name##_4_4\ + macroblock_cache_##name##_4_4\ };\ CACHE_FUNCS(mv, 4) diff -Nru x264-0.152.2854+gite9a5903/common/rectangle.h x264-0.158.2988+git-20191101.7817004/common/rectangle.h --- x264-0.152.2854+gite9a5903/common/rectangle.h 2017-12-31 12:50:51.000000000 +0000 +++ x264-0.158.2988+git-20191101.7817004/common/rectangle.h 
2019-11-09 05:16:29.000000000 +0000 @@ -1,7 +1,7 @@ /***************************************************************************** * rectangle.h: rectangle filling ***************************************************************************** - * Copyright (C) 2003-2017 x264 project + * Copyright (C) 2003-2019 x264 project * * Authors: Fiona Glaser * Loren Merritt @@ -118,9 +118,12 @@ assert(0); } -extern void (*x264_cache_mv_func_table[10])(void *, uint32_t);\ -extern void (*x264_cache_mvd_func_table[10])(void *, uint32_t);\ -extern void (*x264_cache_ref_func_table[10])(void *, uint32_t);\ +#define x264_cache_mv_func_table x264_template(cache_mv_func_table) +extern void (*x264_cache_mv_func_table[10])(void *, uint32_t); +#define x264_cache_mvd_func_table x264_template(cache_mvd_func_table) +extern void (*x264_cache_mvd_func_table[10])(void *, uint32_t); +#define x264_cache_ref_func_table x264_template(cache_ref_func_table) +extern void (*x264_cache_ref_func_table[10])(void *, uint32_t); #define x264_macroblock_cache_mv_ptr( a, x, y, w, h, l, mv ) x264_macroblock_cache_mv( a, x, y, w, h, l, M32( mv ) ) static ALWAYS_INLINE void x264_macroblock_cache_mv( x264_t *h, int x, int y, int width, int height, int i_list, uint32_t mv ) diff -Nru x264-0.152.2854+gite9a5903/common/set.c x264-0.158.2988+git-20191101.7817004/common/set.c --- x264-0.152.2854+gite9a5903/common/set.c 2017-12-31 12:50:51.000000000 +0000 +++ x264-0.158.2988+git-20191101.7817004/common/set.c 2019-11-09 05:16:29.000000000 +0000 @@ -1,7 +1,7 @@ /***************************************************************************** * set.c: quantization init ***************************************************************************** - * Copyright (C) 2005-2017 x264 project + * Copyright (C) 2005-2019 x264 project * * Authors: Loren Merritt * @@ -94,7 +94,7 @@ int start = w == 8 ? 
4 : 0;\ int j;\ for( j = 0; j < i; j++ )\ - if( !memcmp( h->pps->scaling_list[i+start], h->pps->scaling_list[j+start], size*sizeof(uint8_t) ) )\ + if( !memcmp( h->sps->scaling_list[i+start], h->sps->scaling_list[j+start], size*sizeof(uint8_t) ) )\ break;\ if( j < i )\ {\ @@ -110,7 +110,7 @@ }\ for( j = 0; j < i; j++ )\ if( deadzone[j] == deadzone[i] &&\ - !memcmp( h->pps->scaling_list[i+start], h->pps->scaling_list[j+start], size*sizeof(uint8_t) ) )\ + !memcmp( h->sps->scaling_list[i+start], h->sps->scaling_list[j+start], size*sizeof(uint8_t) ) )\ break;\ if( j < i )\ {\ @@ -148,14 +148,14 @@ for( int i_list = 0; i_list < 4; i_list++ ) for( int i = 0; i < 16; i++ ) { - h->dequant4_mf[i_list][q][i] = def_dequant4[q][i] * h->pps->scaling_list[i_list][i]; - quant4_mf[i_list][q][i] = DIV(def_quant4[q][i] * 16, h->pps->scaling_list[i_list][i]); + h->dequant4_mf[i_list][q][i] = def_dequant4[q][i] * h->sps->scaling_list[i_list][i]; + quant4_mf[i_list][q][i] = DIV(def_quant4[q][i] * 16, h->sps->scaling_list[i_list][i]); } for( int i_list = 0; i_list < num_8x8_lists; i_list++ ) for( int i = 0; i < 64; i++ ) { - h->dequant8_mf[i_list][q][i] = def_dequant8[q][i] * h->pps->scaling_list[4+i_list][i]; - quant8_mf[i_list][q][i] = DIV(def_quant8[q][i] * 16, h->pps->scaling_list[4+i_list][i]); + h->dequant8_mf[i_list][q][i] = def_dequant8[q][i] * h->sps->scaling_list[4+i_list][i]; + quant8_mf[i_list][q][i] = DIV(def_quant8[q][i] * 16, h->sps->scaling_list[4+i_list][i]); } } for( int q = 0; q <= QP_MAX_SPEC; q++ ) @@ -300,8 +300,8 @@ x264_free( h->nr_offset_emergency ); } -static int x264_cqm_parse_jmlist( x264_t *h, const char *buf, const char *name, - uint8_t *cqm, const uint8_t *jvt, int length ) +static int cqm_parse_jmlist( x264_t *h, const char *buf, const char *name, + uint8_t *cqm, const uint8_t *jvt, int length ) { int i; @@ -361,16 +361,16 @@ while( (p = strchr( buf, '#' )) != NULL ) memset( p, ' ', strcspn( p, "\n" ) ); - b_error |= x264_cqm_parse_jmlist( h, buf, 
"INTRA4X4_LUMA", h->param.cqm_4iy, x264_cqm_jvt4i, 16 ); - b_error |= x264_cqm_parse_jmlist( h, buf, "INTER4X4_LUMA", h->param.cqm_4py, x264_cqm_jvt4p, 16 ); - b_error |= x264_cqm_parse_jmlist( h, buf, "INTRA4X4_CHROMA", h->param.cqm_4ic, x264_cqm_jvt4i, 16 ); - b_error |= x264_cqm_parse_jmlist( h, buf, "INTER4X4_CHROMA", h->param.cqm_4pc, x264_cqm_jvt4p, 16 ); - b_error |= x264_cqm_parse_jmlist( h, buf, "INTRA8X8_LUMA", h->param.cqm_8iy, x264_cqm_jvt8i, 64 ); - b_error |= x264_cqm_parse_jmlist( h, buf, "INTER8X8_LUMA", h->param.cqm_8py, x264_cqm_jvt8p, 64 ); + b_error |= cqm_parse_jmlist( h, buf, "INTRA4X4_LUMA", h->param.cqm_4iy, x264_cqm_jvt4i, 16 ); + b_error |= cqm_parse_jmlist( h, buf, "INTER4X4_LUMA", h->param.cqm_4py, x264_cqm_jvt4p, 16 ); + b_error |= cqm_parse_jmlist( h, buf, "INTRA4X4_CHROMA", h->param.cqm_4ic, x264_cqm_jvt4i, 16 ); + b_error |= cqm_parse_jmlist( h, buf, "INTER4X4_CHROMA", h->param.cqm_4pc, x264_cqm_jvt4p, 16 ); + b_error |= cqm_parse_jmlist( h, buf, "INTRA8X8_LUMA", h->param.cqm_8iy, x264_cqm_jvt8i, 64 ); + b_error |= cqm_parse_jmlist( h, buf, "INTER8X8_LUMA", h->param.cqm_8py, x264_cqm_jvt8p, 64 ); if( CHROMA444 ) { - b_error |= x264_cqm_parse_jmlist( h, buf, "INTRA8X8_CHROMA", h->param.cqm_8ic, x264_cqm_jvt8i, 64 ); - b_error |= x264_cqm_parse_jmlist( h, buf, "INTER8X8_CHROMA", h->param.cqm_8pc, x264_cqm_jvt8p, 64 ); + b_error |= cqm_parse_jmlist( h, buf, "INTRA8X8_CHROMA", h->param.cqm_8ic, x264_cqm_jvt8i, 64 ); + b_error |= cqm_parse_jmlist( h, buf, "INTER8X8_CHROMA", h->param.cqm_8pc, x264_cqm_jvt8p, 64 ); } x264_free( buf ); diff -Nru x264-0.152.2854+gite9a5903/common/set.h x264-0.158.2988+git-20191101.7817004/common/set.h --- x264-0.152.2854+gite9a5903/common/set.h 2017-12-31 12:50:51.000000000 +0000 +++ x264-0.158.2988+git-20191101.7817004/common/set.h 2019-11-09 05:16:29.000000000 +0000 @@ -1,7 +1,7 @@ /***************************************************************************** * set.h: quantization init 
***************************************************************************** - * Copyright (C) 2003-2017 x264 project + * Copyright (C) 2003-2019 x264 project * * Authors: Loren Merritt * Laurent Aimar @@ -27,24 +27,6 @@ #ifndef X264_SET_H #define X264_SET_H -enum profile_e -{ - PROFILE_BASELINE = 66, - PROFILE_MAIN = 77, - PROFILE_HIGH = 100, - PROFILE_HIGH10 = 110, - PROFILE_HIGH422 = 122, - PROFILE_HIGH444_PREDICTIVE = 244, -}; - -enum chroma_format_e -{ - CHROMA_400 = 0, - CHROMA_420 = 1, - CHROMA_422 = 2, - CHROMA_444 = 3, -}; - enum cqm4_e { CQM_4IY = 0, @@ -158,6 +140,10 @@ int b_qpprime_y_zero_transform_bypass; int i_chroma_format_idc; + int b_avcintra; + int i_cqm_preset; + const uint8_t *scaling_list[8]; /* could be 12, but we don't allow separate Cb/Cr lists */ + } x264_sps_t; typedef struct @@ -187,161 +173,13 @@ int b_transform_8x8_mode; - int i_cqm_preset; - const uint8_t *scaling_list[8]; /* could be 12, but we don't allow separate Cb/Cr lists */ - } x264_pps_t; -/* default quant matrices */ -static const uint8_t x264_cqm_jvt4i[16] = -{ - 6,13,20,28, - 13,20,28,32, - 20,28,32,37, - 28,32,37,42 -}; -static const uint8_t x264_cqm_jvt4p[16] = -{ - 10,14,20,24, - 14,20,24,27, - 20,24,27,30, - 24,27,30,34 -}; -static const uint8_t x264_cqm_jvt8i[64] = -{ - 6,10,13,16,18,23,25,27, - 10,11,16,18,23,25,27,29, - 13,16,18,23,25,27,29,31, - 16,18,23,25,27,29,31,33, - 18,23,25,27,29,31,33,36, - 23,25,27,29,31,33,36,38, - 25,27,29,31,33,36,38,40, - 27,29,31,33,36,38,40,42 -}; -static const uint8_t x264_cqm_jvt8p[64] = -{ - 9,13,15,17,19,21,22,24, - 13,13,17,19,21,22,24,25, - 15,17,19,21,22,24,25,27, - 17,19,21,22,24,25,27,28, - 19,21,22,24,25,27,28,30, - 21,22,24,25,27,28,30,32, - 22,24,25,27,28,30,32,33, - 24,25,27,28,30,32,33,35 -}; -static const uint8_t x264_cqm_flat16[64] = -{ - 16,16,16,16,16,16,16,16, - 16,16,16,16,16,16,16,16, - 16,16,16,16,16,16,16,16, - 16,16,16,16,16,16,16,16, - 16,16,16,16,16,16,16,16, - 16,16,16,16,16,16,16,16, - 
16,16,16,16,16,16,16,16, - 16,16,16,16,16,16,16,16 -}; -static const uint8_t * const x264_cqm_jvt[8] = -{ - x264_cqm_jvt4i, x264_cqm_jvt4p, - x264_cqm_jvt4i, x264_cqm_jvt4p, - x264_cqm_jvt8i, x264_cqm_jvt8p, - x264_cqm_jvt8i, x264_cqm_jvt8p -}; - -// 1080i25_avci50, 1080p25_avci50 -static const uint8_t x264_cqm_avci50_4ic[16] = -{ - 16,22,28,40, - 22,28,40,44, - 28,40,44,48, - 40,44,48,60 -}; - -// 1080i25_avci50, -static const uint8_t x264_cqm_avci50_1080i_8iy[64] = -{ - 16,18,19,21,27,33,81,87, - 18,19,21,24,30,33,81,87, - 19,21,24,27,30,78,84,90, - 21,24,27,30,33,78,84,90, - 24,27,30,33,78,81,84,90, - 24,27,30,33,78,81,84,93, - 27,30,33,78,78,81,87,93, - 30,33,33,78,81,84,87,96 -}; - -// 1080p25_avci50, 720p25_avci50, 720p50_avci50 -static const uint8_t x264_cqm_avci50_p_8iy[64] = -{ - 16,18,19,21,24,27,30,33, - 18,19,21,24,27,30,33,78, - 19,21,24,27,30,33,78,81, - 21,24,27,30,33,78,81,84, - 24,27,30,33,78,81,84,87, - 27,30,33,78,81,84,87,90, - 30,33,78,81,84,87,90,93, - 33,78,81,84,87,90,93,96 -}; - -// 1080i25_avci100, 1080p25_avci100 -static const uint8_t x264_cqm_avci100_1080_4ic[16] = -{ - 16,20,26,32, - 20,26,32,38, - 26,32,38,44, - 32,38,44,50 -}; - -// 720p25_avci100, 720p50_avci100 -static const uint8_t x264_cqm_avci100_720p_4ic[16] = -{ - 16,21,27,34, - 21,27,34,41, - 27,34,41,46, - 34,41,46,54 -}; - -// 1080i25_avci100, -static const uint8_t x264_cqm_avci100_1080i_8iy[64] = -{ - 16,19,20,23,24,26,32,42, - 18,19,22,24,26,32,36,42, - 18,20,23,24,26,32,36,63, - 19,20,23,26,32,36,42,63, - 20,22,24,26,32,36,59,63, - 22,23,24,26,32,36,59,68, - 22,23,24,26,32,42,59,68, - 22,23,24,26,36,42,59,72 -}; - -// 1080p25_avci100, -static const uint8_t x264_cqm_avci100_1080p_8iy[64] = -{ - 16,18,19,20,22,23,24,26, - 18,19,20,22,23,24,26,32, - 19,20,22,23,24,26,32,36, - 20,22,23,24,26,32,36,42, - 22,23,24,26,32,36,42,59, - 23,24,26,32,36,42,59,63, - 24,26,32,36,42,59,63,68, - 26,32,36,42,59,63,68,72 -}; - -// 720p25_avci100, 720p50_avci100 -static const uint8_t 
x264_cqm_avci100_720p_8iy[64] = -{ - 16,18,19,21,22,24,26,32, - 18,19,19,21,22,24,26,32, - 19,19,21,22,22,24,26,32, - 21,21,22,22,23,24,26,34, - 22,22,22,23,24,25,26,34, - 24,24,24,24,25,26,34,36, - 26,26,26,26,26,34,36,38, - 32,32,32,34,34,36,38,42 -}; - +#define x264_cqm_init x264_template(cqm_init) int x264_cqm_init( x264_t *h ); +#define x264_cqm_delete x264_template(cqm_delete) void x264_cqm_delete( x264_t *h ); +#define x264_cqm_parse_file x264_template(cqm_parse_file) int x264_cqm_parse_file( x264_t *h, const char *filename ); #endif diff -Nru x264-0.152.2854+gite9a5903/common/tables.c x264-0.158.2988+git-20191101.7817004/common/tables.c --- x264-0.152.2854+gite9a5903/common/tables.c 1970-01-01 00:00:00.000000000 +0000 +++ x264-0.158.2988+git-20191101.7817004/common/tables.c 2019-11-09 05:16:29.000000000 +0000 @@ -0,0 +1,2539 @@ +/***************************************************************************** + * tables.c: const tables + ***************************************************************************** + * Copyright (C) 2003-2019 x264 project + * + * Authors: Laurent Aimar + * Loren Merritt + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02111, USA. + * + * This program is also available under a commercial proprietary license. + * For more information, contact us at licensing@x264.com. 
+ *****************************************************************************/ + +#include "base.h" + +const x264_level_t x264_levels[] = +{ + { 10, 1485, 99, 396, 64, 175, 64, 64, 0, 2, 0, 0, 1 }, + { 9, 1485, 99, 396, 128, 350, 64, 64, 0, 2, 0, 0, 1 }, /* "1b" */ + { 11, 3000, 396, 900, 192, 500, 128, 64, 0, 2, 0, 0, 1 }, + { 12, 6000, 396, 2376, 384, 1000, 128, 64, 0, 2, 0, 0, 1 }, + { 13, 11880, 396, 2376, 768, 2000, 128, 64, 0, 2, 0, 0, 1 }, + { 20, 11880, 396, 2376, 2000, 2000, 128, 64, 0, 2, 0, 0, 1 }, + { 21, 19800, 792, 4752, 4000, 4000, 256, 64, 0, 2, 0, 0, 0 }, + { 22, 20250, 1620, 8100, 4000, 4000, 256, 64, 0, 2, 0, 0, 0 }, + { 30, 40500, 1620, 8100, 10000, 10000, 256, 32, 22, 2, 0, 1, 0 }, + { 31, 108000, 3600, 18000, 14000, 14000, 512, 16, 60, 4, 1, 1, 0 }, + { 32, 216000, 5120, 20480, 20000, 20000, 512, 16, 60, 4, 1, 1, 0 }, + { 40, 245760, 8192, 32768, 20000, 25000, 512, 16, 60, 4, 1, 1, 0 }, + { 41, 245760, 8192, 32768, 50000, 62500, 512, 16, 24, 2, 1, 1, 0 }, + { 42, 522240, 8704, 34816, 50000, 62500, 512, 16, 24, 2, 1, 1, 1 }, + { 50, 589824, 22080, 110400, 135000, 135000, 512, 16, 24, 2, 1, 1, 1 }, + { 51, 983040, 36864, 184320, 240000, 240000, 512, 16, 24, 2, 1, 1, 1 }, + { 52, 2073600, 36864, 184320, 240000, 240000, 512, 16, 24, 2, 1, 1, 1 }, + { 60, 4177920, 139264, 696320, 240000, 240000, 8192, 16, 24, 2, 1, 1, 1 }, + { 61, 8355840, 139264, 696320, 480000, 480000, 8192, 16, 24, 2, 1, 1, 1 }, + { 62, 16711680, 139264, 696320, 800000, 800000, 8192, 16, 24, 2, 1, 1, 1 }, + { 0 } +}; + +/***************************************************************************** + * MATH + *****************************************************************************/ + +const uint8_t x264_exp2_lut[64] = +{ + 0, 3, 6, 8, 11, 14, 17, 20, 23, 26, 29, 32, 36, 39, 42, 45, + 48, 52, 55, 58, 62, 65, 69, 72, 76, 80, 83, 87, 91, 94, 98, 102, + 106, 110, 114, 118, 122, 126, 130, 135, 139, 143, 147, 152, 156, 161, 165, 170, + 175, 179, 184, 189, 194, 198, 203, 208, 
214, 219, 224, 229, 234, 240, 245, 250 +}; + +const float x264_log2_lut[128] = +{ + 0.00000, 0.01123, 0.02237, 0.03342, 0.04439, 0.05528, 0.06609, 0.07682, + 0.08746, 0.09803, 0.10852, 0.11894, 0.12928, 0.13955, 0.14975, 0.15987, + 0.16993, 0.17991, 0.18982, 0.19967, 0.20945, 0.21917, 0.22882, 0.23840, + 0.24793, 0.25739, 0.26679, 0.27612, 0.28540, 0.29462, 0.30378, 0.31288, + 0.32193, 0.33092, 0.33985, 0.34873, 0.35755, 0.36632, 0.37504, 0.38370, + 0.39232, 0.40088, 0.40939, 0.41785, 0.42626, 0.43463, 0.44294, 0.45121, + 0.45943, 0.46761, 0.47573, 0.48382, 0.49185, 0.49985, 0.50779, 0.51570, + 0.52356, 0.53138, 0.53916, 0.54689, 0.55459, 0.56224, 0.56986, 0.57743, + 0.58496, 0.59246, 0.59991, 0.60733, 0.61471, 0.62205, 0.62936, 0.63662, + 0.64386, 0.65105, 0.65821, 0.66534, 0.67243, 0.67948, 0.68650, 0.69349, + 0.70044, 0.70736, 0.71425, 0.72110, 0.72792, 0.73471, 0.74147, 0.74819, + 0.75489, 0.76155, 0.76818, 0.77479, 0.78136, 0.78790, 0.79442, 0.80090, + 0.80735, 0.81378, 0.82018, 0.82655, 0.83289, 0.83920, 0.84549, 0.85175, + 0.85798, 0.86419, 0.87036, 0.87652, 0.88264, 0.88874, 0.89482, 0.90087, + 0.90689, 0.91289, 0.91886, 0.92481, 0.93074, 0.93664, 0.94251, 0.94837, + 0.95420, 0.96000, 0.96578, 0.97154, 0.97728, 0.98299, 0.98868, 0.99435, +}; + +/* Avoid an int/float conversion. 
*/ +const float x264_log2_lz_lut[32] = +{ + 31,30,29,28,27,26,25,24,23,22,21,20,19,18,17,16,15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0 +}; + +/***************************************************************************** + * ANALYSE + *****************************************************************************/ + +/* lambda = pow(2,qp/6-2) */ +const uint16_t x264_lambda_tab[QP_MAX_MAX+1] = +{ + 1, 1, 1, 1, 1, 1, 1, 1, /* 0- 7 */ + 1, 1, 1, 1, 1, 1, 1, 1, /* 8-15 */ + 2, 2, 2, 2, 3, 3, 3, 4, /* 16-23 */ + 4, 4, 5, 6, 6, 7, 8, 9, /* 24-31 */ + 10, 11, 13, 14, 16, 18, 20, 23, /* 32-39 */ + 25, 29, 32, 36, 40, 45, 51, 57, /* 40-47 */ + 64, 72, 81, 91, 102, 114, 128, 144, /* 48-55 */ + 161, 181, 203, 228, 256, 287, 323, 362, /* 56-63 */ + 406, 456, 512, 575, 645, 724, 813, 912, /* 64-71 */ +1024,1149,1290,1448,1625,1825,2048,2299, /* 72-79 */ +2580,2896, /* 80-81 */ +}; + +/* lambda2 = pow(lambda,2) * .9 * 256 */ +/* Capped to avoid overflow */ +const int x264_lambda2_tab[QP_MAX_MAX+1] = +{ + 14, 18, 22, 28, 36, 45, 57, 72, /* 0- 7 */ + 91, 115, 145, 182, 230, 290, 365, 460, /* 8-15 */ + 580, 731, 921, 1161, 1462, 1843, 2322, 2925, /* 16-23 */ + 3686, 4644, 5851, 7372, 9289, 11703, 14745, 18578, /* 24-31 */ + 23407, 29491, 37156, 46814, 58982, 74313, 93628, 117964, /* 32-39 */ + 148626, 187257, 235929, 297252, 374514, 471859, 594505, 749029, /* 40-47 */ + 943718, 1189010, 1498059, 1887436, 2378021, 2996119, 3774873, 4756042, /* 48-55 */ + 5992238, 7549747, 9512085, 11984476, 15099494, 19024170,23968953,30198988, /* 56-63 */ + 38048341, 47937906, 60397977, 76096683, 95875813,120795955, /* 64-69 */ +134217727,134217727,134217727,134217727,134217727,134217727, /* 70-75 */ +134217727,134217727,134217727,134217727,134217727,134217727, /* 76-81 */ +}; + +// should the intra and inter lambdas be different? +// I'm just matching the behaviour of deadzone quant. +const int x264_trellis_lambda2_tab[2][QP_MAX_MAX+1] = +{ + // inter lambda = .85 * .85 * 2**(qp/3. 
+ 10 - LAMBDA_BITS) + { + 46, 58, 73, 92, 117, 147, + 185, 233, 294, 370, 466, 587, + 740, 932, 1174, 1480, 1864, 2349, + 2959, 3728, 4697, 5918, 7457, 9395, + 11837, 14914, 18790, 23674, 29828, 37581, + 47349, 59656, 75163, 94699, 119313, 150326, + 189399, 238627, 300652, 378798, 477255, 601304, + 757596, 954511, 1202608, 1515192, 1909022, 2405217, + 3030384, 3818045, 4810435, 6060769, 7636091, 9620872, + 12121539, 15272182, 19241743, 24243077, 30544363, 38483486, + 48486154, 61088726, 76966972, 96972308, + 122177453,134217727,134217727,134217727,134217727,134217727, + 134217727,134217727,134217727,134217727,134217727,134217727, + }, + // intra lambda = .65 * .65 * 2**(qp/3. + 10 - LAMBDA_BITS) + { + 27, 34, 43, 54, 68, 86, + 108, 136, 172, 216, 273, 343, + 433, 545, 687, 865, 1090, 1374, + 1731, 2180, 2747, 3461, 4361, 5494, + 6922, 8721, 10988, 13844, 17442, 21976, + 27688, 34885, 43953, 55377, 69771, 87906, + 110755, 139543, 175813, 221511, 279087, 351627, + 443023, 558174, 703255, 886046, 1116348, 1406511, + 1772093, 2232697, 2813022, 3544186, 4465396, 5626046, + 7088374, 8930791, 11252092, 14176748, 17861583, 22504184, + 28353495, 35723165, 45008368, 56706990, + 71446330, 90016736,113413980,134217727,134217727,134217727, + 134217727,134217727,134217727,134217727,134217727,134217727, + 134217727,134217727,134217727,134217727,134217727,134217727, + } +}; + +const uint16_t x264_chroma_lambda2_offset_tab[MAX_CHROMA_LAMBDA_OFFSET+1] = +{ + 16, 20, 25, 32, 40, 50, + 64, 80, 101, 128, 161, 203, + 256, 322, 406, 512, 645, 812, + 1024, 1290, 1625, 2048, 2580, 3250, + 4096, 5160, 6501, 8192, 10321, 13003, + 16384, 20642, 26007, 32768, 41285, 52015, + 65535 +}; + +/***************************************************************************** + * MC + *****************************************************************************/ + +const uint8_t x264_hpel_ref0[16] = {0,1,1,1,0,1,1,1,2,3,3,3,0,1,1,1}; +const uint8_t x264_hpel_ref1[16] = {0,0,1,0,2,2,3,2,2,2,3,2,2,2,3,2}; 
+ +/***************************************************************************** + * CQM + *****************************************************************************/ + +/* default quant matrices */ +const uint8_t x264_cqm_jvt4i[16] = +{ + 6,13,20,28, + 13,20,28,32, + 20,28,32,37, + 28,32,37,42 +}; +const uint8_t x264_cqm_jvt4p[16] = +{ + 10,14,20,24, + 14,20,24,27, + 20,24,27,30, + 24,27,30,34 +}; +const uint8_t x264_cqm_jvt8i[64] = +{ + 6,10,13,16,18,23,25,27, + 10,11,16,18,23,25,27,29, + 13,16,18,23,25,27,29,31, + 16,18,23,25,27,29,31,33, + 18,23,25,27,29,31,33,36, + 23,25,27,29,31,33,36,38, + 25,27,29,31,33,36,38,40, + 27,29,31,33,36,38,40,42 +}; +const uint8_t x264_cqm_jvt8p[64] = +{ + 9,13,15,17,19,21,22,24, + 13,13,17,19,21,22,24,25, + 15,17,19,21,22,24,25,27, + 17,19,21,22,24,25,27,28, + 19,21,22,24,25,27,28,30, + 21,22,24,25,27,28,30,32, + 22,24,25,27,28,30,32,33, + 24,25,27,28,30,32,33,35 +}; +const uint8_t x264_cqm_flat16[64] = +{ + 16,16,16,16,16,16,16,16, + 16,16,16,16,16,16,16,16, + 16,16,16,16,16,16,16,16, + 16,16,16,16,16,16,16,16, + 16,16,16,16,16,16,16,16, + 16,16,16,16,16,16,16,16, + 16,16,16,16,16,16,16,16, + 16,16,16,16,16,16,16,16 +}; +const uint8_t * const x264_cqm_jvt[8] = +{ + x264_cqm_jvt4i, x264_cqm_jvt4p, + x264_cqm_jvt4i, x264_cqm_jvt4p, + x264_cqm_jvt8i, x264_cqm_jvt8p, + x264_cqm_jvt8i, x264_cqm_jvt8p +}; + +// 1080i25_avci50, 1080p25_avci50 +const uint8_t x264_cqm_avci50_4ic[16] = +{ + 16,22,28,40, + 22,28,40,44, + 28,40,44,48, + 40,44,48,60 +}; + +// 1080p25_avci50, 720p25_avci50, 720p50_avci50 +const uint8_t x264_cqm_avci50_p_8iy[64] = +{ + 16,18,19,21,24,27,30,33, + 18,19,21,24,27,30,33,78, + 19,21,24,27,30,33,78,81, + 21,24,27,30,33,78,81,84, + 24,27,30,33,78,81,84,87, + 27,30,33,78,81,84,87,90, + 30,33,78,81,84,87,90,93, + 33,78,81,84,87,90,93,96 +}; + +// 1080i25_avci50, +const uint8_t x264_cqm_avci50_1080i_8iy[64] = +{ + 16,18,19,21,27,33,81,87, + 18,19,21,24,30,33,81,87, + 19,21,24,27,30,78,84,90, + 
21,24,27,30,33,78,84,90, + 24,27,30,33,78,81,84,90, + 24,27,30,33,78,81,84,93, + 27,30,33,78,78,81,87,93, + 30,33,33,78,81,84,87,96 +}; + +// 720p25_avci100, 720p50_avci100 +const uint8_t x264_cqm_avci100_720p_4ic[16] = +{ + 16,21,27,34, + 21,27,34,41, + 27,34,41,46, + 34,41,46,54 +}; + +// 720p25_avci100, 720p50_avci100 +const uint8_t x264_cqm_avci100_720p_8iy[64] = +{ + 16,18,19,21,22,24,26,32, + 18,19,19,21,22,24,26,32, + 19,19,21,22,22,24,26,32, + 21,21,22,22,23,24,26,34, + 22,22,22,23,24,25,26,34, + 24,24,24,24,25,26,34,36, + 26,26,26,26,26,34,36,38, + 32,32,32,34,34,36,38,42 +}; + +// 1080i25_avci100, 1080p25_avci100 +const uint8_t x264_cqm_avci100_1080_4ic[16] = +{ + 16,20,26,32, + 20,26,32,38, + 26,32,38,44, + 32,38,44,50 +}; + +// 1080i25_avci100, +const uint8_t x264_cqm_avci100_1080i_8iy[64] = +{ + 16,19,20,23,24,26,32,42, + 18,19,22,24,26,32,36,42, + 18,20,23,24,26,32,36,63, + 19,20,23,26,32,36,42,63, + 20,22,24,26,32,36,59,63, + 22,23,24,26,32,36,59,68, + 22,23,24,26,32,42,59,68, + 22,23,24,26,36,42,59,72 +}; + +// 1080p25_avci100, +const uint8_t x264_cqm_avci100_1080p_8iy[64] = +{ + 16,18,19,20,22,23,24,26, + 18,19,20,22,23,24,26,32, + 19,20,22,23,24,26,32,36, + 20,22,23,24,26,32,36,42, + 22,23,24,26,32,36,42,59, + 23,24,26,32,36,42,59,63, + 24,26,32,36,42,59,63,68, + 26,32,36,42,59,63,68,72 +}; + +/***************************************************************************** + * QUANT + *****************************************************************************/ + +const uint8_t x264_decimate_table4[16] = +{ + 3,2,2,1,1,1,0,0,0,0,0,0,0,0,0,0 +}; +const uint8_t x264_decimate_table8[64] = +{ + 3,3,3,3,2,2,2,2,2,2,2,2,1,1,1,1, + 1,1,1,1,1,1,1,1,0,0,0,0,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 +}; + +/***************************************************************************** + * DCT + *****************************************************************************/ + +/* the inverse of the scaling factors introduced 
by 8x8 fdct */ +/* uint32 is for the asm implementation of trellis. the actual values fit in uint16. */ +#define W(i) (i==0 ? FIX8(1.0000) :\ + i==1 ? FIX8(0.8859) :\ + i==2 ? FIX8(1.6000) :\ + i==3 ? FIX8(0.9415) :\ + i==4 ? FIX8(1.2651) :\ + i==5 ? FIX8(1.1910) :0) +const uint32_t x264_dct8_weight_tab[64] = { + W(0), W(3), W(4), W(3), W(0), W(3), W(4), W(3), + W(3), W(1), W(5), W(1), W(3), W(1), W(5), W(1), + W(4), W(5), W(2), W(5), W(4), W(5), W(2), W(5), + W(3), W(1), W(5), W(1), W(3), W(1), W(5), W(1), + + W(0), W(3), W(4), W(3), W(0), W(3), W(4), W(3), + W(3), W(1), W(5), W(1), W(3), W(1), W(5), W(1), + W(4), W(5), W(2), W(5), W(4), W(5), W(2), W(5), + W(3), W(1), W(5), W(1), W(3), W(1), W(5), W(1) +}; +#undef W + +#define W(i) (i==0 ? FIX8(1.76777) :\ + i==1 ? FIX8(1.11803) :\ + i==2 ? FIX8(0.70711) :0) +const uint32_t x264_dct4_weight_tab[16] = { + W(0), W(1), W(0), W(1), + W(1), W(2), W(1), W(2), + W(0), W(1), W(0), W(1), + W(1), W(2), W(1), W(2) +}; +#undef W + +/* inverse squared */ +#define W(i) (i==0 ? FIX8(3.125) :\ + i==1 ? FIX8(1.25) :\ + i==2 ? FIX8(0.5) :0) +const uint32_t x264_dct4_weight2_tab[16] = { + W(0), W(1), W(0), W(1), + W(1), W(2), W(1), W(2), + W(0), W(1), W(0), W(1), + W(1), W(2), W(1), W(2) +}; +#undef W + +#define W(i) (i==0 ? FIX8(1.00000) :\ + i==1 ? FIX8(0.78487) :\ + i==2 ? FIX8(2.56132) :\ + i==3 ? FIX8(0.88637) :\ + i==4 ? FIX8(1.60040) :\ + i==5 ? 
FIX8(1.41850) :0) +const uint32_t x264_dct8_weight2_tab[64] = { + W(0), W(3), W(4), W(3), W(0), W(3), W(4), W(3), + W(3), W(1), W(5), W(1), W(3), W(1), W(5), W(1), + W(4), W(5), W(2), W(5), W(4), W(5), W(2), W(5), + W(3), W(1), W(5), W(1), W(3), W(1), W(5), W(1), + + W(0), W(3), W(4), W(3), W(0), W(3), W(4), W(3), + W(3), W(1), W(5), W(1), W(3), W(1), W(5), W(1), + W(4), W(5), W(2), W(5), W(4), W(5), W(2), W(5), + W(3), W(1), W(5), W(1), W(3), W(1), W(5), W(1) +}; +#undef W + +/***************************************************************************** + * CABAC + *****************************************************************************/ + +const int8_t x264_cabac_context_init_I[1024][2] = +{ + /* 0 - 10 */ + { 20, -15 }, { 2, 54 }, { 3, 74 }, { 20, -15 }, + { 2, 54 }, { 3, 74 }, { -28,127 }, { -23, 104 }, + { -6, 53 }, { -1, 54 }, { 7, 51 }, + + /* 11 - 23 unused for I */ + { 0, 0 }, { 0, 0 }, { 0, 0 }, { 0, 0 }, + { 0, 0 }, { 0, 0 }, { 0, 0 }, { 0, 0 }, + { 0, 0 }, { 0, 0 }, { 0, 0 }, { 0, 0 }, + { 0, 0 }, + + /* 24- 39 */ + { 0, 0 }, { 0, 0 }, { 0, 0 }, { 0, 0 }, + { 0, 0 }, { 0, 0 }, { 0, 0 }, { 0, 0 }, + { 0, 0 }, { 0, 0 }, { 0, 0 }, { 0, 0 }, + { 0, 0 }, { 0, 0 }, { 0, 0 }, { 0, 0 }, + + /* 40 - 53 */ + { 0, 0 }, { 0, 0 }, { 0, 0 }, { 0, 0 }, + { 0, 0 }, { 0, 0 }, { 0, 0 }, { 0, 0 }, + { 0, 0 }, { 0, 0 }, { 0, 0 }, { 0, 0 }, + { 0, 0 }, { 0, 0 }, + + /* 54 - 59 */ + { 0, 0 }, { 0, 0 }, { 0, 0 }, { 0, 0 }, + { 0, 0 }, { 0, 0 }, + + /* 60 - 69 */ + { 0, 41 }, { 0, 63 }, { 0, 63 }, { 0, 63 }, + { -9, 83 }, { 4, 86 }, { 0, 97 }, { -7, 72 }, + { 13, 41 }, { 3, 62 }, + + /* 70 -> 87 */ + { 0, 11 }, { 1, 55 }, { 0, 69 }, { -17, 127 }, + { -13, 102 },{ 0, 82 }, { -7, 74 }, { -21, 107 }, + { -27, 127 },{ -31, 127 },{ -24, 127 }, { -18, 95 }, + { -27, 127 },{ -21, 114 },{ -30, 127 }, { -17, 123 }, + { -12, 115 },{ -16, 122 }, + + /* 88 -> 104 */ + { -11, 115 },{ -12, 63 }, { -2, 68 }, { -15, 84 }, + { -13, 104 },{ -3, 70 }, { -8, 93 }, { -10, 90 }, + { -30, 127 
},{ -1, 74 }, { -6, 97 }, { -7, 91 }, + { -20, 127 },{ -4, 56 }, { -5, 82 }, { -7, 76 }, + { -22, 125 }, + + /* 105 -> 135 */ + { -7, 93 }, { -11, 87 }, { -3, 77 }, { -5, 71 }, + { -4, 63 }, { -4, 68 }, { -12, 84 }, { -7, 62 }, + { -7, 65 }, { 8, 61 }, { 5, 56 }, { -2, 66 }, + { 1, 64 }, { 0, 61 }, { -2, 78 }, { 1, 50 }, + { 7, 52 }, { 10, 35 }, { 0, 44 }, { 11, 38 }, + { 1, 45 }, { 0, 46 }, { 5, 44 }, { 31, 17 }, + { 1, 51 }, { 7, 50 }, { 28, 19 }, { 16, 33 }, + { 14, 62 }, { -13, 108 },{ -15, 100 }, + + /* 136 -> 165 */ + { -13, 101 },{ -13, 91 }, { -12, 94 }, { -10, 88 }, + { -16, 84 }, { -10, 86 }, { -7, 83 }, { -13, 87 }, + { -19, 94 }, { 1, 70 }, { 0, 72 }, { -5, 74 }, + { 18, 59 }, { -8, 102 }, { -15, 100 }, { 0, 95 }, + { -4, 75 }, { 2, 72 }, { -11, 75 }, { -3, 71 }, + { 15, 46 }, { -13, 69 }, { 0, 62 }, { 0, 65 }, + { 21, 37 }, { -15, 72 }, { 9, 57 }, { 16, 54 }, + { 0, 62 }, { 12, 72 }, + + /* 166 -> 196 */ + { 24, 0 }, { 15, 9 }, { 8, 25 }, { 13, 18 }, + { 15, 9 }, { 13, 19 }, { 10, 37 }, { 12, 18 }, + { 6, 29 }, { 20, 33 }, { 15, 30 }, { 4, 45 }, + { 1, 58 }, { 0, 62 }, { 7, 61 }, { 12, 38 }, + { 11, 45 }, { 15, 39 }, { 11, 42 }, { 13, 44 }, + { 16, 45 }, { 12, 41 }, { 10, 49 }, { 30, 34 }, + { 18, 42 }, { 10, 55 }, { 17, 51 }, { 17, 46 }, + { 0, 89 }, { 26, -19 }, { 22, -17 }, + + /* 197 -> 226 */ + { 26, -17 }, { 30, -25 }, { 28, -20 }, { 33, -23 }, + { 37, -27 }, { 33, -23 }, { 40, -28 }, { 38, -17 }, + { 33, -11 }, { 40, -15 }, { 41, -6 }, { 38, 1 }, + { 41, 17 }, { 30, -6 }, { 27, 3 }, { 26, 22 }, + { 37, -16 }, { 35, -4 }, { 38, -8 }, { 38, -3 }, + { 37, 3 }, { 38, 5 }, { 42, 0 }, { 35, 16 }, + { 39, 22 }, { 14, 48 }, { 27, 37 }, { 21, 60 }, + { 12, 68 }, { 2, 97 }, + + /* 227 -> 251 */ + { -3, 71 }, { -6, 42 }, { -5, 50 }, { -3, 54 }, + { -2, 62 }, { 0, 58 }, { 1, 63 }, { -2, 72 }, + { -1, 74 }, { -9, 91 }, { -5, 67 }, { -5, 27 }, + { -3, 39 }, { -2, 44 }, { 0, 46 }, { -16, 64 }, + { -8, 68 }, { -10, 78 }, { -6, 77 }, { -10, 86 }, + { -12, 92 }, 
{ -15, 55 }, { -10, 60 }, { -6, 62 }, + { -4, 65 }, + + /* 252 -> 275 */ + { -12, 73 }, { -8, 76 }, { -7, 80 }, { -9, 88 }, + { -17, 110 },{ -11, 97 }, { -20, 84 }, { -11, 79 }, + { -6, 73 }, { -4, 74 }, { -13, 86 }, { -13, 96 }, + { -11, 97 }, { -19, 117 },{ -8, 78 }, { -5, 33 }, + { -4, 48 }, { -2, 53 }, { -3, 62 }, { -13, 71 }, + { -10, 79 }, { -12, 86 }, { -13, 90 }, { -14, 97 }, + + /* 276 a bit special (not used, x264_cabac_encode_bypass is used instead) */ + { 0, 0 }, + + /* 277 -> 307 */ + { -6, 93 }, { -6, 84 }, { -8, 79 }, { 0, 66 }, + { -1, 71 }, { 0, 62 }, { -2, 60 }, { -2, 59 }, + { -5, 75 }, { -3, 62 }, { -4, 58 }, { -9, 66 }, + { -1, 79 }, { 0, 71 }, { 3, 68 }, { 10, 44 }, + { -7, 62 }, { 15, 36 }, { 14, 40 }, { 16, 27 }, + { 12, 29 }, { 1, 44 }, { 20, 36 }, { 18, 32 }, + { 5, 42 }, { 1, 48 }, { 10, 62 }, { 17, 46 }, + { 9, 64 }, { -12, 104 },{ -11, 97 }, + + /* 308 -> 337 */ + { -16, 96 }, { -7, 88 }, { -8, 85 }, { -7, 85 }, + { -9, 85 }, { -13, 88 }, { 4, 66 }, { -3, 77 }, + { -3, 76 }, { -6, 76 }, { 10, 58 }, { -1, 76 }, + { -1, 83 }, { -7, 99 }, { -14, 95 }, { 2, 95 }, + { 0, 76 }, { -5, 74 }, { 0, 70 }, { -11, 75 }, + { 1, 68 }, { 0, 65 }, { -14, 73 }, { 3, 62 }, + { 4, 62 }, { -1, 68 }, { -13, 75 }, { 11, 55 }, + { 5, 64 }, { 12, 70 }, + + /* 338 -> 368 */ + { 15, 6 }, { 6, 19 }, { 7, 16 }, { 12, 14 }, + { 18, 13 }, { 13, 11 }, { 13, 15 }, { 15, 16 }, + { 12, 23 }, { 13, 23 }, { 15, 20 }, { 14, 26 }, + { 14, 44 }, { 17, 40 }, { 17, 47 }, { 24, 17 }, + { 21, 21 }, { 25, 22 }, { 31, 27 }, { 22, 29 }, + { 19, 35 }, { 14, 50 }, { 10, 57 }, { 7, 63 }, + { -2, 77 }, { -4, 82 }, { -3, 94 }, { 9, 69 }, + { -12, 109 },{ 36, -35 }, { 36, -34 }, + + /* 369 -> 398 */ + { 32, -26 }, { 37, -30 }, { 44, -32 }, { 34, -18 }, + { 34, -15 }, { 40, -15 }, { 33, -7 }, { 35, -5 }, + { 33, 0 }, { 38, 2 }, { 33, 13 }, { 23, 35 }, + { 13, 58 }, { 29, -3 }, { 26, 0 }, { 22, 30 }, + { 31, -7 }, { 35, -15 }, { 34, -3 }, { 34, 3 }, + { 36, -1 }, { 34, 5 }, { 32, 11 }, { 
35, 5 }, + { 34, 12 }, { 39, 11 }, { 30, 29 }, { 34, 26 }, + { 29, 39 }, { 19, 66 }, + + /* 399 -> 435 */ + { 31, 21 }, { 31, 31 }, { 25, 50 }, + { -17, 120 }, { -20, 112 }, { -18, 114 }, { -11, 85 }, + { -15, 92 }, { -14, 89 }, { -26, 71 }, { -15, 81 }, + { -14, 80 }, { 0, 68 }, { -14, 70 }, { -24, 56 }, + { -23, 68 }, { -24, 50 }, { -11, 74 }, { 23, -13 }, + { 26, -13 }, { 40, -15 }, { 49, -14 }, { 44, 3 }, + { 45, 6 }, { 44, 34 }, { 33, 54 }, { 19, 82 }, + { -3, 75 }, { -1, 23 }, { 1, 34 }, { 1, 43 }, + { 0, 54 }, { -2, 55 }, { 0, 61 }, { 1, 64 }, + { 0, 68 }, { -9, 92 }, + + /* 436 -> 459 */ + { -14, 106 }, { -13, 97 }, { -15, 90 }, { -12, 90 }, + { -18, 88 }, { -10, 73 }, { -9, 79 }, { -14, 86 }, + { -10, 73 }, { -10, 70 }, { -10, 69 }, { -5, 66 }, + { -9, 64 }, { -5, 58 }, { 2, 59 }, { 21, -10 }, + { 24, -11 }, { 28, -8 }, { 28, -1 }, { 29, 3 }, + { 29, 9 }, { 35, 20 }, { 29, 36 }, { 14, 67 }, + + /* 460 -> 1024 */ + { -17, 123 }, { -12, 115 }, { -16, 122 }, { -11, 115 }, + { -12, 63 }, { -2, 68 }, { -15, 84 }, { -13, 104 }, + { -3, 70 }, { -8, 93 }, { -10, 90 }, { -30, 127 }, + { -17, 123 }, { -12, 115 }, { -16, 122 }, { -11, 115 }, + { -12, 63 }, { -2, 68 }, { -15, 84 }, { -13, 104 }, + { -3, 70 }, { -8, 93 }, { -10, 90 }, { -30, 127 }, + { -7, 93 }, { -11, 87 }, { -3, 77 }, { -5, 71 }, + { -4, 63 }, { -4, 68 }, { -12, 84 }, { -7, 62 }, + { -7, 65 }, { 8, 61 }, { 5, 56 }, { -2, 66 }, + { 1, 64 }, { 0, 61 }, { -2, 78 }, { 1, 50 }, + { 7, 52 }, { 10, 35 }, { 0, 44 }, { 11, 38 }, + { 1, 45 }, { 0, 46 }, { 5, 44 }, { 31, 17 }, + { 1, 51 }, { 7, 50 }, { 28, 19 }, { 16, 33 }, + { 14, 62 }, { -13, 108 }, { -15, 100 }, { -13, 101 }, + { -13, 91 }, { -12, 94 }, { -10, 88 }, { -16, 84 }, + { -10, 86 }, { -7, 83 }, { -13, 87 }, { -19, 94 }, + { 1, 70 }, { 0, 72 }, { -5, 74 }, { 18, 59 }, + { -7, 93 }, { -11, 87 }, { -3, 77 }, { -5, 71 }, + { -4, 63 }, { -4, 68 }, { -12, 84 }, { -7, 62 }, + { -7, 65 }, { 8, 61 }, { 5, 56 }, { -2, 66 }, + { 1, 64 }, { 0, 61 }, { -2, 78 
}, { 1, 50 }, + { 7, 52 }, { 10, 35 }, { 0, 44 }, { 11, 38 }, + { 1, 45 }, { 0, 46 }, { 5, 44 }, { 31, 17 }, + { 1, 51 }, { 7, 50 }, { 28, 19 }, { 16, 33 }, + { 14, 62 }, { -13, 108 }, { -15, 100 }, { -13, 101 }, + { -13, 91 }, { -12, 94 }, { -10, 88 }, { -16, 84 }, + { -10, 86 }, { -7, 83 }, { -13, 87 }, { -19, 94 }, + { 1, 70 }, { 0, 72 }, { -5, 74 }, { 18, 59 }, + { 24, 0 }, { 15, 9 }, { 8, 25 }, { 13, 18 }, + { 15, 9 }, { 13, 19 }, { 10, 37 }, { 12, 18 }, + { 6, 29 }, { 20, 33 }, { 15, 30 }, { 4, 45 }, + { 1, 58 }, { 0, 62 }, { 7, 61 }, { 12, 38 }, + { 11, 45 }, { 15, 39 }, { 11, 42 }, { 13, 44 }, + { 16, 45 }, { 12, 41 }, { 10, 49 }, { 30, 34 }, + { 18, 42 }, { 10, 55 }, { 17, 51 }, { 17, 46 }, + { 0, 89 }, { 26, -19 }, { 22, -17 }, { 26, -17 }, + { 30, -25 }, { 28, -20 }, { 33, -23 }, { 37, -27 }, + { 33, -23 }, { 40, -28 }, { 38, -17 }, { 33, -11 }, + { 40, -15 }, { 41, -6 }, { 38, 1 }, { 41, 17 }, + { 24, 0 }, { 15, 9 }, { 8, 25 }, { 13, 18 }, + { 15, 9 }, { 13, 19 }, { 10, 37 }, { 12, 18 }, + { 6, 29 }, { 20, 33 }, { 15, 30 }, { 4, 45 }, + { 1, 58 }, { 0, 62 }, { 7, 61 }, { 12, 38 }, + { 11, 45 }, { 15, 39 }, { 11, 42 }, { 13, 44 }, + { 16, 45 }, { 12, 41 }, { 10, 49 }, { 30, 34 }, + { 18, 42 }, { 10, 55 }, { 17, 51 }, { 17, 46 }, + { 0, 89 }, { 26, -19 }, { 22, -17 }, { 26, -17 }, + { 30, -25 }, { 28, -20 }, { 33, -23 }, { 37, -27 }, + { 33, -23 }, { 40, -28 }, { 38, -17 }, { 33, -11 }, + { 40, -15 }, { 41, -6 }, { 38, 1 }, { 41, 17 }, + { -17, 120 }, { -20, 112 }, { -18, 114 }, { -11, 85 }, + { -15, 92 }, { -14, 89 }, { -26, 71 }, { -15, 81 }, + { -14, 80 }, { 0, 68 }, { -14, 70 }, { -24, 56 }, + { -23, 68 }, { -24, 50 }, { -11, 74 }, { -14, 106 }, + { -13, 97 }, { -15, 90 }, { -12, 90 }, { -18, 88 }, + { -10, 73 }, { -9, 79 }, { -14, 86 }, { -10, 73 }, + { -10, 70 }, { -10, 69 }, { -5, 66 }, { -9, 64 }, + { -5, 58 }, { 2, 59 }, { 23, -13 }, { 26, -13 }, + { 40, -15 }, { 49, -14 }, { 44, 3 }, { 45, 6 }, + { 44, 34 }, { 33, 54 }, { 19, 82 }, { 21, -10 }, 
+ { 24, -11 }, { 28, -8 }, { 28, -1 }, { 29, 3 }, + { 29, 9 }, { 35, 20 }, { 29, 36 }, { 14, 67 }, + { -3, 75 }, { -1, 23 }, { 1, 34 }, { 1, 43 }, + { 0, 54 }, { -2, 55 }, { 0, 61 }, { 1, 64 }, + { 0, 68 }, { -9, 92 }, { -17, 120 }, { -20, 112 }, + { -18, 114 }, { -11, 85 }, { -15, 92 }, { -14, 89 }, + { -26, 71 }, { -15, 81 }, { -14, 80 }, { 0, 68 }, + { -14, 70 }, { -24, 56 }, { -23, 68 }, { -24, 50 }, + { -11, 74 }, { -14, 106 }, { -13, 97 }, { -15, 90 }, + { -12, 90 }, { -18, 88 }, { -10, 73 }, { -9, 79 }, + { -14, 86 }, { -10, 73 }, { -10, 70 }, { -10, 69 }, + { -5, 66 }, { -9, 64 }, { -5, 58 }, { 2, 59 }, + { 23, -13 }, { 26, -13 }, { 40, -15 }, { 49, -14 }, + { 44, 3 }, { 45, 6 }, { 44, 34 }, { 33, 54 }, + { 19, 82 }, { 21, -10 }, { 24, -11 }, { 28, -8 }, + { 28, -1 }, { 29, 3 }, { 29, 9 }, { 35, 20 }, + { 29, 36 }, { 14, 67 }, { -3, 75 }, { -1, 23 }, + { 1, 34 }, { 1, 43 }, { 0, 54 }, { -2, 55 }, + { 0, 61 }, { 1, 64 }, { 0, 68 }, { -9, 92 }, + { -6, 93 }, { -6, 84 }, { -8, 79 }, { 0, 66 }, + { -1, 71 }, { 0, 62 }, { -2, 60 }, { -2, 59 }, + { -5, 75 }, { -3, 62 }, { -4, 58 }, { -9, 66 }, + { -1, 79 }, { 0, 71 }, { 3, 68 }, { 10, 44 }, + { -7, 62 }, { 15, 36 }, { 14, 40 }, { 16, 27 }, + { 12, 29 }, { 1, 44 }, { 20, 36 }, { 18, 32 }, + { 5, 42 }, { 1, 48 }, { 10, 62 }, { 17, 46 }, + { 9, 64 }, { -12, 104 }, { -11, 97 }, { -16, 96 }, + { -7, 88 }, { -8, 85 }, { -7, 85 }, { -9, 85 }, + { -13, 88 }, { 4, 66 }, { -3, 77 }, { -3, 76 }, + { -6, 76 }, { 10, 58 }, { -1, 76 }, { -1, 83 }, + { -6, 93 }, { -6, 84 }, { -8, 79 }, { 0, 66 }, + { -1, 71 }, { 0, 62 }, { -2, 60 }, { -2, 59 }, + { -5, 75 }, { -3, 62 }, { -4, 58 }, { -9, 66 }, + { -1, 79 }, { 0, 71 }, { 3, 68 }, { 10, 44 }, + { -7, 62 }, { 15, 36 }, { 14, 40 }, { 16, 27 }, + { 12, 29 }, { 1, 44 }, { 20, 36 }, { 18, 32 }, + { 5, 42 }, { 1, 48 }, { 10, 62 }, { 17, 46 }, + { 9, 64 }, { -12, 104 }, { -11, 97 }, { -16, 96 }, + { -7, 88 }, { -8, 85 }, { -7, 85 }, { -9, 85 }, + { -13, 88 }, { 4, 66 }, { -3, 77 }, { 
-3, 76 }, + { -6, 76 }, { 10, 58 }, { -1, 76 }, { -1, 83 }, + { 15, 6 }, { 6, 19 }, { 7, 16 }, { 12, 14 }, + { 18, 13 }, { 13, 11 }, { 13, 15 }, { 15, 16 }, + { 12, 23 }, { 13, 23 }, { 15, 20 }, { 14, 26 }, + { 14, 44 }, { 17, 40 }, { 17, 47 }, { 24, 17 }, + { 21, 21 }, { 25, 22 }, { 31, 27 }, { 22, 29 }, + { 19, 35 }, { 14, 50 }, { 10, 57 }, { 7, 63 }, + { -2, 77 }, { -4, 82 }, { -3, 94 }, { 9, 69 }, + { -12, 109 }, { 36, -35 }, { 36, -34 }, { 32, -26 }, + { 37, -30 }, { 44, -32 }, { 34, -18 }, { 34, -15 }, + { 40, -15 }, { 33, -7 }, { 35, -5 }, { 33, 0 }, + { 38, 2 }, { 33, 13 }, { 23, 35 }, { 13, 58 }, + { 15, 6 }, { 6, 19 }, { 7, 16 }, { 12, 14 }, + { 18, 13 }, { 13, 11 }, { 13, 15 }, { 15, 16 }, + { 12, 23 }, { 13, 23 }, { 15, 20 }, { 14, 26 }, + { 14, 44 }, { 17, 40 }, { 17, 47 }, { 24, 17 }, + { 21, 21 }, { 25, 22 }, { 31, 27 }, { 22, 29 }, + { 19, 35 }, { 14, 50 }, { 10, 57 }, { 7, 63 }, + { -2, 77 }, { -4, 82 }, { -3, 94 }, { 9, 69 }, + { -12, 109 }, { 36, -35 }, { 36, -34 }, { 32, -26 }, + { 37, -30 }, { 44, -32 }, { 34, -18 }, { 34, -15 }, + { 40, -15 }, { 33, -7 }, { 35, -5 }, { 33, 0 }, + { 38, 2 }, { 33, 13 }, { 23, 35 }, { 13, 58 }, + { -3, 71 }, { -6, 42 }, { -5, 50 }, { -3, 54 }, + { -2, 62 }, { 0, 58 }, { 1, 63 }, { -2, 72 }, + { -1, 74 }, { -9, 91 }, { -5, 67 }, { -5, 27 }, + { -3, 39 }, { -2, 44 }, { 0, 46 }, { -16, 64 }, + { -8, 68 }, { -10, 78 }, { -6, 77 }, { -10, 86 }, + { -12, 92 }, { -15, 55 }, { -10, 60 }, { -6, 62 }, + { -4, 65 }, { -12, 73 }, { -8, 76 }, { -7, 80 }, + { -9, 88 }, { -17, 110 }, { -3, 71 }, { -6, 42 }, + { -5, 50 }, { -3, 54 }, { -2, 62 }, { 0, 58 }, + { 1, 63 }, { -2, 72 }, { -1, 74 }, { -9, 91 }, + { -5, 67 }, { -5, 27 }, { -3, 39 }, { -2, 44 }, + { 0, 46 }, { -16, 64 }, { -8, 68 }, { -10, 78 }, + { -6, 77 }, { -10, 86 }, { -12, 92 }, { -15, 55 }, + { -10, 60 }, { -6, 62 }, { -4, 65 }, { -12, 73 }, + { -8, 76 }, { -7, 80 }, { -9, 88 }, { -17, 110 }, + { -3, 70 }, { -8, 93 }, { -10, 90 }, { -30, 127 }, + { -3, 70 }, { 
-8, 93 }, { -10, 90 }, { -30, 127 }, + { -3, 70 }, { -8, 93 }, { -10, 90 }, { -30, 127 } +}; + +const int8_t x264_cabac_context_init_PB[3][1024][2] = +{ + /* i_cabac_init_idc == 0 */ + { + /* 0 - 10 */ + { 20, -15 }, { 2, 54 }, { 3, 74 }, { 20, -15 }, + { 2, 54 }, { 3, 74 }, { -28, 127 }, { -23, 104 }, + { -6, 53 }, { -1, 54 }, { 7, 51 }, + + /* 11 - 23 */ + { 23, 33 }, { 23, 2 }, { 21, 0 }, { 1, 9 }, + { 0, 49 }, { -37, 118 }, { 5, 57 }, { -13, 78 }, + { -11, 65 }, { 1, 62 }, { 12, 49 }, { -4, 73 }, + { 17, 50 }, + + /* 24 - 39 */ + { 18, 64 }, { 9, 43 }, { 29, 0 }, { 26, 67 }, + { 16, 90 }, { 9, 104 }, { -46, 127 }, { -20, 104 }, + { 1, 67 }, { -13, 78 }, { -11, 65 }, { 1, 62 }, + { -6, 86 }, { -17, 95 }, { -6, 61 }, { 9, 45 }, + + /* 40 - 53 */ + { -3, 69 }, { -6, 81 }, { -11, 96 }, { 6, 55 }, + { 7, 67 }, { -5, 86 }, { 2, 88 }, { 0, 58 }, + { -3, 76 }, { -10, 94 }, { 5, 54 }, { 4, 69 }, + { -3, 81 }, { 0, 88 }, + + /* 54 - 59 */ + { -7, 67 }, { -5, 74 }, { -4, 74 }, { -5, 80 }, + { -7, 72 }, { 1, 58 }, + + /* 60 - 69 */ + { 0, 41 }, { 0, 63 }, { 0, 63 }, { 0, 63 }, + { -9, 83 }, { 4, 86 }, { 0, 97 }, { -7, 72 }, + { 13, 41 }, { 3, 62 }, + + /* 70 - 87 */ + { 0, 45 }, { -4, 78 }, { -3, 96 }, { -27, 126 }, + { -28, 98 }, { -25, 101 }, { -23, 67 }, { -28, 82 }, + { -20, 94 }, { -16, 83 }, { -22, 110 }, { -21, 91 }, + { -18, 102 }, { -13, 93 }, { -29, 127 }, { -7, 92 }, + { -5, 89 }, { -7, 96 }, { -13, 108 }, { -3, 46 }, + { -1, 65 }, { -1, 57 }, { -9, 93 }, { -3, 74 }, + { -9, 92 }, { -8, 87 }, { -23, 126 }, { 5, 54 }, + { 6, 60 }, { 6, 59 }, { 6, 69 }, { -1, 48 }, + { 0, 68 }, { -4, 69 }, { -8, 88 }, + + /* 105 -> 165 */ + { -2, 85 }, { -6, 78 }, { -1, 75 }, { -7, 77 }, + { 2, 54 }, { 5, 50 }, { -3, 68 }, { 1, 50 }, + { 6, 42 }, { -4, 81 }, { 1, 63 }, { -4, 70 }, + { 0, 67 }, { 2, 57 }, { -2, 76 }, { 11, 35 }, + { 4, 64 }, { 1, 61 }, { 11, 35 }, { 18, 25 }, + { 12, 24 }, { 13, 29 }, { 13, 36 }, { -10, 93 }, + { -7, 73 }, { -2, 73 }, { 13, 46 }, { 9, 49 }, + { -7, 
100 }, { 9, 53 }, { 2, 53 }, { 5, 53 }, + { -2, 61 }, { 0, 56 }, { 0, 56 }, { -13, 63 }, + { -5, 60 }, { -1, 62 }, { 4, 57 }, { -6, 69 }, + { 4, 57 }, { 14, 39 }, { 4, 51 }, { 13, 68 }, + { 3, 64 }, { 1, 61 }, { 9, 63 }, { 7, 50 }, + { 16, 39 }, { 5, 44 }, { 4, 52 }, { 11, 48 }, + { -5, 60 }, { -1, 59 }, { 0, 59 }, { 22, 33 }, + { 5, 44 }, { 14, 43 }, { -1, 78 }, { 0, 60 }, + { 9, 69 }, + + /* 166 - 226 */ + { 11, 28 }, { 2, 40 }, { 3, 44 }, { 0, 49 }, + { 0, 46 }, { 2, 44 }, { 2, 51 }, { 0, 47 }, + { 4, 39 }, { 2, 62 }, { 6, 46 }, { 0, 54 }, + { 3, 54 }, { 2, 58 }, { 4, 63 }, { 6, 51 }, + { 6, 57 }, { 7, 53 }, { 6, 52 }, { 6, 55 }, + { 11, 45 }, { 14, 36 }, { 8, 53 }, { -1, 82 }, + { 7, 55 }, { -3, 78 }, { 15, 46 }, { 22, 31 }, + { -1, 84 }, { 25, 7 }, { 30, -7 }, { 28, 3 }, + { 28, 4 }, { 32, 0 }, { 34, -1 }, { 30, 6 }, + { 30, 6 }, { 32, 9 }, { 31, 19 }, { 26, 27 }, + { 26, 30 }, { 37, 20 }, { 28, 34 }, { 17, 70 }, + { 1, 67 }, { 5, 59 }, { 9, 67 }, { 16, 30 }, + { 18, 32 }, { 18, 35 }, { 22, 29 }, { 24, 31 }, + { 23, 38 }, { 18, 43 }, { 20, 41 }, { 11, 63 }, + { 9, 59 }, { 9, 64 }, { -1, 94 }, { -2, 89 }, + { -9, 108 }, + + /* 227 - 275 */ + { -6, 76 }, { -2, 44 }, { 0, 45 }, { 0, 52 }, + { -3, 64 }, { -2, 59 }, { -4, 70 }, { -4, 75 }, + { -8, 82 }, { -17, 102 }, { -9, 77 }, { 3, 24 }, + { 0, 42 }, { 0, 48 }, { 0, 55 }, { -6, 59 }, + { -7, 71 }, { -12, 83 }, { -11, 87 }, { -30, 119 }, + { 1, 58 }, { -3, 29 }, { -1, 36 }, { 1, 38 }, + { 2, 43 }, { -6, 55 }, { 0, 58 }, { 0, 64 }, + { -3, 74 }, { -10, 90 }, { 0, 70 }, { -4, 29 }, + { 5, 31 }, { 7, 42 }, { 1, 59 }, { -2, 58 }, + { -3, 72 }, { -3, 81 }, { -11, 97 }, { 0, 58 }, + { 8, 5 }, { 10, 14 }, { 14, 18 }, { 13, 27 }, + { 2, 40 }, { 0, 58 }, { -3, 70 }, { -6, 79 }, + { -8, 85 }, + + /* 276 a bit special (not used, x264_cabac_encode_bypass is used instead) */ + { 0, 0 }, + + /* 277 - 337 */ + { -13, 106 }, { -16, 106 }, { -10, 87 }, { -21, 114 }, + { -18, 110 }, { -14, 98 }, { -22, 110 }, { -21, 106 }, + { -18, 
103 }, { -21, 107 }, { -23, 108 }, { -26, 112 }, + { -10, 96 }, { -12, 95 }, { -5, 91 }, { -9, 93 }, + { -22, 94 }, { -5, 86 }, { 9, 67 }, { -4, 80 }, + { -10, 85 }, { -1, 70 }, { 7, 60 }, { 9, 58 }, + { 5, 61 }, { 12, 50 }, { 15, 50 }, { 18, 49 }, + { 17, 54 }, { 10, 41 }, { 7, 46 }, { -1, 51 }, + { 7, 49 }, { 8, 52 }, { 9, 41 }, { 6, 47 }, + { 2, 55 }, { 13, 41 }, { 10, 44 }, { 6, 50 }, + { 5, 53 }, { 13, 49 }, { 4, 63 }, { 6, 64 }, + { -2, 69 }, { -2, 59 }, { 6, 70 }, { 10, 44 }, + { 9, 31 }, { 12, 43 }, { 3, 53 }, { 14, 34 }, + { 10, 38 }, { -3, 52 }, { 13, 40 }, { 17, 32 }, + { 7, 44 }, { 7, 38 }, { 13, 50 }, { 10, 57 }, + { 26, 43 }, + + /* 338 - 398 */ + { 14, 11 }, { 11, 14 }, { 9, 11 }, { 18, 11 }, + { 21, 9 }, { 23, -2 }, { 32, -15 }, { 32, -15 }, + { 34, -21 }, { 39, -23 }, { 42, -33 }, { 41, -31 }, + { 46, -28 }, { 38, -12 }, { 21, 29 }, { 45, -24 }, + { 53, -45 }, { 48, -26 }, { 65, -43 }, { 43, -19 }, + { 39, -10 }, { 30, 9 }, { 18, 26 }, { 20, 27 }, + { 0, 57 }, { -14, 82 }, { -5, 75 }, { -19, 97 }, + { -35, 125 }, { 27, 0 }, { 28, 0 }, { 31, -4 }, + { 27, 6 }, { 34, 8 }, { 30, 10 }, { 24, 22 }, + { 33, 19 }, { 22, 32 }, { 26, 31 }, { 21, 41 }, + { 26, 44 }, { 23, 47 }, { 16, 65 }, { 14, 71 }, + { 8, 60 }, { 6, 63 }, { 17, 65 }, { 21, 24 }, + { 23, 20 }, { 26, 23 }, { 27, 32 }, { 28, 23 }, + { 28, 24 }, { 23, 40 }, { 24, 32 }, { 28, 29 }, + { 23, 42 }, { 19, 57 }, { 22, 53 }, { 22, 61 }, + { 11, 86 }, + + /* 399 -> 435 */ + { 12, 40 }, { 11, 51 }, { 14, 59 }, + { -4, 79 }, { -7, 71 }, { -5, 69 }, { -9, 70 }, + { -8, 66 }, { -10, 68 }, { -19, 73 }, { -12, 69 }, + { -16, 70 }, { -15, 67 }, { -20, 62 }, { -19, 70 }, + { -16, 66 }, { -22, 65 }, { -20, 63 }, { 9, -2 }, + { 26, -9 }, { 33, -9 }, { 39, -7 }, { 41, -2 }, + { 45, 3 }, { 49, 9 }, { 45, 27 }, { 36, 59 }, + { -6, 66 }, { -7, 35 }, { -7, 42 }, { -8, 45 }, + { -5, 48 }, { -12, 56 }, { -6, 60 }, { -5, 62 }, + { -8, 66 }, { -8, 76 }, + + /* 436 -> 459 */ + { -5, 85 }, { -6, 81 }, { -10, 77 }, { -7, 
81 }, + { -17, 80 }, { -18, 73 }, { -4, 74 }, { -10, 83 }, + { -9, 71 }, { -9, 67 }, { -1, 61 }, { -8, 66 }, + { -14, 66 }, { 0, 59 }, { 2, 59 }, { 21, -13 }, + { 33, -14 }, { 39, -7 }, { 46, -2 }, { 51, 2 }, + { 60, 6 }, { 61, 17 }, { 55, 34 }, { 42, 62 }, + + /* 460 - 1024 */ + { -7, 92 }, { -5, 89 }, { -7, 96 }, { -13, 108 }, + { -3, 46 }, { -1, 65 }, { -1, 57 }, { -9, 93 }, + { -3, 74 }, { -9, 92 }, { -8, 87 }, { -23, 126 }, + { -7, 92 }, { -5, 89 }, { -7, 96 }, { -13, 108 }, + { -3, 46 }, { -1, 65 }, { -1, 57 }, { -9, 93 }, + { -3, 74 }, { -9, 92 }, { -8, 87 }, { -23, 126 }, + { -2, 85 }, { -6, 78 }, { -1, 75 }, { -7, 77 }, + { 2, 54 }, { 5, 50 }, { -3, 68 }, { 1, 50 }, + { 6, 42 }, { -4, 81 }, { 1, 63 }, { -4, 70 }, + { 0, 67 }, { 2, 57 }, { -2, 76 }, { 11, 35 }, + { 4, 64 }, { 1, 61 }, { 11, 35 }, { 18, 25 }, + { 12, 24 }, { 13, 29 }, { 13, 36 }, { -10, 93 }, + { -7, 73 }, { -2, 73 }, { 13, 46 }, { 9, 49 }, + { -7, 100 }, { 9, 53 }, { 2, 53 }, { 5, 53 }, + { -2, 61 }, { 0, 56 }, { 0, 56 }, { -13, 63 }, + { -5, 60 }, { -1, 62 }, { 4, 57 }, { -6, 69 }, + { 4, 57 }, { 14, 39 }, { 4, 51 }, { 13, 68 }, + { -2, 85 }, { -6, 78 }, { -1, 75 }, { -7, 77 }, + { 2, 54 }, { 5, 50 }, { -3, 68 }, { 1, 50 }, + { 6, 42 }, { -4, 81 }, { 1, 63 }, { -4, 70 }, + { 0, 67 }, { 2, 57 }, { -2, 76 }, { 11, 35 }, + { 4, 64 }, { 1, 61 }, { 11, 35 }, { 18, 25 }, + { 12, 24 }, { 13, 29 }, { 13, 36 }, { -10, 93 }, + { -7, 73 }, { -2, 73 }, { 13, 46 }, { 9, 49 }, + { -7, 100 }, { 9, 53 }, { 2, 53 }, { 5, 53 }, + { -2, 61 }, { 0, 56 }, { 0, 56 }, { -13, 63 }, + { -5, 60 }, { -1, 62 }, { 4, 57 }, { -6, 69 }, + { 4, 57 }, { 14, 39 }, { 4, 51 }, { 13, 68 }, + { 11, 28 }, { 2, 40 }, { 3, 44 }, { 0, 49 }, + { 0, 46 }, { 2, 44 }, { 2, 51 }, { 0, 47 }, + { 4, 39 }, { 2, 62 }, { 6, 46 }, { 0, 54 }, + { 3, 54 }, { 2, 58 }, { 4, 63 }, { 6, 51 }, + { 6, 57 }, { 7, 53 }, { 6, 52 }, { 6, 55 }, + { 11, 45 }, { 14, 36 }, { 8, 53 }, { -1, 82 }, + { 7, 55 }, { -3, 78 }, { 15, 46 }, { 22, 31 }, + { -1, 84 }, 
{ 25, 7 }, { 30, -7 }, { 28, 3 }, + { 28, 4 }, { 32, 0 }, { 34, -1 }, { 30, 6 }, + { 30, 6 }, { 32, 9 }, { 31, 19 }, { 26, 27 }, + { 26, 30 }, { 37, 20 }, { 28, 34 }, { 17, 70 }, + { 11, 28 }, { 2, 40 }, { 3, 44 }, { 0, 49 }, + { 0, 46 }, { 2, 44 }, { 2, 51 }, { 0, 47 }, + { 4, 39 }, { 2, 62 }, { 6, 46 }, { 0, 54 }, + { 3, 54 }, { 2, 58 }, { 4, 63 }, { 6, 51 }, + { 6, 57 }, { 7, 53 }, { 6, 52 }, { 6, 55 }, + { 11, 45 }, { 14, 36 }, { 8, 53 }, { -1, 82 }, + { 7, 55 }, { -3, 78 }, { 15, 46 }, { 22, 31 }, + { -1, 84 }, { 25, 7 }, { 30, -7 }, { 28, 3 }, + { 28, 4 }, { 32, 0 }, { 34, -1 }, { 30, 6 }, + { 30, 6 }, { 32, 9 }, { 31, 19 }, { 26, 27 }, + { 26, 30 }, { 37, 20 }, { 28, 34 }, { 17, 70 }, + { -4, 79 }, { -7, 71 }, { -5, 69 }, { -9, 70 }, + { -8, 66 }, { -10, 68 }, { -19, 73 }, { -12, 69 }, + { -16, 70 }, { -15, 67 }, { -20, 62 }, { -19, 70 }, + { -16, 66 }, { -22, 65 }, { -20, 63 }, { -5, 85 }, + { -6, 81 }, { -10, 77 }, { -7, 81 }, { -17, 80 }, + { -18, 73 }, { -4, 74 }, { -10, 83 }, { -9, 71 }, + { -9, 67 }, { -1, 61 }, { -8, 66 }, { -14, 66 }, + { 0, 59 }, { 2, 59 }, { 9, -2 }, { 26, -9 }, + { 33, -9 }, { 39, -7 }, { 41, -2 }, { 45, 3 }, + { 49, 9 }, { 45, 27 }, { 36, 59 }, { 21, -13 }, + { 33, -14 }, { 39, -7 }, { 46, -2 }, { 51, 2 }, + { 60, 6 }, { 61, 17 }, { 55, 34 }, { 42, 62 }, + { -6, 66 }, { -7, 35 }, { -7, 42 }, { -8, 45 }, + { -5, 48 }, { -12, 56 }, { -6, 60 }, { -5, 62 }, + { -8, 66 }, { -8, 76 }, { -4, 79 }, { -7, 71 }, + { -5, 69 }, { -9, 70 }, { -8, 66 }, { -10, 68 }, + { -19, 73 }, { -12, 69 }, { -16, 70 }, { -15, 67 }, + { -20, 62 }, { -19, 70 }, { -16, 66 }, { -22, 65 }, + { -20, 63 }, { -5, 85 }, { -6, 81 }, { -10, 77 }, + { -7, 81 }, { -17, 80 }, { -18, 73 }, { -4, 74 }, + { -10, 83 }, { -9, 71 }, { -9, 67 }, { -1, 61 }, + { -8, 66 }, { -14, 66 }, { 0, 59 }, { 2, 59 }, + { 9, -2 }, { 26, -9 }, { 33, -9 }, { 39, -7 }, + { 41, -2 }, { 45, 3 }, { 49, 9 }, { 45, 27 }, + { 36, 59 }, { 21, -13 }, { 33, -14 }, { 39, -7 }, + { 46, -2 }, { 51, 2 }, 
{ 60, 6 }, { 61, 17 }, + { 55, 34 }, { 42, 62 }, { -6, 66 }, { -7, 35 }, + { -7, 42 }, { -8, 45 }, { -5, 48 }, { -12, 56 }, + { -6, 60 }, { -5, 62 }, { -8, 66 }, { -8, 76 }, + { -13, 106 }, { -16, 106 }, { -10, 87 }, { -21, 114 }, + { -18, 110 }, { -14, 98 }, { -22, 110 }, { -21, 106 }, + { -18, 103 }, { -21, 107 }, { -23, 108 }, { -26, 112 }, + { -10, 96 }, { -12, 95 }, { -5, 91 }, { -9, 93 }, + { -22, 94 }, { -5, 86 }, { 9, 67 }, { -4, 80 }, + { -10, 85 }, { -1, 70 }, { 7, 60 }, { 9, 58 }, + { 5, 61 }, { 12, 50 }, { 15, 50 }, { 18, 49 }, + { 17, 54 }, { 10, 41 }, { 7, 46 }, { -1, 51 }, + { 7, 49 }, { 8, 52 }, { 9, 41 }, { 6, 47 }, + { 2, 55 }, { 13, 41 }, { 10, 44 }, { 6, 50 }, + { 5, 53 }, { 13, 49 }, { 4, 63 }, { 6, 64 }, + { -13, 106 }, { -16, 106 }, { -10, 87 }, { -21, 114 }, + { -18, 110 }, { -14, 98 }, { -22, 110 }, { -21, 106 }, + { -18, 103 }, { -21, 107 }, { -23, 108 }, { -26, 112 }, + { -10, 96 }, { -12, 95 }, { -5, 91 }, { -9, 93 }, + { -22, 94 }, { -5, 86 }, { 9, 67 }, { -4, 80 }, + { -10, 85 }, { -1, 70 }, { 7, 60 }, { 9, 58 }, + { 5, 61 }, { 12, 50 }, { 15, 50 }, { 18, 49 }, + { 17, 54 }, { 10, 41 }, { 7, 46 }, { -1, 51 }, + { 7, 49 }, { 8, 52 }, { 9, 41 }, { 6, 47 }, + { 2, 55 }, { 13, 41 }, { 10, 44 }, { 6, 50 }, + { 5, 53 }, { 13, 49 }, { 4, 63 }, { 6, 64 }, + { 14, 11 }, { 11, 14 }, { 9, 11 }, { 18, 11 }, + { 21, 9 }, { 23, -2 }, { 32, -15 }, { 32, -15 }, + { 34, -21 }, { 39, -23 }, { 42, -33 }, { 41, -31 }, + { 46, -28 }, { 38, -12 }, { 21, 29 }, { 45, -24 }, + { 53, -45 }, { 48, -26 }, { 65, -43 }, { 43, -19 }, + { 39, -10 }, { 30, 9 }, { 18, 26 }, { 20, 27 }, + { 0, 57 }, { -14, 82 }, { -5, 75 }, { -19, 97 }, + { -35, 125 }, { 27, 0 }, { 28, 0 }, { 31, -4 }, + { 27, 6 }, { 34, 8 }, { 30, 10 }, { 24, 22 }, + { 33, 19 }, { 22, 32 }, { 26, 31 }, { 21, 41 }, + { 26, 44 }, { 23, 47 }, { 16, 65 }, { 14, 71 }, + { 14, 11 }, { 11, 14 }, { 9, 11 }, { 18, 11 }, + { 21, 9 }, { 23, -2 }, { 32, -15 }, { 32, -15 }, + { 34, -21 }, { 39, -23 }, { 42, -33 }, 
{ 41, -31 }, + { 46, -28 }, { 38, -12 }, { 21, 29 }, { 45, -24 }, + { 53, -45 }, { 48, -26 }, { 65, -43 }, { 43, -19 }, + { 39, -10 }, { 30, 9 }, { 18, 26 }, { 20, 27 }, + { 0, 57 }, { -14, 82 }, { -5, 75 }, { -19, 97 }, + { -35, 125 }, { 27, 0 }, { 28, 0 }, { 31, -4 }, + { 27, 6 }, { 34, 8 }, { 30, 10 }, { 24, 22 }, + { 33, 19 }, { 22, 32 }, { 26, 31 }, { 21, 41 }, + { 26, 44 }, { 23, 47 }, { 16, 65 }, { 14, 71 }, + { -6, 76 }, { -2, 44 }, { 0, 45 }, { 0, 52 }, + { -3, 64 }, { -2, 59 }, { -4, 70 }, { -4, 75 }, + { -8, 82 }, { -17, 102 }, { -9, 77 }, { 3, 24 }, + { 0, 42 }, { 0, 48 }, { 0, 55 }, { -6, 59 }, + { -7, 71 }, { -12, 83 }, { -11, 87 }, { -30, 119 }, + { 1, 58 }, { -3, 29 }, { -1, 36 }, { 1, 38 }, + { 2, 43 }, { -6, 55 }, { 0, 58 }, { 0, 64 }, + { -3, 74 }, { -10, 90 }, { -6, 76 }, { -2, 44 }, + { 0, 45 }, { 0, 52 }, { -3, 64 }, { -2, 59 }, + { -4, 70 }, { -4, 75 }, { -8, 82 }, { -17, 102 }, + { -9, 77 }, { 3, 24 }, { 0, 42 }, { 0, 48 }, + { 0, 55 }, { -6, 59 }, { -7, 71 }, { -12, 83 }, + { -11, 87 }, { -30, 119 }, { 1, 58 }, { -3, 29 }, + { -1, 36 }, { 1, 38 }, { 2, 43 }, { -6, 55 }, + { 0, 58 }, { 0, 64 }, { -3, 74 }, { -10, 90 }, + { -3, 74 }, { -9, 92 }, { -8, 87 }, { -23, 126 }, + { -3, 74 }, { -9, 92 }, { -8, 87 }, { -23, 126 }, + { -3, 74 }, { -9, 92 }, { -8, 87 }, { -23, 126 } + }, + + /* i_cabac_init_idc == 1 */ + { + /* 0 - 10 */ + { 20, -15 }, { 2, 54 }, { 3, 74 }, { 20, -15 }, + { 2, 54 }, { 3, 74 }, { -28, 127 }, { -23, 104 }, + { -6, 53 }, { -1, 54 }, { 7, 51 }, + + /* 11 - 23 */ + { 22, 25 }, { 34, 0 }, { 16, 0 }, { -2, 9 }, + { 4, 41 }, { -29, 118 }, { 2, 65 }, { -6, 71 }, + { -13, 79 }, { 5, 52 }, { 9, 50 }, { -3, 70 }, + { 10, 54 }, + + /* 24 - 39 */ + { 26, 34 }, { 19, 22 }, { 40, 0 }, { 57, 2 }, + { 41, 36 }, { 26, 69 }, { -45, 127 }, { -15, 101 }, + { -4, 76 }, { -6, 71 }, { -13, 79 }, { 5, 52 }, + { 6, 69 }, { -13, 90 }, { 0, 52 }, { 8, 43 }, + + /* 40 - 53 */ + { -2, 69 },{ -5, 82 },{ -10, 96 },{ 2, 59 }, + { 2, 75 },{ -3, 87 },{ 
-3, 100 },{ 1, 56 }, + { -3, 74 },{ -6, 85 },{ 0, 59 },{ -3, 81 }, + { -7, 86 },{ -5, 95 }, + + /* 54 - 59 */ + { -1, 66 },{ -1, 77 },{ 1, 70 },{ -2, 86 }, + { -5, 72 },{ 0, 61 }, + + /* 60 - 69 */ + { 0, 41 }, { 0, 63 }, { 0, 63 }, { 0, 63 }, + { -9, 83 }, { 4, 86 }, { 0, 97 }, { -7, 72 }, + { 13, 41 }, { 3, 62 }, + + /* 70 - 104 */ + { 13, 15 }, { 7, 51 }, { 2, 80 }, { -39, 127 }, + { -18, 91 }, { -17, 96 }, { -26, 81 }, { -35, 98 }, + { -24, 102 }, { -23, 97 }, { -27, 119 }, { -24, 99 }, + { -21, 110 }, { -18, 102 }, { -36, 127 }, { 0, 80 }, + { -5, 89 }, { -7, 94 }, { -4, 92 }, { 0, 39 }, + { 0, 65 }, { -15, 84 }, { -35, 127 }, { -2, 73 }, + { -12, 104 }, { -9, 91 }, { -31, 127 }, { 3, 55 }, + { 7, 56 }, { 7, 55 }, { 8, 61 }, { -3, 53 }, + { 0, 68 }, { -7, 74 }, { -9, 88 }, + + /* 105 -> 165 */ + { -13, 103 }, { -13, 91 }, { -9, 89 }, { -14, 92 }, + { -8, 76 }, { -12, 87 }, { -23, 110 }, { -24, 105 }, + { -10, 78 }, { -20, 112 }, { -17, 99 }, { -78, 127 }, + { -70, 127 }, { -50, 127 }, { -46, 127 }, { -4, 66 }, + { -5, 78 }, { -4, 71 }, { -8, 72 }, { 2, 59 }, + { -1, 55 }, { -7, 70 }, { -6, 75 }, { -8, 89 }, + { -34, 119 }, { -3, 75 }, { 32, 20 }, { 30, 22 }, + { -44, 127 }, { 0, 54 }, { -5, 61 }, { 0, 58 }, + { -1, 60 }, { -3, 61 }, { -8, 67 }, { -25, 84 }, + { -14, 74 }, { -5, 65 }, { 5, 52 }, { 2, 57 }, + { 0, 61 }, { -9, 69 }, { -11, 70 }, { 18, 55 }, + { -4, 71 }, { 0, 58 }, { 7, 61 }, { 9, 41 }, + { 18, 25 }, { 9, 32 }, { 5, 43 }, { 9, 47 }, + { 0, 44 }, { 0, 51 }, { 2, 46 }, { 19, 38 }, + { -4, 66 }, { 15, 38 }, { 12, 42 }, { 9, 34 }, + { 0, 89 }, + + /* 166 - 226 */ + { 4, 45 }, { 10, 28 }, { 10, 31 }, { 33, -11 }, + { 52, -43 }, { 18, 15 }, { 28, 0 }, { 35, -22 }, + { 38, -25 }, { 34, 0 }, { 39, -18 }, { 32, -12 }, + { 102, -94 }, { 0, 0 }, { 56, -15 }, { 33, -4 }, + { 29, 10 }, { 37, -5 }, { 51, -29 }, { 39, -9 }, + { 52, -34 }, { 69, -58 }, { 67, -63 }, { 44, -5 }, + { 32, 7 }, { 55, -29 }, { 32, 1 }, { 0, 0 }, + { 27, 36 }, { 33, -25 }, { 34, -30 }, 
{ 36, -28 }, + { 38, -28 }, { 38, -27 }, { 34, -18 }, { 35, -16 }, + { 34, -14 }, { 32, -8 }, { 37, -6 }, { 35, 0 }, + { 30, 10 }, { 28, 18 }, { 26, 25 }, { 29, 41 }, + { 0, 75 }, { 2, 72 }, { 8, 77 }, { 14, 35 }, + { 18, 31 }, { 17, 35 }, { 21, 30 }, { 17, 45 }, + { 20, 42 }, { 18, 45 }, { 27, 26 }, { 16, 54 }, + { 7, 66 }, { 16, 56 }, { 11, 73 }, { 10, 67 }, + { -10, 116 }, + + /* 227 - 275 */ + { -23, 112 }, { -15, 71 }, { -7, 61 }, { 0, 53 }, + { -5, 66 }, { -11, 77 }, { -9, 80 }, { -9, 84 }, + { -10, 87 }, { -34, 127 }, { -21, 101 }, { -3, 39 }, + { -5, 53 }, { -7, 61 }, { -11, 75 }, { -15, 77 }, + { -17, 91 }, { -25, 107 }, { -25, 111 }, { -28, 122 }, + { -11, 76 }, { -10, 44 }, { -10, 52 }, { -10, 57 }, + { -9, 58 }, { -16, 72 }, { -7, 69 }, { -4, 69 }, + { -5, 74 }, { -9, 86 }, { 2, 66 }, { -9, 34 }, + { 1, 32 }, { 11, 31 }, { 5, 52 }, { -2, 55 }, + { -2, 67 }, { 0, 73 }, { -8, 89 }, { 3, 52 }, + { 7, 4 }, { 10, 8 }, { 17, 8 }, { 16, 19 }, + { 3, 37 }, { -1, 61 }, { -5, 73 }, { -1, 70 }, + { -4, 78 }, + + /* 276 a bit special (not used, x264_cabac_encode_bypass is used instead) */ + { 0, 0 }, + + /* 277 - 337 */ + { -21, 126 }, { -23, 124 }, { -20, 110 }, { -26, 126 }, + { -25, 124 }, { -17, 105 }, { -27, 121 }, { -27, 117 }, + { -17, 102 }, { -26, 117 }, { -27, 116 }, { -33, 122 }, + { -10, 95 }, { -14, 100 }, { -8, 95 }, { -17, 111 }, + { -28, 114 }, { -6, 89 }, { -2, 80 }, { -4, 82 }, + { -9, 85 }, { -8, 81 }, { -1, 72 }, { 5, 64 }, + { 1, 67 }, { 9, 56 }, { 0, 69 }, { 1, 69 }, + { 7, 69 }, { -7, 69 }, { -6, 67 }, { -16, 77 }, + { -2, 64 }, { 2, 61 }, { -6, 67 }, { -3, 64 }, + { 2, 57 }, { -3, 65 }, { -3, 66 }, { 0, 62 }, + { 9, 51 }, { -1, 66 }, { -2, 71 }, { -2, 75 }, + { -1, 70 }, { -9, 72 }, { 14, 60 }, { 16, 37 }, + { 0, 47 }, { 18, 35 }, { 11, 37 }, { 12, 41 }, + { 10, 41 }, { 2, 48 }, { 12, 41 }, { 13, 41 }, + { 0, 59 }, { 3, 50 }, { 19, 40 }, { 3, 66 }, + { 18, 50 }, + + /* 338 - 398 */ + { 19, -6 }, { 18, -6 }, { 14, 0 }, { 26, -12 }, + { 31, 
-16 }, { 33, -25 }, { 33, -22 }, { 37, -28 }, + { 39, -30 }, { 42, -30 }, { 47, -42 }, { 45, -36 }, + { 49, -34 }, { 41, -17 }, { 32, 9 }, { 69, -71 }, + { 63, -63 }, { 66, -64 }, { 77, -74 }, { 54, -39 }, + { 52, -35 }, { 41, -10 }, { 36, 0 }, { 40, -1 }, + { 30, 14 }, { 28, 26 }, { 23, 37 }, { 12, 55 }, + { 11, 65 }, { 37, -33 }, { 39, -36 }, { 40, -37 }, + { 38, -30 }, { 46, -33 }, { 42, -30 }, { 40, -24 }, + { 49, -29 }, { 38, -12 }, { 40, -10 }, { 38, -3 }, + { 46, -5 }, { 31, 20 }, { 29, 30 }, { 25, 44 }, + { 12, 48 }, { 11, 49 }, { 26, 45 }, { 22, 22 }, + { 23, 22 }, { 27, 21 }, { 33, 20 }, { 26, 28 }, + { 30, 24 }, { 27, 34 }, { 18, 42 }, { 25, 39 }, + { 18, 50 }, { 12, 70 }, { 21, 54 }, { 14, 71 }, + { 11, 83 }, + + /* 399 -> 435 */ + { 25, 32 }, { 21, 49 }, { 21, 54 }, + { -5, 85 }, { -6, 81 }, { -10, 77 }, { -7, 81 }, + { -17, 80 }, { -18, 73 }, { -4, 74 }, { -10, 83 }, + { -9, 71 }, { -9, 67 }, { -1, 61 }, { -8, 66 }, + { -14, 66 }, { 0, 59 }, { 2, 59 }, { 17, -10 }, + { 32, -13 }, { 42, -9 }, { 49, -5 }, { 53, 0 }, + { 64, 3 }, { 68, 10 }, { 66, 27 }, { 47, 57 }, + { -5, 71 }, { 0, 24 }, { -1, 36 }, { -2, 42 }, + { -2, 52 }, { -9, 57 }, { -6, 63 }, { -4, 65 }, + { -4, 67 }, { -7, 82 }, + + /* 436 -> 459 */ + { -3, 81 }, { -3, 76 }, { -7, 72 }, { -6, 78 }, + { -12, 72 }, { -14, 68 }, { -3, 70 }, { -6, 76 }, + { -5, 66 }, { -5, 62 }, { 0, 57 }, { -4, 61 }, + { -9, 60 }, { 1, 54 }, { 2, 58 }, { 17, -10 }, + { 32, -13 }, { 42, -9 }, { 49, -5 }, { 53, 0 }, + { 64, 3 }, { 68, 10 }, { 66, 27 }, { 47, 57 }, + + /* 460 - 1024 */ + { 0, 80 }, { -5, 89 }, { -7, 94 }, { -4, 92 }, + { 0, 39 }, { 0, 65 }, { -15, 84 }, { -35, 127 }, + { -2, 73 }, { -12, 104 }, { -9, 91 }, { -31, 127 }, + { 0, 80 }, { -5, 89 }, { -7, 94 }, { -4, 92 }, + { 0, 39 }, { 0, 65 }, { -15, 84 }, { -35, 127 }, + { -2, 73 }, { -12, 104 }, { -9, 91 }, { -31, 127 }, + { -13, 103 }, { -13, 91 }, { -9, 89 }, { -14, 92 }, + { -8, 76 }, { -12, 87 }, { -23, 110 }, { -24, 105 }, + { -10, 78 }, { -20, 
112 }, { -17, 99 }, { -78, 127 }, + { -70, 127 }, { -50, 127 }, { -46, 127 }, { -4, 66 }, + { -5, 78 }, { -4, 71 }, { -8, 72 }, { 2, 59 }, + { -1, 55 }, { -7, 70 }, { -6, 75 }, { -8, 89 }, + { -34, 119 }, { -3, 75 }, { 32, 20 }, { 30, 22 }, + { -44, 127 }, { 0, 54 }, { -5, 61 }, { 0, 58 }, + { -1, 60 }, { -3, 61 }, { -8, 67 }, { -25, 84 }, + { -14, 74 }, { -5, 65 }, { 5, 52 }, { 2, 57 }, + { 0, 61 }, { -9, 69 }, { -11, 70 }, { 18, 55 }, + { -13, 103 }, { -13, 91 }, { -9, 89 }, { -14, 92 }, + { -8, 76 }, { -12, 87 }, { -23, 110 }, { -24, 105 }, + { -10, 78 }, { -20, 112 }, { -17, 99 }, { -78, 127 }, + { -70, 127 }, { -50, 127 }, { -46, 127 }, { -4, 66 }, + { -5, 78 }, { -4, 71 }, { -8, 72 }, { 2, 59 }, + { -1, 55 }, { -7, 70 }, { -6, 75 }, { -8, 89 }, + { -34, 119 }, { -3, 75 }, { 32, 20 }, { 30, 22 }, + { -44, 127 }, { 0, 54 }, { -5, 61 }, { 0, 58 }, + { -1, 60 }, { -3, 61 }, { -8, 67 }, { -25, 84 }, + { -14, 74 }, { -5, 65 }, { 5, 52 }, { 2, 57 }, + { 0, 61 }, { -9, 69 }, { -11, 70 }, { 18, 55 }, + { 4, 45 }, { 10, 28 }, { 10, 31 }, { 33, -11 }, + { 52, -43 }, { 18, 15 }, { 28, 0 }, { 35, -22 }, + { 38, -25 }, { 34, 0 }, { 39, -18 }, { 32, -12 }, + { 102, -94 }, { 0, 0 }, { 56, -15 }, { 33, -4 }, + { 29, 10 }, { 37, -5 }, { 51, -29 }, { 39, -9 }, + { 52, -34 }, { 69, -58 }, { 67, -63 }, { 44, -5 }, + { 32, 7 }, { 55, -29 }, { 32, 1 }, { 0, 0 }, + { 27, 36 }, { 33, -25 }, { 34, -30 }, { 36, -28 }, + { 38, -28 }, { 38, -27 }, { 34, -18 }, { 35, -16 }, + { 34, -14 }, { 32, -8 }, { 37, -6 }, { 35, 0 }, + { 30, 10 }, { 28, 18 }, { 26, 25 }, { 29, 41 }, + { 4, 45 }, { 10, 28 }, { 10, 31 }, { 33, -11 }, + { 52, -43 }, { 18, 15 }, { 28, 0 }, { 35, -22 }, + { 38, -25 }, { 34, 0 }, { 39, -18 }, { 32, -12 }, + { 102, -94 }, { 0, 0 }, { 56, -15 }, { 33, -4 }, + { 29, 10 }, { 37, -5 }, { 51, -29 }, { 39, -9 }, + { 52, -34 }, { 69, -58 }, { 67, -63 }, { 44, -5 }, + { 32, 7 }, { 55, -29 }, { 32, 1 }, { 0, 0 }, + { 27, 36 }, { 33, -25 }, { 34, -30 }, { 36, -28 }, + { 38, -28 }, { 
38, -27 }, { 34, -18 }, { 35, -16 }, + { 34, -14 }, { 32, -8 }, { 37, -6 }, { 35, 0 }, + { 30, 10 }, { 28, 18 }, { 26, 25 }, { 29, 41 }, + { -5, 85 }, { -6, 81 }, { -10, 77 }, { -7, 81 }, + { -17, 80 }, { -18, 73 }, { -4, 74 }, { -10, 83 }, + { -9, 71 }, { -9, 67 }, { -1, 61 }, { -8, 66 }, + { -14, 66 }, { 0, 59 }, { 2, 59 }, { -3, 81 }, + { -3, 76 }, { -7, 72 }, { -6, 78 }, { -12, 72 }, + { -14, 68 }, { -3, 70 }, { -6, 76 }, { -5, 66 }, + { -5, 62 }, { 0, 57 }, { -4, 61 }, { -9, 60 }, + { 1, 54 }, { 2, 58 }, { 17, -10 }, { 32, -13 }, + { 42, -9 }, { 49, -5 }, { 53, 0 }, { 64, 3 }, + { 68, 10 }, { 66, 27 }, { 47, 57 }, { 17, -10 }, + { 32, -13 }, { 42, -9 }, { 49, -5 }, { 53, 0 }, + { 64, 3 }, { 68, 10 }, { 66, 27 }, { 47, 57 }, + { -5, 71 }, { 0, 24 }, { -1, 36 }, { -2, 42 }, + { -2, 52 }, { -9, 57 }, { -6, 63 }, { -4, 65 }, + { -4, 67 }, { -7, 82 }, { -5, 85 }, { -6, 81 }, + { -10, 77 }, { -7, 81 }, { -17, 80 }, { -18, 73 }, + { -4, 74 }, { -10, 83 }, { -9, 71 }, { -9, 67 }, + { -1, 61 }, { -8, 66 }, { -14, 66 }, { 0, 59 }, + { 2, 59 }, { -3, 81 }, { -3, 76 }, { -7, 72 }, + { -6, 78 }, { -12, 72 }, { -14, 68 }, { -3, 70 }, + { -6, 76 }, { -5, 66 }, { -5, 62 }, { 0, 57 }, + { -4, 61 }, { -9, 60 }, { 1, 54 }, { 2, 58 }, + { 17, -10 }, { 32, -13 }, { 42, -9 }, { 49, -5 }, + { 53, 0 }, { 64, 3 }, { 68, 10 }, { 66, 27 }, + { 47, 57 }, { 17, -10 }, { 32, -13 }, { 42, -9 }, + { 49, -5 }, { 53, 0 }, { 64, 3 }, { 68, 10 }, + { 66, 27 }, { 47, 57 }, { -5, 71 }, { 0, 24 }, + { -1, 36 }, { -2, 42 }, { -2, 52 }, { -9, 57 }, + { -6, 63 }, { -4, 65 }, { -4, 67 }, { -7, 82 }, + { -21, 126 }, { -23, 124 }, { -20, 110 }, { -26, 126 }, + { -25, 124 }, { -17, 105 }, { -27, 121 }, { -27, 117 }, + { -17, 102 }, { -26, 117 }, { -27, 116 }, { -33, 122 }, + { -10, 95 }, { -14, 100 }, { -8, 95 }, { -17, 111 }, + { -28, 114 }, { -6, 89 }, { -2, 80 }, { -4, 82 }, + { -9, 85 }, { -8, 81 }, { -1, 72 }, { 5, 64 }, + { 1, 67 }, { 9, 56 }, { 0, 69 }, { 1, 69 }, + { 7, 69 }, { -7, 69 }, { -6, 67 
}, { -16, 77 }, + { -2, 64 }, { 2, 61 }, { -6, 67 }, { -3, 64 }, + { 2, 57 }, { -3, 65 }, { -3, 66 }, { 0, 62 }, + { 9, 51 }, { -1, 66 }, { -2, 71 }, { -2, 75 }, + { -21, 126 }, { -23, 124 }, { -20, 110 }, { -26, 126 }, + { -25, 124 }, { -17, 105 }, { -27, 121 }, { -27, 117 }, + { -17, 102 }, { -26, 117 }, { -27, 116 }, { -33, 122 }, + { -10, 95 }, { -14, 100 }, { -8, 95 }, { -17, 111 }, + { -28, 114 }, { -6, 89 }, { -2, 80 }, { -4, 82 }, + { -9, 85 }, { -8, 81 }, { -1, 72 }, { 5, 64 }, + { 1, 67 }, { 9, 56 }, { 0, 69 }, { 1, 69 }, + { 7, 69 }, { -7, 69 }, { -6, 67 }, { -16, 77 }, + { -2, 64 }, { 2, 61 }, { -6, 67 }, { -3, 64 }, + { 2, 57 }, { -3, 65 }, { -3, 66 }, { 0, 62 }, + { 9, 51 }, { -1, 66 }, { -2, 71 }, { -2, 75 }, + { 19, -6 }, { 18, -6 }, { 14, 0 }, { 26, -12 }, + { 31, -16 }, { 33, -25 }, { 33, -22 }, { 37, -28 }, + { 39, -30 }, { 42, -30 }, { 47, -42 }, { 45, -36 }, + { 49, -34 }, { 41, -17 }, { 32, 9 }, { 69, -71 }, + { 63, -63 }, { 66, -64 }, { 77, -74 }, { 54, -39 }, + { 52, -35 }, { 41, -10 }, { 36, 0 }, { 40, -1 }, + { 30, 14 }, { 28, 26 }, { 23, 37 }, { 12, 55 }, + { 11, 65 }, { 37, -33 }, { 39, -36 }, { 40, -37 }, + { 38, -30 }, { 46, -33 }, { 42, -30 }, { 40, -24 }, + { 49, -29 }, { 38, -12 }, { 40, -10 }, { 38, -3 }, + { 46, -5 }, { 31, 20 }, { 29, 30 }, { 25, 44 }, + { 19, -6 }, { 18, -6 }, { 14, 0 }, { 26, -12 }, + { 31, -16 }, { 33, -25 }, { 33, -22 }, { 37, -28 }, + { 39, -30 }, { 42, -30 }, { 47, -42 }, { 45, -36 }, + { 49, -34 }, { 41, -17 }, { 32, 9 }, { 69, -71 }, + { 63, -63 }, { 66, -64 }, { 77, -74 }, { 54, -39 }, + { 52, -35 }, { 41, -10 }, { 36, 0 }, { 40, -1 }, + { 30, 14 }, { 28, 26 }, { 23, 37 }, { 12, 55 }, + { 11, 65 }, { 37, -33 }, { 39, -36 }, { 40, -37 }, + { 38, -30 }, { 46, -33 }, { 42, -30 }, { 40, -24 }, + { 49, -29 }, { 38, -12 }, { 40, -10 }, { 38, -3 }, + { 46, -5 }, { 31, 20 }, { 29, 30 }, { 25, 44 }, + { -23, 112 }, { -15, 71 }, { -7, 61 }, { 0, 53 }, + { -5, 66 }, { -11, 77 }, { -9, 80 }, { -9, 84 }, + { -10, 87 
}, { -34, 127 }, { -21, 101 }, { -3, 39 }, + { -5, 53 }, { -7, 61 }, { -11, 75 }, { -15, 77 }, + { -17, 91 }, { -25, 107 }, { -25, 111 }, { -28, 122 }, + { -11, 76 }, { -10, 44 }, { -10, 52 }, { -10, 57 }, + { -9, 58 }, { -16, 72 }, { -7, 69 }, { -4, 69 }, + { -5, 74 }, { -9, 86 }, { -23, 112 }, { -15, 71 }, + { -7, 61 }, { 0, 53 }, { -5, 66 }, { -11, 77 }, + { -9, 80 }, { -9, 84 }, { -10, 87 }, { -34, 127 }, + { -21, 101 }, { -3, 39 }, { -5, 53 }, { -7, 61 }, + { -11, 75 }, { -15, 77 }, { -17, 91 }, { -25, 107 }, + { -25, 111 }, { -28, 122 }, { -11, 76 }, { -10, 44 }, + { -10, 52 }, { -10, 57 }, { -9, 58 }, { -16, 72 }, + { -7, 69 }, { -4, 69 }, { -5, 74 }, { -9, 86 }, + { -2, 73 }, { -12, 104 }, { -9, 91 }, { -31, 127 }, + { -2, 73 }, { -12, 104 }, { -9, 91 }, { -31, 127 }, + { -2, 73 }, { -12, 104 }, { -9, 91 }, { -31, 127 } + }, + + /* i_cabac_init_idc == 2 */ + { + /* 0 - 10 */ + { 20, -15 }, { 2, 54 }, { 3, 74 }, { 20, -15 }, + { 2, 54 }, { 3, 74 }, { -28, 127 }, { -23, 104 }, + { -6, 53 }, { -1, 54 }, { 7, 51 }, + + /* 11 - 23 */ + { 29, 16 }, { 25, 0 }, { 14, 0 }, { -10, 51 }, + { -3, 62 }, { -27, 99 }, { 26, 16 }, { -4, 85 }, + { -24, 102 }, { 5, 57 }, { 6, 57 }, { -17, 73 }, + { 14, 57 }, + + /* 24 - 39 */ + { 20, 40 }, { 20, 10 }, { 29, 0 }, { 54, 0 }, + { 37, 42 }, { 12, 97 }, { -32, 127 }, { -22, 117 }, + { -2, 74 }, { -4, 85 }, { -24, 102 }, { 5, 57 }, + { -6, 93 }, { -14, 88 }, { -6, 44 }, { 4, 55 }, + + /* 40 - 53 */ + { -11, 89 },{ -15, 103 },{ -21, 116 },{ 19, 57 }, + { 20, 58 },{ 4, 84 },{ 6, 96 },{ 1, 63 }, + { -5, 85 },{ -13, 106 },{ 5, 63 },{ 6, 75 }, + { -3, 90 },{ -1, 101 }, + + /* 54 - 59 */ + { 3, 55 },{ -4, 79 },{ -2, 75 },{ -12, 97 }, + { -7, 50 },{ 1, 60 }, + + /* 60 - 69 */ + { 0, 41 }, { 0, 63 }, { 0, 63 }, { 0, 63 }, + { -9, 83 }, { 4, 86 }, { 0, 97 }, { -7, 72 }, + { 13, 41 }, { 3, 62 }, + + /* 70 - 104 */ + { 7, 34 }, { -9, 88 }, { -20, 127 }, { -36, 127 }, + { -17, 91 }, { -14, 95 }, { -25, 84 }, { -25, 86 }, + { -12, 89 }, { -17, 
91 }, { -31, 127 }, { -14, 76 }, + { -18, 103 }, { -13, 90 }, { -37, 127 }, { 11, 80 }, + { 5, 76 }, { 2, 84 }, { 5, 78 }, { -6, 55 }, + { 4, 61 }, { -14, 83 }, { -37, 127 }, { -5, 79 }, + { -11, 104 }, { -11, 91 }, { -30, 127 }, { 0, 65 }, + { -2, 79 }, { 0, 72 }, { -4, 92 }, { -6, 56 }, + { 3, 68 }, { -8, 71 }, { -13, 98 }, + + /* 105 -> 165 */ + { -4, 86 }, { -12, 88 }, { -5, 82 }, { -3, 72 }, + { -4, 67 }, { -8, 72 }, { -16, 89 }, { -9, 69 }, + { -1, 59 }, { 5, 66 }, { 4, 57 }, { -4, 71 }, + { -2, 71 }, { 2, 58 }, { -1, 74 }, { -4, 44 }, + { -1, 69 }, { 0, 62 }, { -7, 51 }, { -4, 47 }, + { -6, 42 }, { -3, 41 }, { -6, 53 }, { 8, 76 }, + { -9, 78 }, { -11, 83 }, { 9, 52 }, { 0, 67 }, + { -5, 90 }, { 1, 67 }, { -15, 72 }, { -5, 75 }, + { -8, 80 }, { -21, 83 }, { -21, 64 }, { -13, 31 }, + { -25, 64 }, { -29, 94 }, { 9, 75 }, { 17, 63 }, + { -8, 74 }, { -5, 35 }, { -2, 27 }, { 13, 91 }, + { 3, 65 }, { -7, 69 }, { 8, 77 }, { -10, 66 }, + { 3, 62 }, { -3, 68 }, { -20, 81 }, { 0, 30 }, + { 1, 7 }, { -3, 23 }, { -21, 74 }, { 16, 66 }, + { -23, 124 }, { 17, 37 }, { 44, -18 }, { 50, -34 }, + { -22, 127 }, + + /* 166 - 226 */ + { 4, 39 }, { 0, 42 }, { 7, 34 }, { 11, 29 }, + { 8, 31 }, { 6, 37 }, { 7, 42 }, { 3, 40 }, + { 8, 33 }, { 13, 43 }, { 13, 36 }, { 4, 47 }, + { 3, 55 }, { 2, 58 }, { 6, 60 }, { 8, 44 }, + { 11, 44 }, { 14, 42 }, { 7, 48 }, { 4, 56 }, + { 4, 52 }, { 13, 37 }, { 9, 49 }, { 19, 58 }, + { 10, 48 }, { 12, 45 }, { 0, 69 }, { 20, 33 }, + { 8, 63 }, { 35, -18 }, { 33, -25 }, { 28, -3 }, + { 24, 10 }, { 27, 0 }, { 34, -14 }, { 52, -44 }, + { 39, -24 }, { 19, 17 }, { 31, 25 }, { 36, 29 }, + { 24, 33 }, { 34, 15 }, { 30, 20 }, { 22, 73 }, + { 20, 34 }, { 19, 31 }, { 27, 44 }, { 19, 16 }, + { 15, 36 }, { 15, 36 }, { 21, 28 }, { 25, 21 }, + { 30, 20 }, { 31, 12 }, { 27, 16 }, { 24, 42 }, + { 0, 93 }, { 14, 56 }, { 15, 57 }, { 26, 38 }, + { -24, 127 }, + + /* 227 - 275 */ + { -24, 115 }, { -22, 82 }, { -9, 62 }, { 0, 53 }, + { 0, 59 }, { -14, 85 }, { -13, 89 }, { 
-13, 94 }, + { -11, 92 }, { -29, 127 }, { -21, 100 }, { -14, 57 }, + { -12, 67 }, { -11, 71 }, { -10, 77 }, { -21, 85 }, + { -16, 88 }, { -23, 104 }, { -15, 98 }, { -37, 127 }, + { -10, 82 }, { -8, 48 }, { -8, 61 }, { -8, 66 }, + { -7, 70 }, { -14, 75 }, { -10, 79 }, { -9, 83 }, + { -12, 92 }, { -18, 108 }, { -4, 79 }, { -22, 69 }, + { -16, 75 }, { -2, 58 }, { 1, 58 }, { -13, 78 }, + { -9, 83 }, { -4, 81 }, { -13, 99 }, { -13, 81 }, + { -6, 38 }, { -13, 62 }, { -6, 58 }, { -2, 59 }, + { -16, 73 }, { -10, 76 }, { -13, 86 }, { -9, 83 }, + { -10, 87 }, + + /* 276 a bit special (not used, x264_cabac_encode_bypass is used instead) */ + { 0, 0 }, + + /* 277 - 337 */ + { -22, 127 }, { -25, 127 }, { -25, 120 }, { -27, 127 }, + { -19, 114 }, { -23, 117 }, { -25, 118 }, { -26, 117 }, + { -24, 113 }, { -28, 118 }, { -31, 120 }, { -37, 124 }, + { -10, 94 }, { -15, 102 }, { -10, 99 }, { -13, 106 }, + { -50, 127 }, { -5, 92 }, { 17, 57 }, { -5, 86 }, + { -13, 94 }, { -12, 91 }, { -2, 77 }, { 0, 71 }, + { -1, 73 }, { 4, 64 }, { -7, 81 }, { 5, 64 }, + { 15, 57 }, { 1, 67 }, { 0, 68 }, { -10, 67 }, + { 1, 68 }, { 0, 77 }, { 2, 64 }, { 0, 68 }, + { -5, 78 }, { 7, 55 }, { 5, 59 }, { 2, 65 }, + { 14, 54 }, { 15, 44 }, { 5, 60 }, { 2, 70 }, + { -2, 76 }, { -18, 86 }, { 12, 70 }, { 5, 64 }, + { -12, 70 }, { 11, 55 }, { 5, 56 }, { 0, 69 }, + { 2, 65 }, { -6, 74 }, { 5, 54 }, { 7, 54 }, + { -6, 76 }, { -11, 82 }, { -2, 77 }, { -2, 77 }, + { 25, 42 }, + + /* 338 - 398 */ + { 17, -13 }, { 16, -9 }, { 17, -12 }, { 27, -21 }, + { 37, -30 }, { 41, -40 }, { 42, -41 }, { 48, -47 }, + { 39, -32 }, { 46, -40 }, { 52, -51 }, { 46, -41 }, + { 52, -39 }, { 43, -19 }, { 32, 11 }, { 61, -55 }, + { 56, -46 }, { 62, -50 }, { 81, -67 }, { 45, -20 }, + { 35, -2 }, { 28, 15 }, { 34, 1 }, { 39, 1 }, + { 30, 17 }, { 20, 38 }, { 18, 45 }, { 15, 54 }, + { 0, 79 }, { 36, -16 }, { 37, -14 }, { 37, -17 }, + { 32, 1 }, { 34, 15 }, { 29, 15 }, { 24, 25 }, + { 34, 22 }, { 31, 16 }, { 35, 18 }, { 31, 28 }, + { 33, 41 
}, { 36, 28 }, { 27, 47 }, { 21, 62 }, + { 18, 31 }, { 19, 26 }, { 36, 24 }, { 24, 23 }, + { 27, 16 }, { 24, 30 }, { 31, 29 }, { 22, 41 }, + { 22, 42 }, { 16, 60 }, { 15, 52 }, { 14, 60 }, + { 3, 78 }, { -16, 123 }, { 21, 53 }, { 22, 56 }, + { 25, 61 }, + + /* 399 -> 435 */ + { 21, 33 }, { 19, 50 }, { 17, 61 }, + { -3, 78 }, { -8, 74 }, { -9, 72 }, { -10, 72 }, + { -18, 75 }, { -12, 71 }, { -11, 63 }, { -5, 70 }, + { -17, 75 }, { -14, 72 }, { -16, 67 }, { -8, 53 }, + { -14, 59 }, { -9, 52 }, { -11, 68 }, { 9, -2 }, + { 30, -10 }, { 31, -4 }, { 33, -1 }, { 33, 7 }, + { 31, 12 }, { 37, 23 }, { 31, 38 }, { 20, 64 }, + { -9, 71 }, { -7, 37 }, { -8, 44 }, { -11, 49 }, + { -10, 56 }, { -12, 59 }, { -8, 63 }, { -9, 67 }, + { -6, 68 }, { -10, 79 }, + + /* 436 -> 459 */ + { -3, 78 }, { -8, 74 }, { -9, 72 }, { -10, 72 }, + { -18, 75 }, { -12, 71 }, { -11, 63 }, { -5, 70 }, + { -17, 75 }, { -14, 72 }, { -16, 67 }, { -8, 53 }, + { -14, 59 }, { -9, 52 }, { -11, 68 }, { 9, -2 }, + { 30, -10 }, { 31, -4 }, { 33, -1 }, { 33, 7 }, + { 31, 12 }, { 37, 23 }, { 31, 38 }, { 20, 64 }, + + /* 460 - 1024 */ + { 11, 80 }, { 5, 76 }, { 2, 84 }, { 5, 78 }, + { -6, 55 }, { 4, 61 }, { -14, 83 }, { -37, 127 }, + { -5, 79 }, { -11, 104 }, { -11, 91 }, { -30, 127 }, + { 11, 80 }, { 5, 76 }, { 2, 84 }, { 5, 78 }, + { -6, 55 }, { 4, 61 }, { -14, 83 }, { -37, 127 }, + { -5, 79 }, { -11, 104 }, { -11, 91 }, { -30, 127 }, + { -4, 86 }, { -12, 88 }, { -5, 82 }, { -3, 72 }, + { -4, 67 }, { -8, 72 }, { -16, 89 }, { -9, 69 }, + { -1, 59 }, { 5, 66 }, { 4, 57 }, { -4, 71 }, + { -2, 71 }, { 2, 58 }, { -1, 74 }, { -4, 44 }, + { -1, 69 }, { 0, 62 }, { -7, 51 }, { -4, 47 }, + { -6, 42 }, { -3, 41 }, { -6, 53 }, { 8, 76 }, + { -9, 78 }, { -11, 83 }, { 9, 52 }, { 0, 67 }, + { -5, 90 }, { 1, 67 }, { -15, 72 }, { -5, 75 }, + { -8, 80 }, { -21, 83 }, { -21, 64 }, { -13, 31 }, + { -25, 64 }, { -29, 94 }, { 9, 75 }, { 17, 63 }, + { -8, 74 }, { -5, 35 }, { -2, 27 }, { 13, 91 }, + { -4, 86 }, { -12, 88 }, { -5, 82 }, { 
-3, 72 }, + { -4, 67 }, { -8, 72 }, { -16, 89 }, { -9, 69 }, + { -1, 59 }, { 5, 66 }, { 4, 57 }, { -4, 71 }, + { -2, 71 }, { 2, 58 }, { -1, 74 }, { -4, 44 }, + { -1, 69 }, { 0, 62 }, { -7, 51 }, { -4, 47 }, + { -6, 42 }, { -3, 41 }, { -6, 53 }, { 8, 76 }, + { -9, 78 }, { -11, 83 }, { 9, 52 }, { 0, 67 }, + { -5, 90 }, { 1, 67 }, { -15, 72 }, { -5, 75 }, + { -8, 80 }, { -21, 83 }, { -21, 64 }, { -13, 31 }, + { -25, 64 }, { -29, 94 }, { 9, 75 }, { 17, 63 }, + { -8, 74 }, { -5, 35 }, { -2, 27 }, { 13, 91 }, + { 4, 39 }, { 0, 42 }, { 7, 34 }, { 11, 29 }, + { 8, 31 }, { 6, 37 }, { 7, 42 }, { 3, 40 }, + { 8, 33 }, { 13, 43 }, { 13, 36 }, { 4, 47 }, + { 3, 55 }, { 2, 58 }, { 6, 60 }, { 8, 44 }, + { 11, 44 }, { 14, 42 }, { 7, 48 }, { 4, 56 }, + { 4, 52 }, { 13, 37 }, { 9, 49 }, { 19, 58 }, + { 10, 48 }, { 12, 45 }, { 0, 69 }, { 20, 33 }, + { 8, 63 }, { 35, -18 }, { 33, -25 }, { 28, -3 }, + { 24, 10 }, { 27, 0 }, { 34, -14 }, { 52, -44 }, + { 39, -24 }, { 19, 17 }, { 31, 25 }, { 36, 29 }, + { 24, 33 }, { 34, 15 }, { 30, 20 }, { 22, 73 }, + { 4, 39 }, { 0, 42 }, { 7, 34 }, { 11, 29 }, + { 8, 31 }, { 6, 37 }, { 7, 42 }, { 3, 40 }, + { 8, 33 }, { 13, 43 }, { 13, 36 }, { 4, 47 }, + { 3, 55 }, { 2, 58 }, { 6, 60 }, { 8, 44 }, + { 11, 44 }, { 14, 42 }, { 7, 48 }, { 4, 56 }, + { 4, 52 }, { 13, 37 }, { 9, 49 }, { 19, 58 }, + { 10, 48 }, { 12, 45 }, { 0, 69 }, { 20, 33 }, + { 8, 63 }, { 35, -18 }, { 33, -25 }, { 28, -3 }, + { 24, 10 }, { 27, 0 }, { 34, -14 }, { 52, -44 }, + { 39, -24 }, { 19, 17 }, { 31, 25 }, { 36, 29 }, + { 24, 33 }, { 34, 15 }, { 30, 20 }, { 22, 73 }, + { -3, 78 }, { -8, 74 }, { -9, 72 }, { -10, 72 }, + { -18, 75 }, { -12, 71 }, { -11, 63 }, { -5, 70 }, + { -17, 75 }, { -14, 72 }, { -16, 67 }, { -8, 53 }, + { -14, 59 }, { -9, 52 }, { -11, 68 }, { -3, 78 }, + { -8, 74 }, { -9, 72 }, { -10, 72 }, { -18, 75 }, + { -12, 71 }, { -11, 63 }, { -5, 70 }, { -17, 75 }, + { -14, 72 }, { -16, 67 }, { -8, 53 }, { -14, 59 }, + { -9, 52 }, { -11, 68 }, { 9, -2 }, { 30, -10 }, + 
{ 31, -4 }, { 33, -1 }, { 33, 7 }, { 31, 12 }, + { 37, 23 }, { 31, 38 }, { 20, 64 }, { 9, -2 }, + { 30, -10 }, { 31, -4 }, { 33, -1 }, { 33, 7 }, + { 31, 12 }, { 37, 23 }, { 31, 38 }, { 20, 64 }, + { -9, 71 }, { -7, 37 }, { -8, 44 }, { -11, 49 }, + { -10, 56 }, { -12, 59 }, { -8, 63 }, { -9, 67 }, + { -6, 68 }, { -10, 79 }, { -3, 78 }, { -8, 74 }, + { -9, 72 }, { -10, 72 }, { -18, 75 }, { -12, 71 }, + { -11, 63 }, { -5, 70 }, { -17, 75 }, { -14, 72 }, + { -16, 67 }, { -8, 53 }, { -14, 59 }, { -9, 52 }, + { -11, 68 }, { -3, 78 }, { -8, 74 }, { -9, 72 }, + { -10, 72 }, { -18, 75 }, { -12, 71 }, { -11, 63 }, + { -5, 70 }, { -17, 75 }, { -14, 72 }, { -16, 67 }, + { -8, 53 }, { -14, 59 }, { -9, 52 }, { -11, 68 }, + { 9, -2 }, { 30, -10 }, { 31, -4 }, { 33, -1 }, + { 33, 7 }, { 31, 12 }, { 37, 23 }, { 31, 38 }, + { 20, 64 }, { 9, -2 }, { 30, -10 }, { 31, -4 }, + { 33, -1 }, { 33, 7 }, { 31, 12 }, { 37, 23 }, + { 31, 38 }, { 20, 64 }, { -9, 71 }, { -7, 37 }, + { -8, 44 }, { -11, 49 }, { -10, 56 }, { -12, 59 }, + { -8, 63 }, { -9, 67 }, { -6, 68 }, { -10, 79 }, + { -22, 127 }, { -25, 127 }, { -25, 120 }, { -27, 127 }, + { -19, 114 }, { -23, 117 }, { -25, 118 }, { -26, 117 }, + { -24, 113 }, { -28, 118 }, { -31, 120 }, { -37, 124 }, + { -10, 94 }, { -15, 102 }, { -10, 99 }, { -13, 106 }, + { -50, 127 }, { -5, 92 }, { 17, 57 }, { -5, 86 }, + { -13, 94 }, { -12, 91 }, { -2, 77 }, { 0, 71 }, + { -1, 73 }, { 4, 64 }, { -7, 81 }, { 5, 64 }, + { 15, 57 }, { 1, 67 }, { 0, 68 }, { -10, 67 }, + { 1, 68 }, { 0, 77 }, { 2, 64 }, { 0, 68 }, + { -5, 78 }, { 7, 55 }, { 5, 59 }, { 2, 65 }, + { 14, 54 }, { 15, 44 }, { 5, 60 }, { 2, 70 }, + { -22, 127 }, { -25, 127 }, { -25, 120 }, { -27, 127 }, + { -19, 114 }, { -23, 117 }, { -25, 118 }, { -26, 117 }, + { -24, 113 }, { -28, 118 }, { -31, 120 }, { -37, 124 }, + { -10, 94 }, { -15, 102 }, { -10, 99 }, { -13, 106 }, + { -50, 127 }, { -5, 92 }, { 17, 57 }, { -5, 86 }, + { -13, 94 }, { -12, 91 }, { -2, 77 }, { 0, 71 }, + { -1, 73 }, { 4, 64 }, 
{ -7, 81 }, { 5, 64 }, + { 15, 57 }, { 1, 67 }, { 0, 68 }, { -10, 67 }, + { 1, 68 }, { 0, 77 }, { 2, 64 }, { 0, 68 }, + { -5, 78 }, { 7, 55 }, { 5, 59 }, { 2, 65 }, + { 14, 54 }, { 15, 44 }, { 5, 60 }, { 2, 70 }, + { 17, -13 }, { 16, -9 }, { 17, -12 }, { 27, -21 }, + { 37, -30 }, { 41, -40 }, { 42, -41 }, { 48, -47 }, + { 39, -32 }, { 46, -40 }, { 52, -51 }, { 46, -41 }, + { 52, -39 }, { 43, -19 }, { 32, 11 }, { 61, -55 }, + { 56, -46 }, { 62, -50 }, { 81, -67 }, { 45, -20 }, + { 35, -2 }, { 28, 15 }, { 34, 1 }, { 39, 1 }, + { 30, 17 }, { 20, 38 }, { 18, 45 }, { 15, 54 }, + { 0, 79 }, { 36, -16 }, { 37, -14 }, { 37, -17 }, + { 32, 1 }, { 34, 15 }, { 29, 15 }, { 24, 25 }, + { 34, 22 }, { 31, 16 }, { 35, 18 }, { 31, 28 }, + { 33, 41 }, { 36, 28 }, { 27, 47 }, { 21, 62 }, + { 17, -13 }, { 16, -9 }, { 17, -12 }, { 27, -21 }, + { 37, -30 }, { 41, -40 }, { 42, -41 }, { 48, -47 }, + { 39, -32 }, { 46, -40 }, { 52, -51 }, { 46, -41 }, + { 52, -39 }, { 43, -19 }, { 32, 11 }, { 61, -55 }, + { 56, -46 }, { 62, -50 }, { 81, -67 }, { 45, -20 }, + { 35, -2 }, { 28, 15 }, { 34, 1 }, { 39, 1 }, + { 30, 17 }, { 20, 38 }, { 18, 45 }, { 15, 54 }, + { 0, 79 }, { 36, -16 }, { 37, -14 }, { 37, -17 }, + { 32, 1 }, { 34, 15 }, { 29, 15 }, { 24, 25 }, + { 34, 22 }, { 31, 16 }, { 35, 18 }, { 31, 28 }, + { 33, 41 }, { 36, 28 }, { 27, 47 }, { 21, 62 }, + { -24, 115 }, { -22, 82 }, { -9, 62 }, { 0, 53 }, + { 0, 59 }, { -14, 85 }, { -13, 89 }, { -13, 94 }, + { -11, 92 }, { -29, 127 }, { -21, 100 }, { -14, 57 }, + { -12, 67 }, { -11, 71 }, { -10, 77 }, { -21, 85 }, + { -16, 88 }, { -23, 104 }, { -15, 98 }, { -37, 127 }, + { -10, 82 }, { -8, 48 }, { -8, 61 }, { -8, 66 }, + { -7, 70 }, { -14, 75 }, { -10, 79 }, { -9, 83 }, + { -12, 92 }, { -18, 108 }, { -24, 115 }, { -22, 82 }, + { -9, 62 }, { 0, 53 }, { 0, 59 }, { -14, 85 }, + { -13, 89 }, { -13, 94 }, { -11, 92 }, { -29, 127 }, + { -21, 100 }, { -14, 57 }, { -12, 67 }, { -11, 71 }, + { -10, 77 }, { -21, 85 }, { -16, 88 }, { -23, 104 }, + { -15, 
98 }, { -37, 127 }, { -10, 82 }, { -8, 48 }, + { -8, 61 }, { -8, 66 }, { -7, 70 }, { -14, 75 }, + { -10, 79 }, { -9, 83 }, { -12, 92 }, { -18, 108 }, + { -5, 79 }, { -11, 104 }, { -11, 91 }, { -30, 127 }, + { -5, 79 }, { -11, 104 }, { -11, 91 }, { -30, 127 }, + { -5, 79 }, { -11, 104 }, { -11, 91 }, { -30, 127 } + } +}; + /* 64 rows of four 8-bit values; values never decrease from one row to the next or from left to right within a row. NOTE(review): presumably indexed [probability state][quantized range] - confirm at the use site. */ +const uint8_t x264_cabac_range_lps[64][4] = +{ + { 2, 2, 2, 2}, { 6, 7, 8, 9}, { 6, 7, 9, 10}, { 6, 8, 9, 11}, + { 7, 8, 10, 11}, { 7, 9, 10, 12}, { 7, 9, 11, 12}, { 8, 9, 11, 13}, + { 8, 10, 12, 14}, { 9, 11, 12, 14}, { 9, 11, 13, 15}, { 10, 12, 14, 16}, + { 10, 12, 15, 17}, { 11, 13, 15, 18}, { 11, 14, 16, 19}, { 12, 14, 17, 20}, + { 12, 15, 18, 21}, { 13, 16, 19, 22}, { 14, 17, 20, 23}, { 14, 18, 21, 24}, + { 15, 19, 22, 25}, { 16, 20, 23, 27}, { 17, 21, 25, 28}, { 18, 22, 26, 30}, + { 19, 23, 27, 31}, { 20, 24, 29, 33}, { 21, 26, 30, 35}, { 22, 27, 32, 37}, + { 23, 28, 33, 39}, { 24, 30, 35, 41}, { 26, 31, 37, 43}, { 27, 33, 39, 45}, + { 29, 35, 41, 48}, { 30, 37, 43, 50}, { 32, 39, 46, 53}, { 33, 41, 48, 56}, + { 35, 43, 51, 59}, { 37, 45, 54, 62}, { 39, 48, 56, 65}, { 41, 50, 59, 69}, + { 43, 53, 63, 72}, { 46, 56, 66, 76}, { 48, 59, 69, 80}, { 51, 62, 73, 85}, + { 53, 65, 77, 89}, { 56, 69, 81, 94}, { 59, 72, 86, 99}, { 62, 76, 90, 104}, + { 66, 80, 95, 110}, { 69, 85, 100, 116}, { 73, 89, 105, 122}, { 77, 94, 111, 128}, + { 81, 99, 117, 135}, { 85, 104, 123, 142}, { 90, 110, 130, 150}, { 95, 116, 137, 158}, + {100, 122, 144, 166}, {105, 128, 152, 175}, {111, 135, 160, 185}, {116, 142, 169, 195}, + {123, 150, 178, 205}, {128, 158, 187, 216}, {128, 167, 197, 227}, {128, 176, 208, 240} +}; + /* 128 rows of two 8-bit values; every entry lies in 0..127, so it is itself a valid row index into this table. NOTE(review): the meaning of the two columns is not visible in this chunk - confirm at the use site. */ +const uint8_t x264_cabac_transition[128][2] = +{ + { 0, 0}, { 1, 1}, { 2, 50}, { 51, 3}, { 2, 50}, { 51, 3}, { 4, 52}, { 53, 5}, + { 6, 52}, { 53, 7}, { 8, 52}, { 53, 9}, { 10, 54}, { 55, 11}, { 12, 54}, { 55, 13}, + { 14, 54}, { 55, 15}, { 16, 56}, { 57, 17}, { 18, 56}, { 57, 19}, { 20, 56}, { 57, 21}, + { 22, 58}, { 59, 23}, { 24, 58}, { 59, 25}, {
26, 60}, { 61, 27}, { 28, 60}, { 61, 29}, + { 30, 60}, { 61, 31}, { 32, 62}, { 63, 33}, { 34, 62}, { 63, 35}, { 36, 64}, { 65, 37}, + { 38, 66}, { 67, 39}, { 40, 66}, { 67, 41}, { 42, 66}, { 67, 43}, { 44, 68}, { 69, 45}, + { 46, 68}, { 69, 47}, { 48, 70}, { 71, 49}, { 50, 72}, { 73, 51}, { 52, 72}, { 73, 53}, + { 54, 74}, { 75, 55}, { 56, 74}, { 75, 57}, { 58, 76}, { 77, 59}, { 60, 78}, { 79, 61}, + { 62, 78}, { 79, 63}, { 64, 80}, { 81, 65}, { 66, 82}, { 83, 67}, { 68, 82}, { 83, 69}, + { 70, 84}, { 85, 71}, { 72, 84}, { 85, 73}, { 74, 88}, { 89, 75}, { 76, 88}, { 89, 77}, + { 78, 90}, { 91, 79}, { 80, 90}, { 91, 81}, { 82, 94}, { 95, 83}, { 84, 94}, { 95, 85}, + { 86, 96}, { 97, 87}, { 88, 96}, { 97, 89}, { 90, 100}, {101, 91}, { 92, 100}, {101, 93}, + { 94, 102}, {103, 95}, { 96, 104}, {105, 97}, { 98, 104}, {105, 99}, {100, 108}, {109, 101}, + {102, 108}, {109, 103}, {104, 110}, {111, 105}, {106, 112}, {113, 107}, {108, 114}, {115, 109}, + {110, 116}, {117, 111}, {112, 118}, {119, 113}, {114, 118}, {119, 115}, {116, 122}, {123, 117}, + {118, 122}, {123, 119}, {120, 124}, {125, 121}, {122, 126}, {127, 123}, {124, 127}, {126, 125} +}; + /* 64-entry shift lookup: values step down from 6 to 0 and the upper 32 entries are all 0. NOTE(review): the index is presumably derived from the coder's range - confirm at the use site. */ +const uint8_t x264_cabac_renorm_shift[64] = +{ + 6,5,4,4,3,3,3,3,2,2,2,2,2,2,2,2, + 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +}; + +/* -ln2(probability); "ln2" presumably means log base 2, i.e. a cost in bits. Entries come in (small, large) pairs that converge to 1.0 at the end of the table. FIX8 is defined outside this chunk. */ +const uint16_t x264_cabac_entropy[128] = +{ + FIX8(0.0273), FIX8(5.7370), FIX8(0.0288), FIX8(5.6618), + FIX8(0.0303), FIX8(5.5866), FIX8(0.0320), FIX8(5.5114), + FIX8(0.0337), FIX8(5.4362), FIX8(0.0355), FIX8(5.3610), + FIX8(0.0375), FIX8(5.2859), FIX8(0.0395), FIX8(5.2106), + FIX8(0.0416), FIX8(5.1354), FIX8(0.0439), FIX8(5.0602), + FIX8(0.0463), FIX8(4.9851), FIX8(0.0488), FIX8(4.9099), + FIX8(0.0515), FIX8(4.8347), FIX8(0.0543), FIX8(4.7595), + FIX8(0.0572), FIX8(4.6843), FIX8(0.0604), FIX8(4.6091), + FIX8(0.0637), FIX8(4.5339), FIX8(0.0671), FIX8(4.4588), + FIX8(0.0708), FIX8(4.3836), FIX8(0.0747),
FIX8(4.3083), + FIX8(0.0788), FIX8(4.2332), FIX8(0.0832), FIX8(4.1580), + FIX8(0.0878), FIX8(4.0828), FIX8(0.0926), FIX8(4.0076), + FIX8(0.0977), FIX8(3.9324), FIX8(0.1032), FIX8(3.8572), + FIX8(0.1089), FIX8(3.7820), FIX8(0.1149), FIX8(3.7068), + FIX8(0.1214), FIX8(3.6316), FIX8(0.1282), FIX8(3.5565), + FIX8(0.1353), FIX8(3.4813), FIX8(0.1429), FIX8(3.4061), + FIX8(0.1510), FIX8(3.3309), FIX8(0.1596), FIX8(3.2557), + FIX8(0.1686), FIX8(3.1805), FIX8(0.1782), FIX8(3.1053), + FIX8(0.1884), FIX8(3.0301), FIX8(0.1992), FIX8(2.9549), + FIX8(0.2107), FIX8(2.8797), FIX8(0.2229), FIX8(2.8046), + FIX8(0.2358), FIX8(2.7294), FIX8(0.2496), FIX8(2.6542), + FIX8(0.2642), FIX8(2.5790), FIX8(0.2798), FIX8(2.5038), + FIX8(0.2964), FIX8(2.4286), FIX8(0.3142), FIX8(2.3534), + FIX8(0.3331), FIX8(2.2782), FIX8(0.3532), FIX8(2.2030), + FIX8(0.3748), FIX8(2.1278), FIX8(0.3979), FIX8(2.0527), + FIX8(0.4226), FIX8(1.9775), FIX8(0.4491), FIX8(1.9023), + FIX8(0.4776), FIX8(1.8271), FIX8(0.5082), FIX8(1.7519), + FIX8(0.5412), FIX8(1.6767), FIX8(0.5768), FIX8(1.6015), + FIX8(0.6152), FIX8(1.5263), FIX8(0.6568), FIX8(1.4511), + FIX8(0.7020), FIX8(1.3759), FIX8(0.7513), FIX8(1.3008), + FIX8(0.8050), FIX8(1.2256), FIX8(0.8638), FIX8(1.1504), + FIX8(0.9285), FIX8(1.0752), FIX8(1.0000), FIX8(1.0000) +}; + +/***************************************************************************** + * RDO + *****************************************************************************/ + +/* Padded to [64] for easier addressing: each row lists only 63 values, so the final element of each row is implicitly zero-initialised. */ +const uint8_t x264_significant_coeff_flag_offset_8x8[2][64] = +{{ + 0, 1, 2, 3, 4, 5, 5, 4, 4, 3, 3, 4, 4, 4, 5, 5, + 4, 4, 4, 4, 3, 3, 6, 7, 7, 7, 8, 9,10, 9, 8, 7, + 7, 6,11,12,13,11, 6, 7, 8, 9,14,10, 9, 8, 6,11, + 12,13,11, 6, 9,14,10, 9,11,12,13,11,14,10,12 +},{ + 0, 1, 1, 2, 2, 3, 3, 4, 5, 6, 7, 7, 7, 8, 4, 5, + 6, 9,10,10, 8,11,12,11, 9, 9,10,10, 8,11,12,11, + 9, 9,10,10, 8,11,12,11, 9, 9,10,10, 8,13,13, 9, + 9,10,10, 8,13,13, 9, 9,10,10,14,14,14,14,14 +}}; +const uint8_t
x264_last_coeff_flag_offset_8x8[63] = +{ + 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 3, 3, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4, + 5, 5, 5, 5, 6, 6, 6, 6, 7, 7, 7, 7, 8, 8, 8 +}; +const uint8_t x264_coeff_flag_offset_chroma_422_dc[7] = { 0, 0, 1, 1, 2, 2, 2 }; /* MIN( i/2, 2 ) */ +const uint16_t x264_significant_coeff_flag_offset[2][16] = +{ + { 105+0, 105+15, 105+29, 105+44, 105+47, 402, 484+0, 484+15, 484+29, 660, 528+0, 528+15, 528+29, 718, 0, 0 }, + { 277+0, 277+15, 277+29, 277+44, 277+47, 436, 776+0, 776+15, 776+29, 675, 820+0, 820+15, 820+29, 733, 0, 0 } +}; +const uint16_t x264_last_coeff_flag_offset[2][16] = +{ + { 166+0, 166+15, 166+29, 166+44, 166+47, 417, 572+0, 572+15, 572+29, 690, 616+0, 616+15, 616+29, 748, 0, 0 }, + { 338+0, 338+15, 338+29, 338+44, 338+47, 451, 864+0, 864+15, 864+29, 699, 908+0, 908+15, 908+29, 757, 0, 0 } +}; +const uint16_t x264_coeff_abs_level_m1_offset[16] = +{ + 227+0, 227+10, 227+20, 227+30, 227+39, 426, 952+0, 952+10, 952+20, 708, 982+0, 982+10, 982+20, 766 +}; +const uint8_t x264_count_cat_m1[14] = {15, 14, 15, 3, 14, 63, 15, 14, 15, 63, 15, 14, 15, 63}; + +/***************************************************************************** + * VLC + *****************************************************************************/ + +/* [nC]; each entry pairs a code value with its length in bits, as spelled out by the str= annotation beside it */ +const vlc_t x264_coeff0_token[6] = +{ + { 0x1, 1 }, /* str=1 */ + { 0x3, 2 }, /* str=11 */ + { 0xf, 4 }, /* str=1111 */ + { 0x3, 6 }, /* str=000011 */ + { 0x1, 2 }, /* str=01 */ + { 0x1, 1 }, /* str=1 */ +}; + +/* [nC][i_total_coeff-1][i_trailing] */ +const vlc_t x264_coeff_token[6][16][4] = +{ + { /* table 0 */ + { /* i_total 1 */ + { 0x5, 6 }, /* str=000101 */ + { 0x1, 2 }, /* str=01 */ + }, + { /* i_total 2 */ + { 0x7, 8 }, /* str=00000111 */ + { 0x4, 6 }, /* str=000100 */ + { 0x1, 3 }, /* str=001 */ + }, + { /* i_total 3 */ + { 0x7, 9 }, /* str=000000111 */ + { 0x6, 8 }, /* str=00000110 */ + { 0x5, 7 }, /* str=0000101 */ + {
0x3, 5 }, /* str=00011 */ + }, + { /* i_total 4 */ + { 0x7, 10 }, /* str=0000000111 */ + { 0x6, 9 }, /* str=000000110 */ + { 0x5, 8 }, /* str=00000101 */ + { 0x3, 6 }, /* str=000011 */ + }, + { /* i_total 5 */ + { 0x7, 11 }, /* str=00000000111 */ + { 0x6, 10 }, /* str=0000000110 */ + { 0x5, 9 }, /* str=000000101 */ + { 0x4, 7 }, /* str=0000100 */ + }, + { /* i_total 6 */ + { 0xf, 13 }, /* str=0000000001111 */ + { 0x6, 11 }, /* str=00000000110 */ + { 0x5, 10 }, /* str=0000000101 */ + { 0x4, 8 }, /* str=00000100 */ + }, + { /* i_total 7 */ + { 0xb, 13 }, /* str=0000000001011 */ + { 0xe, 13 }, /* str=0000000001110 */ + { 0x5, 11 }, /* str=00000000101 */ + { 0x4, 9 }, /* str=000000100 */ + }, + { /* i_total 8 */ + { 0x8, 13 }, /* str=0000000001000 */ + { 0xa, 13 }, /* str=0000000001010 */ + { 0xd, 13 }, /* str=0000000001101 */ + { 0x4, 10 }, /* str=0000000100 */ + }, + { /* i_total 9 */ + { 0xf, 14 }, /* str=00000000001111 */ + { 0xe, 14 }, /* str=00000000001110 */ + { 0x9, 13 }, /* str=0000000001001 */ + { 0x4, 11 }, /* str=00000000100 */ + }, + { /* i_total 10 */ + { 0xb, 14 }, /* str=00000000001011 */ + { 0xa, 14 }, /* str=00000000001010 */ + { 0xd, 14 }, /* str=00000000001101 */ + { 0xc, 13 }, /* str=0000000001100 */ + }, + { /* i_total 11 */ + { 0xf, 15 }, /* str=000000000001111 */ + { 0xe, 15 }, /* str=000000000001110 */ + { 0x9, 14 }, /* str=00000000001001 */ + { 0xc, 14 }, /* str=00000000001100 */ + }, + { /* i_total 12 */ + { 0xb, 15 }, /* str=000000000001011 */ + { 0xa, 15 }, /* str=000000000001010 */ + { 0xd, 15 }, /* str=000000000001101 */ + { 0x8, 14 }, /* str=00000000001000 */ + }, + { /* i_total 13 */ + { 0xf, 16 }, /* str=0000000000001111 */ + { 0x1, 15 }, /* str=000000000000001 */ + { 0x9, 15 }, /* str=000000000001001 */ + { 0xc, 15 }, /* str=000000000001100 */ + }, + { /* i_total 14 */ + { 0xb, 16 }, /* str=0000000000001011 */ + { 0xe, 16 }, /* str=0000000000001110 */ + { 0xd, 16 }, /* str=0000000000001101 */ + { 0x8, 15 }, /* str=000000000001000 */ +
}, + { /* i_total 15 */ + { 0x7, 16 }, /* str=0000000000000111 */ + { 0xa, 16 }, /* str=0000000000001010 */ + { 0x9, 16 }, /* str=0000000000001001 */ + { 0xc, 16 }, /* str=0000000000001100 */ + }, + { /* i_total 16 */ + { 0x4, 16 }, /* str=0000000000000100 */ + { 0x6, 16 }, /* str=0000000000000110 */ + { 0x5, 16 }, /* str=0000000000000101 */ + { 0x8, 16 }, /* str=0000000000001000 */ + }, + }, + { /* table 1 */ + { /* i_total 1 */ + { 0xb, 6 }, /* str=001011 */ + { 0x2, 2 }, /* str=10 */ + }, + { /* i_total 2 */ + { 0x7, 6 }, /* str=000111 */ + { 0x7, 5 }, /* str=00111 */ + { 0x3, 3 }, /* str=011 */ + }, + { /* i_total 3 */ + { 0x7, 7 }, /* str=0000111 */ + { 0xa, 6 }, /* str=001010 */ + { 0x9, 6 }, /* str=001001 */ + { 0x5, 4 }, /* str=0101 */ + }, + { /* i_total 4 */ + { 0x7, 8 }, /* str=00000111 */ + { 0x6, 6 }, /* str=000110 */ + { 0x5, 6 }, /* str=000101 */ + { 0x4, 4 }, /* str=0100 */ + }, + { /* i_total 5 */ + { 0x4, 8 }, /* str=00000100 */ + { 0x6, 7 }, /* str=0000110 */ + { 0x5, 7 }, /* str=0000101 */ + { 0x6, 5 }, /* str=00110 */ + }, + { /* i_total 6 */ + { 0x7, 9 }, /* str=000000111 */ + { 0x6, 8 }, /* str=00000110 */ + { 0x5, 8 }, /* str=00000101 */ + { 0x8, 6 }, /* str=001000 */ + }, + { /* i_total 7 */ + { 0xf, 11 }, /* str=00000001111 */ + { 0x6, 9 }, /* str=000000110 */ + { 0x5, 9 }, /* str=000000101 */ + { 0x4, 6 }, /* str=000100 */ + }, + { /* i_total 8 */ + { 0xb, 11 }, /* str=00000001011 */ + { 0xe, 11 }, /* str=00000001110 */ + { 0xd, 11 }, /* str=00000001101 */ + { 0x4, 7 }, /* str=0000100 */ + }, + { /* i_total 9 */ + { 0xf, 12 }, /* str=000000001111 */ + { 0xa, 11 }, /* str=00000001010 */ + { 0x9, 11 }, /* str=00000001001 */ + { 0x4, 9 }, /* str=000000100 */ + }, + { /* i_total 10 */ + { 0xb, 12 }, /* str=000000001011 */ + { 0xe, 12 }, /* str=000000001110 */ + { 0xd, 12 }, /* str=000000001101 */ + { 0xc, 11 }, /* str=00000001100 */ + }, + { /* i_total 11 */ + { 0x8, 12 }, /* str=000000001000 */ + { 0xa, 12 }, /* str=000000001010 */ + { 0x9, 
12 }, /* str=000000001001 */ + { 0x8, 11 }, /* str=00000001000 */ + }, + { /* i_total 12 */ + { 0xf, 13 }, /* str=0000000001111 */ + { 0xe, 13 }, /* str=0000000001110 */ + { 0xd, 13 }, /* str=0000000001101 */ + { 0xc, 12 }, /* str=000000001100 */ + }, + { /* i_total 13 */ + { 0xb, 13 }, /* str=0000000001011 */ + { 0xa, 13 }, /* str=0000000001010 */ + { 0x9, 13 }, /* str=0000000001001 */ + { 0xc, 13 }, /* str=0000000001100 */ + }, + { /* i_total 14 */ + { 0x7, 13 }, /* str=0000000000111 */ + { 0xb, 14 }, /* str=00000000001011 */ + { 0x6, 13 }, /* str=0000000000110 */ + { 0x8, 13 }, /* str=0000000001000 */ + }, + { /* i_total 15 */ + { 0x9, 14 }, /* str=00000000001001 */ + { 0x8, 14 }, /* str=00000000001000 */ + { 0xa, 14 }, /* str=00000000001010 */ + { 0x1, 13 }, /* str=0000000000001 */ + }, + { /* i_total 16 */ + { 0x7, 14 }, /* str=00000000000111 */ + { 0x6, 14 }, /* str=00000000000110 */ + { 0x5, 14 }, /* str=00000000000101 */ + { 0x4, 14 }, /* str=00000000000100 */ + }, + }, + { /* table 2 */ + { /* i_total 1 */ + { 0xf, 6 }, /* str=001111 */ + { 0xe, 4 }, /* str=1110 */ + }, + { /* i_total 2 */ + { 0xb, 6 }, /* str=001011 */ + { 0xf, 5 }, /* str=01111 */ + { 0xd, 4 }, /* str=1101 */ + }, + { /* i_total 3 */ + { 0x8, 6 }, /* str=001000 */ + { 0xc, 5 }, /* str=01100 */ + { 0xe, 5 }, /* str=01110 */ + { 0xc, 4 }, /* str=1100 */ + }, + { /* i_total 4 */ + { 0xf, 7 }, /* str=0001111 */ + { 0xa, 5 }, /* str=01010 */ + { 0xb, 5 }, /* str=01011 */ + { 0xb, 4 }, /* str=1011 */ + }, + { /* i_total 5 */ + { 0xb, 7 }, /* str=0001011 */ + { 0x8, 5 }, /* str=01000 */ + { 0x9, 5 }, /* str=01001 */ + { 0xa, 4 }, /* str=1010 */ + }, + { /* i_total 6 */ + { 0x9, 7 }, /* str=0001001 */ + { 0xe, 6 }, /* str=001110 */ + { 0xd, 6 }, /* str=001101 */ + { 0x9, 4 }, /* str=1001 */ + }, + { /* i_total 7 */ + { 0x8, 7 }, /* str=0001000 */ + { 0xa, 6 }, /* str=001010 */ + { 0x9, 6 }, /* str=001001 */ + { 0x8, 4 }, /* str=1000 */ + }, + { /* i_total 8 */ + { 0xf, 8 }, /* str=00001111 */ + 
{ 0xe, 7 }, /* str=0001110 */ + { 0xd, 7 }, /* str=0001101 */ + { 0xd, 5 }, /* str=01101 */ + }, + { /* i_total 9 */ + { 0xb, 8 }, /* str=00001011 */ + { 0xe, 8 }, /* str=00001110 */ + { 0xa, 7 }, /* str=0001010 */ + { 0xc, 6 }, /* str=001100 */ + }, + { /* i_total 10 */ + { 0xf, 9 }, /* str=000001111 */ + { 0xa, 8 }, /* str=00001010 */ + { 0xd, 8 }, /* str=00001101 */ + { 0xc, 7 }, /* str=0001100 */ + }, + { /* i_total 11 */ + { 0xb, 9 }, /* str=000001011 */ + { 0xe, 9 }, /* str=000001110 */ + { 0x9, 8 }, /* str=00001001 */ + { 0xc, 8 }, /* str=00001100 */ + }, + { /* i_total 12 */ + { 0x8, 9 }, /* str=000001000 */ + { 0xa, 9 }, /* str=000001010 */ + { 0xd, 9 }, /* str=000001101 */ + { 0x8, 8 }, /* str=00001000 */ + }, + { /* i_total 13 */ + { 0xd, 10 }, /* str=0000001101 */ + { 0x7, 9 }, /* str=000000111 */ + { 0x9, 9 }, /* str=000001001 */ + { 0xc, 9 }, /* str=000001100 */ + }, + { /* i_total 14 */ + { 0x9, 10 }, /* str=0000001001 */ + { 0xc, 10 }, /* str=0000001100 */ + { 0xb, 10 }, /* str=0000001011 */ + { 0xa, 10 }, /* str=0000001010 */ + }, + { /* i_total 15 */ + { 0x5, 10 }, /* str=0000000101 */ + { 0x8, 10 }, /* str=0000001000 */ + { 0x7, 10 }, /* str=0000000111 */ + { 0x6, 10 }, /* str=0000000110 */ + }, + { /* i_total 16 */ + { 0x1, 10 }, /* str=0000000001 */ + { 0x4, 10 }, /* str=0000000100 */ + { 0x3, 10 }, /* str=0000000011 */ + { 0x2, 10 }, /* str=0000000010 */ + }, + }, + { /* table 3 */ + { /* i_total 1 */ + { 0x0, 6 }, /* str=000000 */ + { 0x1, 6 }, /* str=000001 */ + }, + { /* i_total 2 */ + { 0x4, 6 }, /* str=000100 */ + { 0x5, 6 }, /* str=000101 */ + { 0x6, 6 }, /* str=000110 */ + }, + { /* i_total 3 */ + { 0x8, 6 }, /* str=001000 */ + { 0x9, 6 }, /* str=001001 */ + { 0xa, 6 }, /* str=001010 */ + { 0xb, 6 }, /* str=001011 */ + }, + { /* i_total 4 */ + { 0xc, 6 }, /* str=001100 */ + { 0xd, 6 }, /* str=001101 */ + { 0xe, 6 }, /* str=001110 */ + { 0xf, 6 }, /* str=001111 */ + }, + { /* i_total 5 */ + { 0x10, 6 }, /* str=010000 */ + { 0x11, 6 }, /* 
str=010001 */ + { 0x12, 6 }, /* str=010010 */ + { 0x13, 6 }, /* str=010011 */ + }, + { /* i_total 6 */ + { 0x14, 6 }, /* str=010100 */ + { 0x15, 6 }, /* str=010101 */ + { 0x16, 6 }, /* str=010110 */ + { 0x17, 6 }, /* str=010111 */ + }, + { /* i_total 7 */ + { 0x18, 6 }, /* str=011000 */ + { 0x19, 6 }, /* str=011001 */ + { 0x1a, 6 }, /* str=011010 */ + { 0x1b, 6 }, /* str=011011 */ + }, + { /* i_total 8 */ + { 0x1c, 6 }, /* str=011100 */ + { 0x1d, 6 }, /* str=011101 */ + { 0x1e, 6 }, /* str=011110 */ + { 0x1f, 6 }, /* str=011111 */ + }, + { /* i_total 9 */ + { 0x20, 6 }, /* str=100000 */ + { 0x21, 6 }, /* str=100001 */ + { 0x22, 6 }, /* str=100010 */ + { 0x23, 6 }, /* str=100011 */ + }, + { /* i_total 10 */ + { 0x24, 6 }, /* str=100100 */ + { 0x25, 6 }, /* str=100101 */ + { 0x26, 6 }, /* str=100110 */ + { 0x27, 6 }, /* str=100111 */ + }, + { /* i_total 11 */ + { 0x28, 6 }, /* str=101000 */ + { 0x29, 6 }, /* str=101001 */ + { 0x2a, 6 }, /* str=101010 */ + { 0x2b, 6 }, /* str=101011 */ + }, + { /* i_total 12 */ + { 0x2c, 6 }, /* str=101100 */ + { 0x2d, 6 }, /* str=101101 */ + { 0x2e, 6 }, /* str=101110 */ + { 0x2f, 6 }, /* str=101111 */ + }, + { /* i_total 13 */ + { 0x30, 6 }, /* str=110000 */ + { 0x31, 6 }, /* str=110001 */ + { 0x32, 6 }, /* str=110010 */ + { 0x33, 6 }, /* str=110011 */ + }, + { /* i_total 14 */ + { 0x34, 6 }, /* str=110100 */ + { 0x35, 6 }, /* str=110101 */ + { 0x36, 6 }, /* str=110110 */ + { 0x37, 6 }, /* str=110111 */ + }, + { /* i_total 15 */ + { 0x38, 6 }, /* str=111000 */ + { 0x39, 6 }, /* str=111001 */ + { 0x3a, 6 }, /* str=111010 */ + { 0x3b, 6 }, /* str=111011 */ + }, + { /* i_total 16 */ + { 0x3c, 6 }, /* str=111100 */ + { 0x3d, 6 }, /* str=111101 */ + { 0x3e, 6 }, /* str=111110 */ + { 0x3f, 6 }, /* str=111111 */ + }, + }, + { /* table 4 */ + { /* i_total 1 */ + { 0x7, 6 }, /* str=000111 */ + { 0x1, 1 }, /* str=1 */ + }, + { /* i_total 2 */ + { 0x4, 6 }, /* str=000100 */ + { 0x6, 6 }, /* str=000110 */ + { 0x1, 3 }, /* str=001 */ + }, + { /* 
i_total 3 */ + { 0x3, 6 }, /* str=000011 */ + { 0x3, 7 }, /* str=0000011 */ + { 0x2, 7 }, /* str=0000010 */ + { 0x5, 6 }, /* str=000101 */ + }, + { /* i_total 4 */ + { 0x2, 6 }, /* str=000010 */ + { 0x3, 8 }, /* str=00000011 */ + { 0x2, 8 }, /* str=00000010 */ + { 0x0, 7 }, /* str=0000000 */ + }, + }, + { /* table 5 */ + { /* i_total 1 */ + { 0xf, 7 }, /* str=0001111 */ + { 0x1, 2 }, /* str=01 */ + }, + { /* i_total 2 */ + { 0xe, 7 }, /* str=0001110 */ + { 0xd, 7 }, /* str=0001101 */ + { 0x1, 3 }, /* str=001 */ + }, + { /* i_total 3 */ + { 0x7, 9 }, /* str=000000111 */ + { 0xc, 7 }, /* str=0001100 */ + { 0xb, 7 }, /* str=0001011 */ + { 0x1, 5 }, /* str=00001 */ + }, + { /* i_total 4 */ + { 0x6, 9 }, /* str=000000110 */ + { 0x5, 9 }, /* str=000000101 */ + { 0xa, 7 }, /* str=0001010 */ + { 0x1, 6 }, /* str=000001 */ + }, + { /* i_total 5 */ + { 0x7, 10 }, /* str=0000000111 */ + { 0x6, 10 }, /* str=0000000110 */ + { 0x4, 9 }, /* str=000000100 */ + { 0x9, 7 }, /* str=0001001 */ + }, + { /* i_total 6 */ + { 0x7, 11 }, /* str=00000000111 */ + { 0x6, 11 }, /* str=00000000110 */ + { 0x5, 10 }, /* str=0000000101 */ + { 0x8, 7 }, /* str=0001000 */ + }, + { /* i_total 7 */ + { 0x7, 12 }, /* str=000000000111 */ + { 0x6, 12 }, /* str=000000000110 */ + { 0x5, 11 }, /* str=00000000101 */ + { 0x4, 10 }, /* str=0000000100 */ + }, + { /* i_total 8 */ + { 0x7, 13 }, /* str=0000000000111 */ + { 0x5, 12 }, /* str=000000000101 */ + { 0x4, 12 }, /* str=000000000100 */ + { 0x4, 11 }, /* str=00000000100 */ + }, + }, +}; + +/* [i_total_coeff-1][i_total_zeros] */ +const vlc_t x264_total_zeros[15][16] = +{ + { /* i_total 1 */ + { 0x1, 1 }, /* str=1 */ + { 0x3, 3 }, /* str=011 */ + { 0x2, 3 }, /* str=010 */ + { 0x3, 4 }, /* str=0011 */ + { 0x2, 4 }, /* str=0010 */ + { 0x3, 5 }, /* str=00011 */ + { 0x2, 5 }, /* str=00010 */ + { 0x3, 6 }, /* str=000011 */ + { 0x2, 6 }, /* str=000010 */ + { 0x3, 7 }, /* str=0000011 */ + { 0x2, 7 }, /* str=0000010 */ + { 0x3, 8 }, /* str=00000011 */ + { 0x2, 8 }, 
/* str=00000010 */ + { 0x3, 9 }, /* str=000000011 */ + { 0x2, 9 }, /* str=000000010 */ + { 0x1, 9 }, /* str=000000001 */ + }, + { /* i_total 2 */ + { 0x7, 3 }, /* str=111 */ + { 0x6, 3 }, /* str=110 */ + { 0x5, 3 }, /* str=101 */ + { 0x4, 3 }, /* str=100 */ + { 0x3, 3 }, /* str=011 */ + { 0x5, 4 }, /* str=0101 */ + { 0x4, 4 }, /* str=0100 */ + { 0x3, 4 }, /* str=0011 */ + { 0x2, 4 }, /* str=0010 */ + { 0x3, 5 }, /* str=00011 */ + { 0x2, 5 }, /* str=00010 */ + { 0x3, 6 }, /* str=000011 */ + { 0x2, 6 }, /* str=000010 */ + { 0x1, 6 }, /* str=000001 */ + { 0x0, 6 }, /* str=000000 */ + }, + { /* i_total 3 */ + { 0x5, 4 }, /* str=0101 */ + { 0x7, 3 }, /* str=111 */ + { 0x6, 3 }, /* str=110 */ + { 0x5, 3 }, /* str=101 */ + { 0x4, 4 }, /* str=0100 */ + { 0x3, 4 }, /* str=0011 */ + { 0x4, 3 }, /* str=100 */ + { 0x3, 3 }, /* str=011 */ + { 0x2, 4 }, /* str=0010 */ + { 0x3, 5 }, /* str=00011 */ + { 0x2, 5 }, /* str=00010 */ + { 0x1, 6 }, /* str=000001 */ + { 0x1, 5 }, /* str=00001 */ + { 0x0, 6 }, /* str=000000 */ + }, + { /* i_total 4 */ + { 0x3, 5 }, /* str=00011 */ + { 0x7, 3 }, /* str=111 */ + { 0x5, 4 }, /* str=0101 */ + { 0x4, 4 }, /* str=0100 */ + { 0x6, 3 }, /* str=110 */ + { 0x5, 3 }, /* str=101 */ + { 0x4, 3 }, /* str=100 */ + { 0x3, 4 }, /* str=0011 */ + { 0x3, 3 }, /* str=011 */ + { 0x2, 4 }, /* str=0010 */ + { 0x2, 5 }, /* str=00010 */ + { 0x1, 5 }, /* str=00001 */ + { 0x0, 5 }, /* str=00000 */ + }, + { /* i_total 5 */ + { 0x5, 4 }, /* str=0101 */ + { 0x4, 4 }, /* str=0100 */ + { 0x3, 4 }, /* str=0011 */ + { 0x7, 3 }, /* str=111 */ + { 0x6, 3 }, /* str=110 */ + { 0x5, 3 }, /* str=101 */ + { 0x4, 3 }, /* str=100 */ + { 0x3, 3 }, /* str=011 */ + { 0x2, 4 }, /* str=0010 */ + { 0x1, 5 }, /* str=00001 */ + { 0x1, 4 }, /* str=0001 */ + { 0x0, 5 }, /* str=00000 */ + }, + { /* i_total 6 */ + { 0x1, 6 }, /* str=000001 */ + { 0x1, 5 }, /* str=00001 */ + { 0x7, 3 }, /* str=111 */ + { 0x6, 3 }, /* str=110 */ + { 0x5, 3 }, /* str=101 */ + { 0x4, 3 }, /* str=100 */ + { 0x3, 3 
}, /* str=011 */ + { 0x2, 3 }, /* str=010 */ + { 0x1, 4 }, /* str=0001 */ + { 0x1, 3 }, /* str=001 */ + { 0x0, 6 }, /* str=000000 */ + }, + { /* i_total 7 */ + { 0x1, 6 }, /* str=000001 */ + { 0x1, 5 }, /* str=00001 */ + { 0x5, 3 }, /* str=101 */ + { 0x4, 3 }, /* str=100 */ + { 0x3, 3 }, /* str=011 */ + { 0x3, 2 }, /* str=11 */ + { 0x2, 3 }, /* str=010 */ + { 0x1, 4 }, /* str=0001 */ + { 0x1, 3 }, /* str=001 */ + { 0x0, 6 }, /* str=000000 */ + }, + { /* i_total 8 */ + { 0x1, 6 }, /* str=000001 */ + { 0x1, 4 }, /* str=0001 */ + { 0x1, 5 }, /* str=00001 */ + { 0x3, 3 }, /* str=011 */ + { 0x3, 2 }, /* str=11 */ + { 0x2, 2 }, /* str=10 */ + { 0x2, 3 }, /* str=010 */ + { 0x1, 3 }, /* str=001 */ + { 0x0, 6 }, /* str=000000 */ + }, + { /* i_total 9 */ + { 0x1, 6 }, /* str=000001 */ + { 0x0, 6 }, /* str=000000 */ + { 0x1, 4 }, /* str=0001 */ + { 0x3, 2 }, /* str=11 */ + { 0x2, 2 }, /* str=10 */ + { 0x1, 3 }, /* str=001 */ + { 0x1, 2 }, /* str=01 */ + { 0x1, 5 }, /* str=00001 */ + }, + { /* i_total 10 */ + { 0x1, 5 }, /* str=00001 */ + { 0x0, 5 }, /* str=00000 */ + { 0x1, 3 }, /* str=001 */ + { 0x3, 2 }, /* str=11 */ + { 0x2, 2 }, /* str=10 */ + { 0x1, 2 }, /* str=01 */ + { 0x1, 4 }, /* str=0001 */ + }, + { /* i_total 11 */ + { 0x0, 4 }, /* str=0000 */ + { 0x1, 4 }, /* str=0001 */ + { 0x1, 3 }, /* str=001 */ + { 0x2, 3 }, /* str=010 */ + { 0x1, 1 }, /* str=1 */ + { 0x3, 3 }, /* str=011 */ + }, + { /* i_total 12 */ + { 0x0, 4 }, /* str=0000 */ + { 0x1, 4 }, /* str=0001 */ + { 0x1, 2 }, /* str=01 */ + { 0x1, 1 }, /* str=1 */ + { 0x1, 3 }, /* str=001 */ + }, + { /* i_total 13 */ + { 0x0, 3 }, /* str=000 */ + { 0x1, 3 }, /* str=001 */ + { 0x1, 1 }, /* str=1 */ + { 0x1, 2 }, /* str=01 */ + }, + { /* i_total 14 */ + { 0x0, 2 }, /* str=00 */ + { 0x1, 2 }, /* str=01 */ + { 0x1, 1 }, /* str=1 */ + }, + { /* i_total 15 */ + { 0x0, 1 }, /* str=0 */ + { 0x1, 1 }, /* str=1 */ + }, +}; + +/* [i_total_coeff-1][i_total_zeros] */ +const vlc_t x264_total_zeros_2x2_dc[3][4] = +{ + { /* 
i_total 1 */ + { 0x1, 1 }, /* str=1 */ + { 0x1, 2 }, /* str=01 */ + { 0x1, 3 }, /* str=001 */ + { 0x0, 3 } /* str=000 */ + }, + { /* i_total 2 */ + { 0x1, 1 }, /* str=1 */ + { 0x1, 2 }, /* str=01 */ + { 0x0, 2 }, /* str=00 */ + }, + { /* i_total 3 */ + { 0x1, 1 }, /* str=1 */ + { 0x0, 1 }, /* str=0 */ + }, +}; + +/* [i_total_coeff-1][i_total_zeros] */ +const vlc_t x264_total_zeros_2x4_dc[7][8] = +{ + { /* i_total 1 */ + { 0x1, 1 }, /* str=1 */ + { 0x2, 3 }, /* str=010 */ + { 0x3, 3 }, /* str=011 */ + { 0x2, 4 }, /* str=0010 */ + { 0x3, 4 }, /* str=0011 */ + { 0x1, 4 }, /* str=0001 */ + { 0x1, 5 }, /* str=00001 */ + { 0x0, 5 }, /* str=00000 */ + }, + { /* i_total 2 */ + { 0x0, 3 }, /* str=000 */ + { 0x1, 2 }, /* str=01 */ + { 0x1, 3 }, /* str=001 */ + { 0x4, 3 }, /* str=100 */ + { 0x5, 3 }, /* str=101 */ + { 0x6, 3 }, /* str=110 */ + { 0x7, 3 }, /* str=111 */ + }, + { /* i_total 3 */ + { 0x0, 3 }, /* str=000 */ + { 0x1, 3 }, /* str=001 */ + { 0x1, 2 }, /* str=01 */ + { 0x2, 2 }, /* str=10 */ + { 0x6, 3 }, /* str=110 */ + { 0x7, 3 }, /* str=111 */ + }, + { /* i_total 4 */ + { 0x6, 3 }, /* str=110 */ + { 0x0, 2 }, /* str=00 */ + { 0x1, 2 }, /* str=01 */ + { 0x2, 2 }, /* str=10 */ + { 0x7, 3 }, /* str=111 */ + }, + { /* i_total 5 */ + { 0x0, 2 }, /* str=00 */ + { 0x1, 2 }, /* str=01 */ + { 0x2, 2 }, /* str=10 */ + { 0x3, 2 }, /* str=11 */ + }, + { /* i_total 6 */ + { 0x0, 2 }, /* str=00 */ + { 0x1, 2 }, /* str=01 */ + { 0x1, 1 }, /* str=1 */ + }, + { /* i_total 7 */ + { 0x0, 1 }, /* str=0 */ + { 0x1, 1 }, /* str=1 */ + } +}; + +/* [MIN( i_zero_left-1, 6 )][run_before] */ +const vlc_t x264_run_before_init[7][16] = +{ + { /* i_zero_left 1 */ + { 0x1, 1 }, /* str=1 */ + { 0x0, 1 }, /* str=0 */ + }, + { /* i_zero_left 2 */ + { 0x1, 1 }, /* str=1 */ + { 0x1, 2 }, /* str=01 */ + { 0x0, 2 }, /* str=00 */ + }, + { /* i_zero_left 3 */ + { 0x3, 2 }, /* str=11 */ + { 0x2, 2 }, /* str=10 */ + { 0x1, 2 }, /* str=01 */ + { 0x0, 2 }, /* str=00 */ + }, + { /* i_zero_left 4 */ + { 0x3, 
2 }, /* str=11 */ + { 0x2, 2 }, /* str=10 */ + { 0x1, 2 }, /* str=01 */ + { 0x1, 3 }, /* str=001 */ + { 0x0, 3 }, /* str=000 */ + }, + { /* i_zero_left 5 */ + { 0x3, 2 }, /* str=11 */ + { 0x2, 2 }, /* str=10 */ + { 0x3, 3 }, /* str=011 */ + { 0x2, 3 }, /* str=010 */ + { 0x1, 3 }, /* str=001 */ + { 0x0, 3 }, /* str=000 */ + }, + { /* i_zero_left 6 */ + { 0x3, 2 }, /* str=11 */ + { 0x0, 3 }, /* str=000 */ + { 0x1, 3 }, /* str=001 */ + { 0x3, 3 }, /* str=011 */ + { 0x2, 3 }, /* str=010 */ + { 0x5, 3 }, /* str=101 */ + { 0x4, 3 }, /* str=100 */ + }, + { /* i_zero_left >6 */ + { 0x7, 3 }, /* str=111 */ + { 0x6, 3 }, /* str=110 */ + { 0x5, 3 }, /* str=101 */ + { 0x4, 3 }, /* str=100 */ + { 0x3, 3 }, /* str=011 */ + { 0x2, 3 }, /* str=010 */ + { 0x1, 3 }, /* str=001 */ + { 0x1, 4 }, /* str=0001 */ + { 0x1, 5 }, /* str=00001 */ + { 0x1, 6 }, /* str=000001 */ + { 0x1, 7 }, /* str=0000001 */ + { 0x1, 8 }, /* str=00000001 */ + { 0x1, 9 }, /* str=000000001 */ + { 0x1, 10 }, /* str=0000000001 */ + { 0x1, 11 }, /* str=00000000001 */ + }, +}; + +/* psy_trellis_init() has the largest size requirement of 16*FDEC_STRIDE*sizeof(pixel) */ +ALIGNED_64( uint8_t x264_zero[1024] ) = { 0 }; diff -Nru x264-0.152.2854+gite9a5903/common/tables.h x264-0.158.2988+git-20191101.7817004/common/tables.h --- x264-0.152.2854+gite9a5903/common/tables.h 1970-01-01 00:00:00.000000000 +0000 +++ x264-0.158.2988+git-20191101.7817004/common/tables.h 2019-11-09 05:16:29.000000000 +0000 @@ -0,0 +1,99 @@ +/***************************************************************************** + * tables.h: const tables + ***************************************************************************** + * Copyright (C) 2003-2019 x264 project + * + * Authors: Laurent Aimar + * Loren Merritt + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your 
option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02111, USA. + * + * This program is also available under a commercial proprietary license. + * For more information, contact us at licensing@x264.com. + *****************************************************************************/ + +#ifndef X264_TABLES_H +#define X264_TABLES_H + +typedef struct +{ + uint8_t i_bits; + uint8_t i_size; +} vlc_t; + +X264_API extern const x264_level_t x264_levels[]; + +extern const uint8_t x264_exp2_lut[64]; +extern const float x264_log2_lut[128]; +extern const float x264_log2_lz_lut[32]; + +#define QP_MAX_MAX (51+6*2+18) +extern const uint16_t x264_lambda_tab[QP_MAX_MAX+1]; +extern const int x264_lambda2_tab[QP_MAX_MAX+1]; +extern const int x264_trellis_lambda2_tab[2][QP_MAX_MAX+1]; +#define MAX_CHROMA_LAMBDA_OFFSET 36 +extern const uint16_t x264_chroma_lambda2_offset_tab[MAX_CHROMA_LAMBDA_OFFSET+1]; + +extern const uint8_t x264_hpel_ref0[16]; +extern const uint8_t x264_hpel_ref1[16]; + +extern const uint8_t x264_cqm_jvt4i[16]; +extern const uint8_t x264_cqm_jvt4p[16]; +extern const uint8_t x264_cqm_jvt8i[64]; +extern const uint8_t x264_cqm_jvt8p[64]; +extern const uint8_t x264_cqm_flat16[64]; +extern const uint8_t * const x264_cqm_jvt[8]; +extern const uint8_t x264_cqm_avci50_4ic[16]; +extern const uint8_t x264_cqm_avci50_p_8iy[64]; +extern const uint8_t x264_cqm_avci50_1080i_8iy[64]; +extern const uint8_t x264_cqm_avci100_720p_4ic[16]; +extern const uint8_t x264_cqm_avci100_720p_8iy[64]; +extern const uint8_t x264_cqm_avci100_1080_4ic[16]; +extern const 
uint8_t x264_cqm_avci100_1080i_8iy[64]; +extern const uint8_t x264_cqm_avci100_1080p_8iy[64]; + +extern const uint8_t x264_decimate_table4[16]; +extern const uint8_t x264_decimate_table8[64]; + +extern const uint32_t x264_dct4_weight_tab[16]; +extern const uint32_t x264_dct8_weight_tab[64]; +extern const uint32_t x264_dct4_weight2_tab[16]; +extern const uint32_t x264_dct8_weight2_tab[64]; + +extern const int8_t x264_cabac_context_init_I[1024][2]; +extern const int8_t x264_cabac_context_init_PB[3][1024][2]; +extern const uint8_t x264_cabac_range_lps[64][4]; +extern const uint8_t x264_cabac_transition[128][2]; +extern const uint8_t x264_cabac_renorm_shift[64]; +extern const uint16_t x264_cabac_entropy[128]; + +extern const uint8_t x264_significant_coeff_flag_offset_8x8[2][64]; +extern const uint8_t x264_last_coeff_flag_offset_8x8[63]; +extern const uint8_t x264_coeff_flag_offset_chroma_422_dc[7]; +extern const uint16_t x264_significant_coeff_flag_offset[2][16]; +extern const uint16_t x264_last_coeff_flag_offset[2][16]; +extern const uint16_t x264_coeff_abs_level_m1_offset[16]; +extern const uint8_t x264_count_cat_m1[14]; + +extern const vlc_t x264_coeff0_token[6]; +extern const vlc_t x264_coeff_token[6][16][4]; +extern const vlc_t x264_total_zeros[15][16]; +extern const vlc_t x264_total_zeros_2x2_dc[3][4]; +extern const vlc_t x264_total_zeros_2x4_dc[7][8]; +extern const vlc_t x264_run_before_init[7][16]; + +extern uint8_t x264_zero[1024]; + +#endif diff -Nru x264-0.152.2854+gite9a5903/common/threadpool.c x264-0.158.2988+git-20191101.7817004/common/threadpool.c --- x264-0.152.2854+gite9a5903/common/threadpool.c 2017-12-31 12:50:51.000000000 +0000 +++ x264-0.158.2988+git-20191101.7817004/common/threadpool.c 2019-11-09 05:16:29.000000000 +0000 @@ -1,7 +1,7 @@ /***************************************************************************** * threadpool.c: thread pooling ***************************************************************************** - * Copyright (C) 
2010-2017 x264 project + * Copyright (C) 2010-2019 x264 project * * Authors: Steven Walters * @@ -34,7 +34,7 @@ struct x264_threadpool_t { - int exit; + volatile int exit; int threads; x264_pthread_t *thread_handle; void (*init_func)(void *); @@ -47,7 +47,7 @@ x264_sync_frame_list_t done; /* list of jobs that have finished processing */ }; -static void *x264_threadpool_thread( x264_threadpool_t *pool ) +REALIGN_STACK static void *threadpool_thread( x264_threadpool_t *pool ) { if( pool->init_func ) pool->init_func( pool->init_arg ); @@ -66,7 +66,7 @@ x264_pthread_mutex_unlock( &pool->run.mutex ); if( !job ) continue; - job->ret = (void*)x264_stack_align( job->func, job->arg ); /* execute the function */ + job->ret = job->func( job->arg ); x264_sync_frame_list_push( &pool->done, (void*)job ); } return NULL; @@ -78,6 +78,9 @@ if( threads <= 0 ) return -1; + if( x264_threading_init() < 0 ) + return -1; + x264_threadpool_t *pool; CHECKED_MALLOCZERO( pool, sizeof(x264_threadpool_t) ); *p_pool = pool; @@ -100,7 +103,7 @@ x264_sync_frame_list_push( &pool->uninit, (void*)job ); } for( int i = 0; i < pool->threads; i++ ) - if( x264_pthread_create( pool->thread_handle+i, NULL, (void*)x264_threadpool_thread, pool ) ) + if( x264_pthread_create( pool->thread_handle+i, NULL, (void*)threadpool_thread, pool ) ) goto fail; return 0; @@ -137,7 +140,7 @@ } } -static void x264_threadpool_list_delete( x264_sync_frame_list_t *slist ) +static void threadpool_list_delete( x264_sync_frame_list_t *slist ) { for( int i = 0; slist->list[i]; i++ ) { @@ -156,9 +159,9 @@ for( int i = 0; i < pool->threads; i++ ) x264_pthread_join( pool->thread_handle[i], NULL ); - x264_threadpool_list_delete( &pool->uninit ); - x264_threadpool_list_delete( &pool->run ); - x264_threadpool_list_delete( &pool->done ); + threadpool_list_delete( &pool->uninit ); + threadpool_list_delete( &pool->run ); + threadpool_list_delete( &pool->done ); x264_free( pool->thread_handle ); x264_free( pool ); } diff -Nru 
x264-0.152.2854+gite9a5903/common/threadpool.h x264-0.158.2988+git-20191101.7817004/common/threadpool.h --- x264-0.152.2854+gite9a5903/common/threadpool.h 2017-12-31 12:50:51.000000000 +0000 +++ x264-0.158.2988+git-20191101.7817004/common/threadpool.h 2019-11-09 05:16:29.000000000 +0000 @@ -1,7 +1,7 @@ /***************************************************************************** * threadpool.h: thread pooling ***************************************************************************** - * Copyright (C) 2010-2017 x264 project + * Copyright (C) 2010-2019 x264 project * * Authors: Steven Walters * @@ -29,11 +29,15 @@ typedef struct x264_threadpool_t x264_threadpool_t; #if HAVE_THREAD -int x264_threadpool_init( x264_threadpool_t **p_pool, int threads, - void (*init_func)(void *), void *init_arg ); -void x264_threadpool_run( x264_threadpool_t *pool, void *(*func)(void *), void *arg ); -void *x264_threadpool_wait( x264_threadpool_t *pool, void *arg ); -void x264_threadpool_delete( x264_threadpool_t *pool ); +#define x264_threadpool_init x264_template(threadpool_init) +X264_API int x264_threadpool_init( x264_threadpool_t **p_pool, int threads, + void (*init_func)(void *), void *init_arg ); +#define x264_threadpool_run x264_template(threadpool_run) +X264_API void x264_threadpool_run( x264_threadpool_t *pool, void *(*func)(void *), void *arg ); +#define x264_threadpool_wait x264_template(threadpool_wait) +X264_API void *x264_threadpool_wait( x264_threadpool_t *pool, void *arg ); +#define x264_threadpool_delete x264_template(threadpool_delete) +X264_API void x264_threadpool_delete( x264_threadpool_t *pool ); #else #define x264_threadpool_init(p,t,f,a) -1 #define x264_threadpool_run(p,f,a) diff -Nru x264-0.152.2854+gite9a5903/common/vlc.c x264-0.158.2988+git-20191101.7817004/common/vlc.c --- x264-0.152.2854+gite9a5903/common/vlc.c 2017-12-31 12:50:51.000000000 +0000 +++ x264-0.158.2988+git-20191101.7817004/common/vlc.c 2019-11-09 05:16:29.000000000 +0000 @@ -1,7 +1,7 @@ 
/***************************************************************************** * vlc.c : vlc tables ***************************************************************************** - * Copyright (C) 2003-2017 x264 project + * Copyright (C) 2003-2019 x264 project * * Authors: Laurent Aimar * Fiona Glaser @@ -27,777 +27,6 @@ #include "common.h" -/* [nC] */ -const vlc_t x264_coeff0_token[6] = -{ - { 0x1, 1 }, /* str=1 */ - { 0x3, 2 }, /* str=11 */ - { 0xf, 4 }, /* str=1111 */ - { 0x3, 6 }, /* str=000011 */ - { 0x1, 2 }, /* str=01 */ - { 0x1, 1 }, /* str=1 */ -}; - -/* [nC][i_total_coeff-1][i_trailing] */ -const vlc_t x264_coeff_token[6][16][4] = -{ - { /* table 0 */ - { /* i_total 1 */ - { 0x5, 6 }, /* str=000101 */ - { 0x1, 2 }, /* str=01 */ - }, - { /* i_total 2 */ - { 0x7, 8 }, /* str=00000111 */ - { 0x4, 6 }, /* str=000100 */ - { 0x1, 3 }, /* str=001 */ - }, - { /* i_total 3 */ - { 0x7, 9 }, /* str=000000111 */ - { 0x6, 8 }, /* str=00000110 */ - { 0x5, 7 }, /* str=0000101 */ - { 0x3, 5 }, /* str=00011 */ - }, - { /* i_total 4 */ - { 0x7, 10 }, /* str=0000000111 */ - { 0x6, 9 }, /* str=000000110 */ - { 0x5, 8 }, /* str=00000101 */ - { 0x3, 6 }, /* str=000011 */ - }, - { /* i_total 5 */ - { 0x7, 11 }, /* str=00000000111 */ - { 0x6, 10 }, /* str=0000000110 */ - { 0x5, 9 }, /* str=000000101 */ - { 0x4, 7 }, /* str=0000100 */ - }, - { /* i_total 6 */ - { 0xf, 13 }, /* str=0000000001111 */ - { 0x6, 11 }, /* str=00000000110 */ - { 0x5, 10 }, /* str=0000000101 */ - { 0x4, 8 }, /* str=00000100 */ - }, - { /* i_total 7 */ - { 0xb, 13 }, /* str=0000000001011 */ - { 0xe, 13 }, /* str=0000000001110 */ - { 0x5, 11 }, /* str=00000000101 */ - { 0x4, 9 }, /* str=000000100 */ - }, - { /* i_total 8 */ - { 0x8, 13 }, /* str=0000000001000 */ - { 0xa, 13 }, /* str=0000000001010 */ - { 0xd, 13 }, /* str=0000000001101 */ - { 0x4, 10 }, /* str=0000000100 */ - }, - { /* i_total 9 */ - { 0xf, 14 }, /* str=00000000001111 */ - { 0xe, 14 }, /* str=00000000001110 */ - { 0x9, 13 }, /* 
str=0000000001001 */ - { 0x4, 11 }, /* str=00000000100 */ - }, - { /* i_total 10 */ - { 0xb, 14 }, /* str=00000000001011 */ - { 0xa, 14 }, /* str=00000000001010 */ - { 0xd, 14 }, /* str=00000000001101 */ - { 0xc, 13 }, /* str=0000000001100 */ - }, - { /* i_total 14 */ - { 0xf, 15 }, /* str=000000000001111 */ - { 0xe, 15 }, /* str=000000000001110 */ - { 0x9, 14 }, /* str=00000000001001 */ - { 0xc, 14 }, /* str=00000000001100 */ - }, - { /* i_total 12 */ - { 0xb, 15 }, /* str=000000000001011 */ - { 0xa, 15 }, /* str=000000000001010 */ - { 0xd, 15 }, /* str=000000000001101 */ - { 0x8, 14 }, /* str=00000000001000 */ - }, - { /* i_total 13 */ - { 0xf, 16 }, /* str=0000000000001111 */ - { 0x1, 15 }, /* str=000000000000001 */ - { 0x9, 15 }, /* str=000000000001001 */ - { 0xc, 15 }, /* str=000000000001100 */ - }, - { /* i_total 14 */ - { 0xb, 16 }, /* str=0000000000001011 */ - { 0xe, 16 }, /* str=0000000000001110 */ - { 0xd, 16 }, /* str=0000000000001101 */ - { 0x8, 15 }, /* str=000000000001000 */ - }, - { /* i_total 15 */ - { 0x7, 16 }, /* str=0000000000000111 */ - { 0xa, 16 }, /* str=0000000000001010 */ - { 0x9, 16 }, /* str=0000000000001001 */ - { 0xc, 16 }, /* str=0000000000001100 */ - }, - { /* i_total 16 */ - { 0x4, 16 }, /* str=0000000000000100 */ - { 0x6, 16 }, /* str=0000000000000110 */ - { 0x5, 16 }, /* str=0000000000000101 */ - { 0x8, 16 }, /* str=0000000000001000 */ - }, - }, - { /* table 1 */ - { /* i_total 1 */ - { 0xb, 6 }, /* str=001011 */ - { 0x2, 2 }, /* str=10 */ - }, - { /* i_total 2 */ - { 0x7, 6 }, /* str=000111 */ - { 0x7, 5 }, /* str=00111 */ - { 0x3, 3 }, /* str=011 */ - }, - { /* i_total 3 */ - { 0x7, 7 }, /* str=0000111 */ - { 0xa, 6 }, /* str=001010 */ - { 0x9, 6 }, /* str=001001 */ - { 0x5, 4 }, /* str=0101 */ - }, - { /* i_total 4 */ - { 0x7, 8 }, /* str=00000111 */ - { 0x6, 6 }, /* str=000110 */ - { 0x5, 6 }, /* str=000101 */ - { 0x4, 4 }, /* str=0100 */ - }, - { /* i_total 5 */ - { 0x4, 8 }, /* str=00000100 */ - { 0x6, 7 }, /* str=0000110 */ 
- { 0x5, 7 }, /* str=0000101 */ - { 0x6, 5 }, /* str=00110 */ - }, - { /* i_total 6 */ - { 0x7, 9 }, /* str=000000111 */ - { 0x6, 8 }, /* str=00000110 */ - { 0x5, 8 }, /* str=00000101 */ - { 0x8, 6 }, /* str=001000 */ - }, - { /* i_total 7 */ - { 0xf, 11 }, /* str=00000001111 */ - { 0x6, 9 }, /* str=000000110 */ - { 0x5, 9 }, /* str=000000101 */ - { 0x4, 6 }, /* str=000100 */ - }, - { /* i_total 8 */ - { 0xb, 11 }, /* str=00000001011 */ - { 0xe, 11 }, /* str=00000001110 */ - { 0xd, 11 }, /* str=00000001101 */ - { 0x4, 7 }, /* str=0000100 */ - }, - { /* i_total 9 */ - { 0xf, 12 }, /* str=000000001111 */ - { 0xa, 11 }, /* str=00000001010 */ - { 0x9, 11 }, /* str=00000001001 */ - { 0x4, 9 }, /* str=000000100 */ - }, - { /* i_total 10 */ - { 0xb, 12 }, /* str=000000001011 */ - { 0xe, 12 }, /* str=000000001110 */ - { 0xd, 12 }, /* str=000000001101 */ - { 0xc, 11 }, /* str=00000001100 */ - }, - { /* i_total 11 */ - { 0x8, 12 }, /* str=000000001000 */ - { 0xa, 12 }, /* str=000000001010 */ - { 0x9, 12 }, /* str=000000001001 */ - { 0x8, 11 }, /* str=00000001000 */ - }, - { /* i_total 12 */ - { 0xf, 13 }, /* str=0000000001111 */ - { 0xe, 13 }, /* str=0000000001110 */ - { 0xd, 13 }, /* str=0000000001101 */ - { 0xc, 12 }, /* str=000000001100 */ - }, - { /* i_total 13 */ - { 0xb, 13 }, /* str=0000000001011 */ - { 0xa, 13 }, /* str=0000000001010 */ - { 0x9, 13 }, /* str=0000000001001 */ - { 0xc, 13 }, /* str=0000000001100 */ - }, - { /* i_total 14 */ - { 0x7, 13 }, /* str=0000000000111 */ - { 0xb, 14 }, /* str=00000000001011 */ - { 0x6, 13 }, /* str=0000000000110 */ - { 0x8, 13 }, /* str=0000000001000 */ - }, - { /* i_total 15 */ - { 0x9, 14 }, /* str=00000000001001 */ - { 0x8, 14 }, /* str=00000000001000 */ - { 0xa, 14 }, /* str=00000000001010 */ - { 0x1, 13 }, /* str=0000000000001 */ - }, - { /* i_total 16 */ - { 0x7, 14 }, /* str=00000000000111 */ - { 0x6, 14 }, /* str=00000000000110 */ - { 0x5, 14 }, /* str=00000000000101 */ - { 0x4, 14 }, /* str=00000000000100 */ - }, - }, 
- { /* table 2 */ - { /* i_total 1 */ - { 0xf, 6 }, /* str=001111 */ - { 0xe, 4 }, /* str=1110 */ - }, - { /* i_total 2 */ - { 0xb, 6 }, /* str=001011 */ - { 0xf, 5 }, /* str=01111 */ - { 0xd, 4 }, /* str=1101 */ - }, - { /* i_total 3 */ - { 0x8, 6 }, /* str=001000 */ - { 0xc, 5 }, /* str=01100 */ - { 0xe, 5 }, /* str=01110 */ - { 0xc, 4 }, /* str=1100 */ - }, - { /* i_total 4 */ - { 0xf, 7 }, /* str=0001111 */ - { 0xa, 5 }, /* str=01010 */ - { 0xb, 5 }, /* str=01011 */ - { 0xb, 4 }, /* str=1011 */ - }, - { /* i_total 5 */ - { 0xb, 7 }, /* str=0001011 */ - { 0x8, 5 }, /* str=01000 */ - { 0x9, 5 }, /* str=01001 */ - { 0xa, 4 }, /* str=1010 */ - }, - { /* i_total 6 */ - { 0x9, 7 }, /* str=0001001 */ - { 0xe, 6 }, /* str=001110 */ - { 0xd, 6 }, /* str=001101 */ - { 0x9, 4 }, /* str=1001 */ - }, - { /* i_total 7 */ - { 0x8, 7 }, /* str=0001000 */ - { 0xa, 6 }, /* str=001010 */ - { 0x9, 6 }, /* str=001001 */ - { 0x8, 4 }, /* str=1000 */ - }, - { /* i_total 8 */ - { 0xf, 8 }, /* str=00001111 */ - { 0xe, 7 }, /* str=0001110 */ - { 0xd, 7 }, /* str=0001101 */ - { 0xd, 5 }, /* str=01101 */ - }, - { /* i_total 9 */ - { 0xb, 8 }, /* str=00001011 */ - { 0xe, 8 }, /* str=00001110 */ - { 0xa, 7 }, /* str=0001010 */ - { 0xc, 6 }, /* str=001100 */ - }, - { /* i_total 10 */ - { 0xf, 9 }, /* str=000001111 */ - { 0xa, 8 }, /* str=00001010 */ - { 0xd, 8 }, /* str=00001101 */ - { 0xc, 7 }, /* str=0001100 */ - }, - { /* i_total 11 */ - { 0xb, 9 }, /* str=000001011 */ - { 0xe, 9 }, /* str=000001110 */ - { 0x9, 8 }, /* str=00001001 */ - { 0xc, 8 }, /* str=00001100 */ - }, - { /* i_total 12 */ - { 0x8, 9 }, /* str=000001000 */ - { 0xa, 9 }, /* str=000001010 */ - { 0xd, 9 }, /* str=000001101 */ - { 0x8, 8 }, /* str=00001000 */ - }, - { /* i_total 13 */ - { 0xd, 10 }, /* str=0000001101 */ - { 0x7, 9 }, /* str=000000111 */ - { 0x9, 9 }, /* str=000001001 */ - { 0xc, 9 }, /* str=000001100 */ - }, - { /* i_total 14 */ - { 0x9, 10 }, /* str=0000001001 */ - { 0xc, 10 }, /* str=0000001100 */ - { 
0xb, 10 }, /* str=0000001011 */ - { 0xa, 10 }, /* str=0000001010 */ - }, - { /* i_total 15 */ - { 0x5, 10 }, /* str=0000000101 */ - { 0x8, 10 }, /* str=0000001000 */ - { 0x7, 10 }, /* str=0000000111 */ - { 0x6, 10 }, /* str=0000000110 */ - }, - { /* i_total 16 */ - { 0x1, 10 }, /* str=0000000001 */ - { 0x4, 10 }, /* str=0000000100 */ - { 0x3, 10 }, /* str=0000000011 */ - { 0x2, 10 }, /* str=0000000010 */ - }, - }, - { /* table 3 */ - { /* i_total 1 */ - { 0x0, 6 }, /* str=000000 */ - { 0x1, 6 }, /* str=000001 */ - }, - { /* i_total 2 */ - { 0x4, 6 }, /* str=000100 */ - { 0x5, 6 }, /* str=000101 */ - { 0x6, 6 }, /* str=000110 */ - }, - { /* i_total 3 */ - { 0x8, 6 }, /* str=001000 */ - { 0x9, 6 }, /* str=001001 */ - { 0xa, 6 }, /* str=001010 */ - { 0xb, 6 }, /* str=001011 */ - }, - { /* i_total 4 */ - { 0xc, 6 }, /* str=001100 */ - { 0xd, 6 }, /* str=001101 */ - { 0xe, 6 }, /* str=001110 */ - { 0xf, 6 }, /* str=001111 */ - }, - { /* i_total 5 */ - { 0x10, 6 }, /* str=010000 */ - { 0x11, 6 }, /* str=010001 */ - { 0x12, 6 }, /* str=010010 */ - { 0x13, 6 }, /* str=010011 */ - }, - { /* i_total 6 */ - { 0x14, 6 }, /* str=010100 */ - { 0x15, 6 }, /* str=010101 */ - { 0x16, 6 }, /* str=010110 */ - { 0x17, 6 }, /* str=010111 */ - }, - { /* i_total 7 */ - { 0x18, 6 }, /* str=011000 */ - { 0x19, 6 }, /* str=011001 */ - { 0x1a, 6 }, /* str=011010 */ - { 0x1b, 6 }, /* str=011011 */ - }, - { /* i_total 8 */ - { 0x1c, 6 }, /* str=011100 */ - { 0x1d, 6 }, /* str=011101 */ - { 0x1e, 6 }, /* str=011110 */ - { 0x1f, 6 }, /* str=011111 */ - }, - { /* i_total 9 */ - { 0x20, 6 }, /* str=100000 */ - { 0x21, 6 }, /* str=100001 */ - { 0x22, 6 }, /* str=100010 */ - { 0x23, 6 }, /* str=100011 */ - }, - { /* i_total 10 */ - { 0x24, 6 }, /* str=100100 */ - { 0x25, 6 }, /* str=100101 */ - { 0x26, 6 }, /* str=100110 */ - { 0x27, 6 }, /* str=100111 */ - }, - { /* i_total 11 */ - { 0x28, 6 }, /* str=101000 */ - { 0x29, 6 }, /* str=101001 */ - { 0x2a, 6 }, /* str=101010 */ - { 0x2b, 6 }, /* 
str=101011 */ - }, - { /* i_total 12 */ - { 0x2c, 6 }, /* str=101100 */ - { 0x2d, 6 }, /* str=101101 */ - { 0x2e, 6 }, /* str=101110 */ - { 0x2f, 6 }, /* str=101111 */ - }, - { /* i_total 13 */ - { 0x30, 6 }, /* str=110000 */ - { 0x31, 6 }, /* str=110001 */ - { 0x32, 6 }, /* str=110010 */ - { 0x33, 6 }, /* str=110011 */ - }, - { /* i_total 14 */ - { 0x34, 6 }, /* str=110100 */ - { 0x35, 6 }, /* str=110101 */ - { 0x36, 6 }, /* str=110110 */ - { 0x37, 6 }, /* str=110111 */ - }, - { /* i_total 15 */ - { 0x38, 6 }, /* str=111000 */ - { 0x39, 6 }, /* str=111001 */ - { 0x3a, 6 }, /* str=111010 */ - { 0x3b, 6 }, /* str=111011 */ - }, - { /* i_total 16 */ - { 0x3c, 6 }, /* str=111100 */ - { 0x3d, 6 }, /* str=111101 */ - { 0x3e, 6 }, /* str=111110 */ - { 0x3f, 6 }, /* str=111111 */ - }, - }, - { /* table 4 */ - { /* i_total 1 */ - { 0x7, 6 }, /* str=000111 */ - { 0x1, 1 }, /* str=1 */ - }, - { /* i_total 2 */ - { 0x4, 6 }, /* str=000100 */ - { 0x6, 6 }, /* str=000110 */ - { 0x1, 3 }, /* str=001 */ - }, - { /* i_total 3 */ - { 0x3, 6 }, /* str=000011 */ - { 0x3, 7 }, /* str=0000011 */ - { 0x2, 7 }, /* str=0000010 */ - { 0x5, 6 }, /* str=000101 */ - }, - { /* i_total 4 */ - { 0x2, 6 }, /* str=000010 */ - { 0x3, 8 }, /* str=00000011 */ - { 0x2, 8 }, /* str=00000010 */ - { 0x0, 7 }, /* str=0000000 */ - }, - }, - { /* table 5 */ - { /* i_total 1 */ - { 0xf, 7 }, /* str=0001111 */ - { 0x1, 2 }, /* str=01 */ - }, - { /* i_total 2 */ - { 0xe, 7 }, /* str=0001110 */ - { 0xd, 7 }, /* str=0001101 */ - { 0x1, 3 }, /* str=001 */ - }, - { /* i_total 3 */ - { 0x7, 9 }, /* str=000000111 */ - { 0xc, 7 }, /* str=0001100 */ - { 0xb, 7 }, /* str=0001011 */ - { 0x1, 5 }, /* str=00001 */ - }, - { /* i_total 4 */ - { 0x6, 9 }, /* str=000000110 */ - { 0x5, 9 }, /* str=000000101 */ - { 0xa, 7 }, /* str=0001010 */ - { 0x1, 6 }, /* str=000001 */ - }, - { /* i_total 5 */ - { 0x7, 10 }, /* str=0000000111 */ - { 0x6, 10 }, /* str=0000000110 */ - { 0x4, 9 }, /* str=000000100 */ - { 0x9, 7 }, /* 
str=0001001 */ - }, - { /* i_total 6 */ - { 0x7, 11 }, /* str=00000000111 */ - { 0x6, 11 }, /* str=00000000110 */ - { 0x5, 10 }, /* str=0000000101 */ - { 0x8, 7 }, /* str=0001000 */ - }, - { /* i_total 7 */ - { 0x7, 12 }, /* str=000000000111 */ - { 0x6, 12 }, /* str=000000000110 */ - { 0x5, 11 }, /* str=00000000101 */ - { 0x4, 10 }, /* str=0000000100 */ - }, - { /* i_total 8 */ - { 0x7, 13 }, /* str=0000000000111 */ - { 0x5, 12 }, /* str=000000000101 */ - { 0x4, 12 }, /* str=000000000100 */ - { 0x4, 11 }, /* str=00000000100 */ - }, - }, -}; - -/* [i_total_coeff-1][i_total_zeros] */ -const vlc_t x264_total_zeros[15][16] = -{ - { /* i_total 1 */ - { 0x1, 1 }, /* str=1 */ - { 0x3, 3 }, /* str=011 */ - { 0x2, 3 }, /* str=010 */ - { 0x3, 4 }, /* str=0011 */ - { 0x2, 4 }, /* str=0010 */ - { 0x3, 5 }, /* str=00011 */ - { 0x2, 5 }, /* str=00010 */ - { 0x3, 6 }, /* str=000011 */ - { 0x2, 6 }, /* str=000010 */ - { 0x3, 7 }, /* str=0000011 */ - { 0x2, 7 }, /* str=0000010 */ - { 0x3, 8 }, /* str=00000011 */ - { 0x2, 8 }, /* str=00000010 */ - { 0x3, 9 }, /* str=000000011 */ - { 0x2, 9 }, /* str=000000010 */ - { 0x1, 9 }, /* str=000000001 */ - }, - { /* i_total 2 */ - { 0x7, 3 }, /* str=111 */ - { 0x6, 3 }, /* str=110 */ - { 0x5, 3 }, /* str=101 */ - { 0x4, 3 }, /* str=100 */ - { 0x3, 3 }, /* str=011 */ - { 0x5, 4 }, /* str=0101 */ - { 0x4, 4 }, /* str=0100 */ - { 0x3, 4 }, /* str=0011 */ - { 0x2, 4 }, /* str=0010 */ - { 0x3, 5 }, /* str=00011 */ - { 0x2, 5 }, /* str=00010 */ - { 0x3, 6 }, /* str=000011 */ - { 0x2, 6 }, /* str=000010 */ - { 0x1, 6 }, /* str=000001 */ - { 0x0, 6 }, /* str=000000 */ - }, - { /* i_total 3 */ - { 0x5, 4 }, /* str=0101 */ - { 0x7, 3 }, /* str=111 */ - { 0x6, 3 }, /* str=110 */ - { 0x5, 3 }, /* str=101 */ - { 0x4, 4 }, /* str=0100 */ - { 0x3, 4 }, /* str=0011 */ - { 0x4, 3 }, /* str=100 */ - { 0x3, 3 }, /* str=011 */ - { 0x2, 4 }, /* str=0010 */ - { 0x3, 5 }, /* str=00011 */ - { 0x2, 5 }, /* str=00010 */ - { 0x1, 6 }, /* str=000001 */ - { 0x1, 5 }, /* 
str=00001 */ - { 0x0, 6 }, /* str=000000 */ - }, - { /* i_total 4 */ - { 0x3, 5 }, /* str=00011 */ - { 0x7, 3 }, /* str=111 */ - { 0x5, 4 }, /* str=0101 */ - { 0x4, 4 }, /* str=0100 */ - { 0x6, 3 }, /* str=110 */ - { 0x5, 3 }, /* str=101 */ - { 0x4, 3 }, /* str=100 */ - { 0x3, 4 }, /* str=0011 */ - { 0x3, 3 }, /* str=011 */ - { 0x2, 4 }, /* str=0010 */ - { 0x2, 5 }, /* str=00010 */ - { 0x1, 5 }, /* str=00001 */ - { 0x0, 5 }, /* str=00000 */ - }, - { /* i_total 5 */ - { 0x5, 4 }, /* str=0101 */ - { 0x4, 4 }, /* str=0100 */ - { 0x3, 4 }, /* str=0011 */ - { 0x7, 3 }, /* str=111 */ - { 0x6, 3 }, /* str=110 */ - { 0x5, 3 }, /* str=101 */ - { 0x4, 3 }, /* str=100 */ - { 0x3, 3 }, /* str=011 */ - { 0x2, 4 }, /* str=0010 */ - { 0x1, 5 }, /* str=00001 */ - { 0x1, 4 }, /* str=0001 */ - { 0x0, 5 }, /* str=00000 */ - }, - { /* i_total 6 */ - { 0x1, 6 }, /* str=000001 */ - { 0x1, 5 }, /* str=00001 */ - { 0x7, 3 }, /* str=111 */ - { 0x6, 3 }, /* str=110 */ - { 0x5, 3 }, /* str=101 */ - { 0x4, 3 }, /* str=100 */ - { 0x3, 3 }, /* str=011 */ - { 0x2, 3 }, /* str=010 */ - { 0x1, 4 }, /* str=0001 */ - { 0x1, 3 }, /* str=001 */ - { 0x0, 6 }, /* str=000000 */ - }, - { /* i_total 7 */ - { 0x1, 6 }, /* str=000001 */ - { 0x1, 5 }, /* str=00001 */ - { 0x5, 3 }, /* str=101 */ - { 0x4, 3 }, /* str=100 */ - { 0x3, 3 }, /* str=011 */ - { 0x3, 2 }, /* str=11 */ - { 0x2, 3 }, /* str=010 */ - { 0x1, 4 }, /* str=0001 */ - { 0x1, 3 }, /* str=001 */ - { 0x0, 6 }, /* str=000000 */ - }, - { /* i_total 8 */ - { 0x1, 6 }, /* str=000001 */ - { 0x1, 4 }, /* str=0001 */ - { 0x1, 5 }, /* str=00001 */ - { 0x3, 3 }, /* str=011 */ - { 0x3, 2 }, /* str=11 */ - { 0x2, 2 }, /* str=10 */ - { 0x2, 3 }, /* str=010 */ - { 0x1, 3 }, /* str=001 */ - { 0x0, 6 }, /* str=000000 */ - }, - { /* i_total 9 */ - { 0x1, 6 }, /* str=000001 */ - { 0x0, 6 }, /* str=000000 */ - { 0x1, 4 }, /* str=0001 */ - { 0x3, 2 }, /* str=11 */ - { 0x2, 2 }, /* str=10 */ - { 0x1, 3 }, /* str=001 */ - { 0x1, 2 }, /* str=01 */ - { 0x1, 5 }, /* 
str=00001 */ - }, - { /* i_total 10 */ - { 0x1, 5 }, /* str=00001 */ - { 0x0, 5 }, /* str=00000 */ - { 0x1, 3 }, /* str=001 */ - { 0x3, 2 }, /* str=11 */ - { 0x2, 2 }, /* str=10 */ - { 0x1, 2 }, /* str=01 */ - { 0x1, 4 }, /* str=0001 */ - }, - { /* i_total 11 */ - { 0x0, 4 }, /* str=0000 */ - { 0x1, 4 }, /* str=0001 */ - { 0x1, 3 }, /* str=001 */ - { 0x2, 3 }, /* str=010 */ - { 0x1, 1 }, /* str=1 */ - { 0x3, 3 }, /* str=011 */ - }, - { /* i_total 12 */ - { 0x0, 4 }, /* str=0000 */ - { 0x1, 4 }, /* str=0001 */ - { 0x1, 2 }, /* str=01 */ - { 0x1, 1 }, /* str=1 */ - { 0x1, 3 }, /* str=001 */ - }, - { /* i_total 13 */ - { 0x0, 3 }, /* str=000 */ - { 0x1, 3 }, /* str=001 */ - { 0x1, 1 }, /* str=1 */ - { 0x1, 2 }, /* str=01 */ - }, - { /* i_total 14 */ - { 0x0, 2 }, /* str=00 */ - { 0x1, 2 }, /* str=01 */ - { 0x1, 1 }, /* str=1 */ - }, - { /* i_total 15 */ - { 0x0, 1 }, /* str=0 */ - { 0x1, 1 }, /* str=1 */ - }, -}; - -/* [i_total_coeff-1][i_total_zeros] */ -const vlc_t x264_total_zeros_2x2_dc[3][4] = -{ - { /* i_total 1 */ - { 0x1, 1 }, /* str=1 */ - { 0x1, 2 }, /* str=01 */ - { 0x1, 3 }, /* str=001 */ - { 0x0, 3 } /* str=000 */ - }, - { /* i_total 2 */ - { 0x1, 1 }, /* str=1 */ - { 0x1, 2 }, /* str=01 */ - { 0x0, 2 }, /* str=00 */ - }, - { /* i_total 3 */ - { 0x1, 1 }, /* str=1 */ - { 0x0, 1 }, /* str=0 */ - }, -}; - -/* [i_total_coeff-1][i_total_zeros] */ -const vlc_t x264_total_zeros_2x4_dc[7][8] = -{ - { /* i_total 1 */ - { 0x1, 1 }, /* str=1 */ - { 0x2, 3 }, /* str=010 */ - { 0x3, 3 }, /* str=011 */ - { 0x2, 4 }, /* str=0010 */ - { 0x3, 4 }, /* str=0011 */ - { 0x1, 4 }, /* str=0001 */ - { 0x1, 5 }, /* str=00001 */ - { 0x0, 5 }, /* str=00000 */ - }, - { /* i_total 2 */ - { 0x0, 3 }, /* str=000 */ - { 0x1, 2 }, /* str=01 */ - { 0x1, 3 }, /* str=001 */ - { 0x4, 3 }, /* str=100 */ - { 0x5, 3 }, /* str=101 */ - { 0x6, 3 }, /* str=110 */ - { 0x7, 3 }, /* str=111 */ - }, - { /* i_total 3 */ - { 0x0, 3 }, /* str=000 */ - { 0x1, 3 }, /* str=001 */ - { 0x1, 2 }, /* str=01 */ 
- { 0x2, 2 }, /* str=10 */ - { 0x6, 3 }, /* str=110 */ - { 0x7, 3 }, /* str=111 */ - }, - { /* i_total 4 */ - { 0x6, 3 }, /* str=110 */ - { 0x0, 2 }, /* str=00 */ - { 0x1, 2 }, /* str=01 */ - { 0x2, 2 }, /* str=10 */ - { 0x7, 3 }, /* str=111 */ - }, - { /* i_total 5 */ - { 0x0, 2 }, /* str=00 */ - { 0x1, 2 }, /* str=01 */ - { 0x2, 2 }, /* str=10 */ - { 0x3, 2 }, /* str=11 */ - }, - { /* i_total 6 */ - { 0x0, 2 }, /* str=00 */ - { 0x1, 2 }, /* str=01 */ - { 0x1, 1 }, /* str=1 */ - }, - { /* i_total 7 */ - { 0x0, 1 }, /* str=0 */ - { 0x1, 1 }, /* str=1 */ - } -}; - -/* [MIN( i_zero_left-1, 6 )][run_before] */ -static const vlc_t run_before[7][16] = -{ - { /* i_zero_left 1 */ - { 0x1, 1 }, /* str=1 */ - { 0x0, 1 }, /* str=0 */ - }, - { /* i_zero_left 2 */ - { 0x1, 1 }, /* str=1 */ - { 0x1, 2 }, /* str=01 */ - { 0x0, 2 }, /* str=00 */ - }, - { /* i_zero_left 3 */ - { 0x3, 2 }, /* str=11 */ - { 0x2, 2 }, /* str=10 */ - { 0x1, 2 }, /* str=01 */ - { 0x0, 2 }, /* str=00 */ - }, - { /* i_zero_left 4 */ - { 0x3, 2 }, /* str=11 */ - { 0x2, 2 }, /* str=10 */ - { 0x1, 2 }, /* str=01 */ - { 0x1, 3 }, /* str=001 */ - { 0x0, 3 }, /* str=000 */ - }, - { /* i_zero_left 5 */ - { 0x3, 2 }, /* str=11 */ - { 0x2, 2 }, /* str=10 */ - { 0x3, 3 }, /* str=011 */ - { 0x2, 3 }, /* str=010 */ - { 0x1, 3 }, /* str=001 */ - { 0x0, 3 }, /* str=000 */ - }, - { /* i_zero_left 6 */ - { 0x3, 2 }, /* str=11 */ - { 0x0, 3 }, /* str=000 */ - { 0x1, 3 }, /* str=001 */ - { 0x3, 3 }, /* str=011 */ - { 0x2, 3 }, /* str=010 */ - { 0x5, 3 }, /* str=101 */ - { 0x4, 3 }, /* str=100 */ - }, - { /* i_zero_left >6 */ - { 0x7, 3 }, /* str=111 */ - { 0x6, 3 }, /* str=110 */ - { 0x5, 3 }, /* str=101 */ - { 0x4, 3 }, /* str=100 */ - { 0x3, 3 }, /* str=011 */ - { 0x2, 3 }, /* str=010 */ - { 0x1, 3 }, /* str=001 */ - { 0x1, 4 }, /* str=0001 */ - { 0x1, 5 }, /* str=00001 */ - { 0x1, 6 }, /* str=000001 */ - { 0x1, 7 }, /* str=0000001 */ - { 0x1, 8 }, /* str=00000001 */ - { 0x1, 9 }, /* str=000000001 */ - { 0x1, 10 }, /* 
str=0000000001 */ - { 0x1, 11 }, /* str=00000000001 */ - }, -}; - vlc_large_t x264_level_token[7][LEVEL_TABLE_SIZE]; uint32_t x264_run_before[1<<16]; @@ -842,7 +71,9 @@ vlc->i_next = i_next; } - for( int i = 1; i < (1<<16); i++ ) + x264_run_before[0] = 0; + x264_run_before[1] = 0; + for( uint32_t i = 2; i < (1<<16); i++ ) { x264_run_level_t runlevel; ALIGNED_ARRAY_16( dctcoef, dct, [16] ); @@ -857,10 +88,10 @@ { int idx = X264_MIN(zeros, 7) - 1; int run = x264_clz( mask ); - int len = run_before[idx][run].i_size; + int len = x264_run_before_init[idx][run].i_size; size += len; bits <<= len; - bits |= run_before[idx][run].i_bits; + bits |= x264_run_before_init[idx][run].i_bits; zeros -= run; mask <<= run + 1; } diff -Nru x264-0.152.2854+gite9a5903/common/win32thread.c x264-0.158.2988+git-20191101.7817004/common/win32thread.c --- x264-0.152.2854+gite9a5903/common/win32thread.c 2017-12-31 12:50:51.000000000 +0000 +++ x264-0.158.2988+git-20191101.7817004/common/win32thread.c 2019-11-09 05:16:29.000000000 +0000 @@ -1,7 +1,7 @@ /***************************************************************************** * win32thread.c: windows threading ***************************************************************************** - * Copyright (C) 2010-2017 x264 project + * Copyright (C) 2010-2019 x264 project * * Authors: Steven Walters * Pegasys Inc. @@ -31,7 +31,7 @@ /* Based on the agreed standing that x264 does not need to utilize >64 logical cpus, * this API does not detect nor utilize more than 64 cpus for systems that have them. */ -#include "common.h" +#include "base.h" #if HAVE_WINRT /* _beginthreadex() is technically the correct option, but it's only available for Desktop applications. 
@@ -51,7 +51,7 @@ static x264_pthread_mutex_t static_mutex; /* _beginthreadex requires that the start routine is __stdcall */ -static unsigned __stdcall x264_win32thread_worker( void *arg ) +static unsigned __stdcall win32thread_worker( void *arg ) { x264_pthread_t *h = arg; *h->p_ret = h->func( h->arg ); @@ -65,7 +65,7 @@ thread->arg = arg; thread->p_ret = &thread->ret; thread->ret = NULL; - thread->handle = (void*)_beginthreadex( NULL, 0, x264_win32thread_worker, thread, 0, NULL ); + thread->handle = (void*)_beginthreadex( NULL, 0, win32thread_worker, thread, 0, NULL ); return !thread->handle; } @@ -95,7 +95,15 @@ { static const x264_pthread_mutex_t init = X264_PTHREAD_MUTEX_INITIALIZER; if( !memcmp( mutex, &init, sizeof(x264_pthread_mutex_t) ) ) - *mutex = static_mutex; + { + int ret = 0; + EnterCriticalSection( &static_mutex ); + if( !memcmp( mutex, &init, sizeof(x264_pthread_mutex_t) ) ) + ret = x264_pthread_mutex_init( mutex, NULL ); + LeaveCriticalSection( &static_mutex ); + if( ret ) + return ret; + } EnterCriticalSection( mutex ); return 0; } diff -Nru x264-0.152.2854+gite9a5903/common/win32thread.h x264-0.158.2988+git-20191101.7817004/common/win32thread.h --- x264-0.152.2854+gite9a5903/common/win32thread.h 2017-12-31 12:50:51.000000000 +0000 +++ x264-0.158.2988+git-20191101.7817004/common/win32thread.h 2019-11-09 05:16:29.000000000 +0000 @@ -1,7 +1,7 @@ /***************************************************************************** * win32thread.h: windows threading ***************************************************************************** - * Copyright (C) 2010-2017 x264 project + * Copyright (C) 2010-2019 x264 project * * Authors: Steven Walters * diff -Nru x264-0.152.2854+gite9a5903/common/x86/bitstream-a.asm x264-0.158.2988+git-20191101.7817004/common/x86/bitstream-a.asm --- x264-0.152.2854+gite9a5903/common/x86/bitstream-a.asm 2017-12-31 12:50:51.000000000 +0000 +++ x264-0.158.2988+git-20191101.7817004/common/x86/bitstream-a.asm 2019-11-09 
05:16:29.000000000 +0000 @@ -1,7 +1,7 @@ ;***************************************************************************** ;* bitstream-a.asm: x86 bitstream functions ;***************************************************************************** -;* Copyright (C) 2010-2017 x264 project +;* Copyright (C) 2010-2019 x264 project ;* ;* Authors: Fiona Glaser ;* Henrik Gramner diff -Nru x264-0.152.2854+gite9a5903/common/x86/bitstream.h x264-0.158.2988+git-20191101.7817004/common/x86/bitstream.h --- x264-0.152.2854+gite9a5903/common/x86/bitstream.h 1970-01-01 00:00:00.000000000 +0000 +++ x264-0.158.2988+git-20191101.7817004/common/x86/bitstream.h 2019-11-09 05:16:29.000000000 +0000 @@ -0,0 +1,64 @@ +/***************************************************************************** + * bitstream.h: x86 bitstream functions + ***************************************************************************** + * Copyright (C) 2017-2019 x264 project + * + * Authors: Anton Mitrofanov + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02111, USA. + * + * This program is also available under a commercial proprietary license. + * For more information, contact us at licensing@x264.com. 
+ *****************************************************************************/ + +#ifndef X264_X86_BITSTREAM_H +#define X264_X86_BITSTREAM_H + +#define x264_nal_escape_mmx2 x264_template(nal_escape_mmx2) +uint8_t *x264_nal_escape_mmx2( uint8_t *dst, uint8_t *src, uint8_t *end ); +#define x264_nal_escape_sse2 x264_template(nal_escape_sse2) +uint8_t *x264_nal_escape_sse2( uint8_t *dst, uint8_t *src, uint8_t *end ); +#define x264_nal_escape_avx2 x264_template(nal_escape_avx2) +uint8_t *x264_nal_escape_avx2( uint8_t *dst, uint8_t *src, uint8_t *end ); +#define x264_cabac_block_residual_rd_internal_sse2 x264_template(cabac_block_residual_rd_internal_sse2) +void x264_cabac_block_residual_rd_internal_sse2 ( dctcoef *l, int b_interlaced, intptr_t ctx_block_cat, x264_cabac_t *cb ); +#define x264_cabac_block_residual_rd_internal_lzcnt x264_template(cabac_block_residual_rd_internal_lzcnt) +void x264_cabac_block_residual_rd_internal_lzcnt ( dctcoef *l, int b_interlaced, intptr_t ctx_block_cat, x264_cabac_t *cb ); +#define x264_cabac_block_residual_rd_internal_ssse3 x264_template(cabac_block_residual_rd_internal_ssse3) +void x264_cabac_block_residual_rd_internal_ssse3 ( dctcoef *l, int b_interlaced, intptr_t ctx_block_cat, x264_cabac_t *cb ); +#define x264_cabac_block_residual_rd_internal_ssse3_lzcnt x264_template(cabac_block_residual_rd_internal_ssse3_lzcnt) +void x264_cabac_block_residual_rd_internal_ssse3_lzcnt( dctcoef *l, int b_interlaced, intptr_t ctx_block_cat, x264_cabac_t *cb ); +#define x264_cabac_block_residual_rd_internal_avx512 x264_template(cabac_block_residual_rd_internal_avx512) +void x264_cabac_block_residual_rd_internal_avx512 ( dctcoef *l, int b_interlaced, intptr_t ctx_block_cat, x264_cabac_t *cb ); +#define x264_cabac_block_residual_8x8_rd_internal_sse2 x264_template(cabac_block_residual_8x8_rd_internal_sse2) +void x264_cabac_block_residual_8x8_rd_internal_sse2 ( dctcoef *l, int b_interlaced, intptr_t ctx_block_cat, x264_cabac_t *cb ); +#define 
x264_cabac_block_residual_8x8_rd_internal_lzcnt x264_template(cabac_block_residual_8x8_rd_internal_lzcnt) +void x264_cabac_block_residual_8x8_rd_internal_lzcnt ( dctcoef *l, int b_interlaced, intptr_t ctx_block_cat, x264_cabac_t *cb ); +#define x264_cabac_block_residual_8x8_rd_internal_ssse3 x264_template(cabac_block_residual_8x8_rd_internal_ssse3) +void x264_cabac_block_residual_8x8_rd_internal_ssse3 ( dctcoef *l, int b_interlaced, intptr_t ctx_block_cat, x264_cabac_t *cb ); +#define x264_cabac_block_residual_8x8_rd_internal_ssse3_lzcnt x264_template(cabac_block_residual_8x8_rd_internal_ssse3_lzcnt) +void x264_cabac_block_residual_8x8_rd_internal_ssse3_lzcnt( dctcoef *l, int b_interlaced, intptr_t ctx_block_cat, x264_cabac_t *cb ); +#define x264_cabac_block_residual_8x8_rd_internal_avx512 x264_template(cabac_block_residual_8x8_rd_internal_avx512) +void x264_cabac_block_residual_8x8_rd_internal_avx512 ( dctcoef *l, int b_interlaced, intptr_t ctx_block_cat, x264_cabac_t *cb ); +#define x264_cabac_block_residual_internal_sse2 x264_template(cabac_block_residual_internal_sse2) +void x264_cabac_block_residual_internal_sse2 ( dctcoef *l, int b_interlaced, intptr_t ctx_block_cat, x264_cabac_t *cb ); +#define x264_cabac_block_residual_internal_lzcnt x264_template(cabac_block_residual_internal_lzcnt) +void x264_cabac_block_residual_internal_lzcnt ( dctcoef *l, int b_interlaced, intptr_t ctx_block_cat, x264_cabac_t *cb ); +#define x264_cabac_block_residual_internal_avx2 x264_template(cabac_block_residual_internal_avx2) +void x264_cabac_block_residual_internal_avx2 ( dctcoef *l, int b_interlaced, intptr_t ctx_block_cat, x264_cabac_t *cb ); +#define x264_cabac_block_residual_internal_avx512 x264_template(cabac_block_residual_internal_avx512) +void x264_cabac_block_residual_internal_avx512( dctcoef *l, int b_interlaced, intptr_t ctx_block_cat, x264_cabac_t *cb ); + +#endif diff -Nru x264-0.152.2854+gite9a5903/common/x86/cabac-a.asm 
x264-0.158.2988+git-20191101.7817004/common/x86/cabac-a.asm --- x264-0.152.2854+gite9a5903/common/x86/cabac-a.asm 2017-12-31 12:50:51.000000000 +0000 +++ x264-0.158.2988+git-20191101.7817004/common/x86/cabac-a.asm 2019-11-09 05:16:29.000000000 +0000 @@ -1,7 +1,7 @@ ;***************************************************************************** ;* cabac-a.asm: x86 cabac ;***************************************************************************** -;* Copyright (C) 2008-2017 x264 project +;* Copyright (C) 2008-2019 x264 project ;* ;* Authors: Loren Merritt ;* Fiona Glaser @@ -28,28 +28,26 @@ %include "x86inc.asm" %include "x86util.asm" -SECTION_RODATA - -coeff_abs_level1_ctx: db 1, 2, 3, 4, 0, 0, 0, 0 -coeff_abs_levelgt1_ctx: db 5, 5, 5, 5, 6, 7, 8, 9 -coeff_abs_level_transition: db 1, 2, 3, 3, 4, 5, 6, 7 - db 4, 4, 4, 4, 5, 6, 7, 7 +SECTION_RODATA 64 %if ARCH_X86_64 -%macro COEFF_LAST_TABLE 17 - %define funccpu1 %1 - %define funccpu2 %2 - %define funccpu3 %3 +%macro COEFF_LAST_TABLE 4-18 16, 15, 16, 4, 15, 64, 16, 15, 16, 64, 16, 15, 16, 64 + %xdefine %%funccpu1 %2 ; last4 + %xdefine %%funccpu2 %3 ; last64 + %xdefine %%funccpu3 %4 ; last15/last16 + coeff_last_%1: + %xdefine %%base coeff_last_%1 %rep 14 - %ifidn %4, 4 - dq mangle(private_prefix %+ _coeff_last%4_ %+ funccpu1) - %elifidn %4, 64 - dq mangle(private_prefix %+ _coeff_last%4_ %+ funccpu2) + %ifidn %5, 4 + dd mangle(private_prefix %+ _coeff_last%5_ %+ %%funccpu1) - %%base + %elifidn %5, 64 + dd mangle(private_prefix %+ _coeff_last%5_ %+ %%funccpu2) - %%base %else - dq mangle(private_prefix %+ _coeff_last%4_ %+ funccpu3) + dd mangle(private_prefix %+ _coeff_last%5_ %+ %%funccpu3) - %%base %endif %rotate 1 %endrep + dd 0, 0 ; 64-byte alignment padding %endmacro cextern coeff_last4_mmx2 @@ -68,33 +66,35 @@ cextern coeff_last64_avx2 cextern coeff_last64_avx512 -%ifdef PIC -SECTION .data -%endif -coeff_last_sse2: COEFF_LAST_TABLE mmx2, sse2, sse2, 16, 15, 16, 4, 15, 64, 16, 15, 16, 64, 16, 15, 16, 64 
-coeff_last_lzcnt: COEFF_LAST_TABLE lzcnt, lzcnt, lzcnt, 16, 15, 16, 4, 15, 64, 16, 15, 16, 64, 16, 15, 16, 64 -coeff_last_avx2: COEFF_LAST_TABLE lzcnt, avx2, lzcnt, 16, 15, 16, 4, 15, 64, 16, 15, 16, 64, 16, 15, 16, 64 +COEFF_LAST_TABLE sse2, mmx2, sse2, sse2 +COEFF_LAST_TABLE lzcnt, lzcnt, lzcnt, lzcnt +COEFF_LAST_TABLE avx2, lzcnt, avx2, lzcnt %if HIGH_BIT_DEPTH -coeff_last_avx512: COEFF_LAST_TABLE avx512, avx512, avx512, 16, 15, 16, 4, 15, 64, 16, 15, 16, 64, 16, 15, 16, 64 +COEFF_LAST_TABLE avx512, avx512, avx512, avx512 %else -coeff_last_avx512: COEFF_LAST_TABLE lzcnt, avx512, avx512, 16, 15, 16, 4, 15, 64, 16, 15, 16, 64, 16, 15, 16, 64 +COEFF_LAST_TABLE avx512, lzcnt, avx512, avx512 %endif %endif +coeff_abs_level1_ctx: db 1, 2, 3, 4, 0, 0, 0, 0 +coeff_abs_levelgt1_ctx: db 5, 5, 5, 5, 6, 7, 8, 9 +coeff_abs_level_transition: db 1, 2, 3, 3, 4, 5, 6, 7 + db 4, 4, 4, 4, 5, 6, 7, 7 + SECTION .text -cextern cabac_range_lps -cextern cabac_transition -cextern cabac_renorm_shift -cextern cabac_entropy +cextern_common cabac_range_lps +cextern_common cabac_transition +cextern_common cabac_renorm_shift +cextern_common cabac_entropy cextern cabac_size_unary cextern cabac_transition_unary -cextern significant_coeff_flag_offset -cextern significant_coeff_flag_offset_8x8 -cextern last_coeff_flag_offset -cextern last_coeff_flag_offset_8x8 -cextern coeff_abs_level_m1_offset -cextern count_cat_m1 +cextern_common significant_coeff_flag_offset +cextern_common significant_coeff_flag_offset_8x8 +cextern_common last_coeff_flag_offset +cextern_common last_coeff_flag_offset_8x8 +cextern_common coeff_abs_level_m1_offset +cextern_common count_cat_m1 cextern cabac_encode_ue_bypass %if ARCH_X86_64 @@ -117,15 +117,13 @@ endstruc %macro LOAD_GLOBAL 3-5 0 ; dst, base, off1, off2, tmp -%ifdef PIC - %ifidn %4, 0 - movzx %1, byte [%2+%3+r7-$$] - %else - lea %5, [r7+%4] - movzx %1, byte [%2+%3+%5-$$] - %endif -%else +%if ARCH_X86_64 == 0 movzx %1, byte [%2+%3+%4] +%elifidn %4, 0 + movzx %1, 
byte [%2+%3+r7-$$] +%else + lea %5, [r7+%4] + movzx %1, byte [%2+%3+%5-$$] %endif %endmacro @@ -150,9 +148,9 @@ shr t5d, 6 movifnidn t2d, r2m %if WIN64 - PUSH r7 + PUSH r7 %endif -%ifdef PIC +%if ARCH_X86_64 lea r7, [$$] %endif LOAD_GLOBAL t5d, cabac_range_lps-4, t5, t4*2, t4 @@ -179,7 +177,7 @@ shl t6d, t3b %endif %if WIN64 - POP r7 + POP r7 %endif mov [t0+cb.range], t4d add t3d, [t0+cb.queue] @@ -274,6 +272,7 @@ CABAC asm CABAC bmi2 +%if ARCH_X86_64 ; %1 = label name ; %2 = node_ctx init? %macro COEFF_ABS_LEVEL_GT1 2 @@ -404,6 +403,13 @@ %endif %endmacro +%macro COEFF_LAST 2 ; table, ctx_block_cat + lea r1, [%1 GLOBAL] + movsxd r6, [r1+4*%2] + add r6, r1 + call r6 +%endmacro + ;----------------------------------------------------------------------------- ; void x264_cabac_block_residual_rd_internal_sse2 ( dctcoef *l, int b_interlaced, ; int ctx_block_cat, x264_cabac_t *cb ); @@ -421,15 +427,9 @@ %define dct r4 %endif -%ifdef PIC - cglobal func, 4,13,6,-maxcoeffs*SIZEOF_DCTCOEF +cglobal func, 4,13,6,-maxcoeffs*SIZEOF_DCTCOEF lea r12, [$$] %define GLOBAL +r12-$$ -%else - cglobal func, 4,12,6,-maxcoeffs*SIZEOF_DCTCOEF - %define GLOBAL -%endif - shl r1d, 4 ; MB_INTERLACED*16 %if %1 lea r4, [significant_coeff_flag_offset_8x8+r1*4 GLOBAL] ; r12 = sig offset 8x8 @@ -452,7 +452,7 @@ add r4, rsp ; restore AC coefficient offset %endif ; for improved OOE performance, run coeff_last on the original coefficients. - call [%2+gprsize*r2 GLOBAL] ; coeff_last[ctx_block_cat]( dct ) + COEFF_LAST %2, r2 ; coeff_last[ctx_block_cat]( dct ) ; we know on 64-bit that the SSE2 versions of this function only ; overwrite r0, r1, and rax (r6). last64 overwrites r2 too, but we ; don't need r2 in 8x8 mode. 
@@ -539,7 +539,6 @@ RET %endmacro -%if ARCH_X86_64 INIT_XMM sse2 CABAC_RESIDUAL_RD 0, coeff_last_sse2 CABAC_RESIDUAL_RD 1, coeff_last_sse2 @@ -560,7 +559,6 @@ CABAC_RESIDUAL_RD 0, coeff_last_avx512 INIT_ZMM avx512 CABAC_RESIDUAL_RD 1, coeff_last_avx512 -%endif ;----------------------------------------------------------------------------- ; void x264_cabac_block_residual_internal_sse2 ( dctcoef *l, int b_interlaced, @@ -638,15 +636,10 @@ %macro CABAC_RESIDUAL 1 cglobal cabac_block_residual_internal, 4,15,0,-4*64 -%ifdef PIC ; if we use the same r7 as in cabac_encode_decision, we can cheat and save a register. lea r7, [$$] %define lastm [rsp+4*1] %define GLOBAL +r7-$$ -%else - %define lastm r7d - %define GLOBAL -%endif shl r1d, 4 %define sigoffq r8 @@ -673,7 +666,7 @@ mov dct, r0 mov leveloffm, leveloffd - call [%1+gprsize*r2 GLOBAL] + COEFF_LAST %1, r2 mov lastm, eax ; put cabac in r0; needed for cabac_encode_decision mov r0, r3 @@ -764,7 +757,6 @@ RET %endmacro -%if ARCH_X86_64 INIT_XMM sse2 CABAC_RESIDUAL coeff_last_sse2 INIT_XMM lzcnt diff -Nru x264-0.152.2854+gite9a5903/common/x86/const-a.asm x264-0.158.2988+git-20191101.7817004/common/x86/const-a.asm --- x264-0.152.2854+gite9a5903/common/x86/const-a.asm 2017-12-31 12:50:51.000000000 +0000 +++ x264-0.158.2988+git-20191101.7817004/common/x86/const-a.asm 2019-11-09 05:16:29.000000000 +0000 @@ -1,7 +1,7 @@ ;***************************************************************************** ;* const-a.asm: x86 global constants ;***************************************************************************** -;* Copyright (C) 2010-2017 x264 project +;* Copyright (C) 2010-2019 x264 project ;* ;* Authors: Loren Merritt ;* Fiona Glaser diff -Nru x264-0.152.2854+gite9a5903/common/x86/cpu-a.asm x264-0.158.2988+git-20191101.7817004/common/x86/cpu-a.asm --- x264-0.152.2854+gite9a5903/common/x86/cpu-a.asm 2017-12-31 12:50:51.000000000 +0000 +++ x264-0.158.2988+git-20191101.7817004/common/x86/cpu-a.asm 2019-11-09 05:16:29.000000000 
+0000 @@ -1,7 +1,7 @@ ;***************************************************************************** ;* cpu-a.asm: x86 cpu utilities ;***************************************************************************** -;* Copyright (C) 2003-2017 x264 project +;* Copyright (C) 2003-2019 x264 project ;* ;* Authors: Laurent Aimar ;* Loren Merritt @@ -64,28 +64,21 @@ %endif ret -%if ARCH_X86_64 - ;----------------------------------------------------------------------------- -; void stack_align( void (*func)(void*), void *arg ); +; void cpu_emms( void ) ;----------------------------------------------------------------------------- -cglobal stack_align - push rbp - mov rbp, rsp -%if WIN64 - sub rsp, 32 ; shadow space -%endif - and rsp, ~(STACK_ALIGNMENT-1) - mov rax, r0 - mov r0, r1 - mov r1, r2 - mov r2, r3 - call rax - leave +cglobal cpu_emms + emms ret -%else +;----------------------------------------------------------------------------- +; void cpu_sfence( void ) +;----------------------------------------------------------------------------- +cglobal cpu_sfence + sfence + ret +%if ARCH_X86_64 == 0 ;----------------------------------------------------------------------------- ; int cpu_cpuid_test( void ) ; return 0 if unsupported @@ -111,35 +104,4 @@ pop ebx popfd ret - -cglobal stack_align - push ebp - mov ebp, esp - sub esp, 12 - and esp, ~(STACK_ALIGNMENT-1) - mov ecx, [ebp+8] - mov edx, [ebp+12] - mov [esp], edx - mov edx, [ebp+16] - mov [esp+4], edx - mov edx, [ebp+20] - mov [esp+8], edx - call ecx - leave - ret - %endif - -;----------------------------------------------------------------------------- -; void cpu_emms( void ) -;----------------------------------------------------------------------------- -cglobal cpu_emms - emms - ret - -;----------------------------------------------------------------------------- -; void cpu_sfence( void ) -;----------------------------------------------------------------------------- -cglobal cpu_sfence - sfence - ret diff -Nru 
x264-0.152.2854+gite9a5903/common/x86/dct-32.asm x264-0.158.2988+git-20191101.7817004/common/x86/dct-32.asm --- x264-0.152.2854+gite9a5903/common/x86/dct-32.asm 2017-12-31 12:50:51.000000000 +0000 +++ x264-0.158.2988+git-20191101.7817004/common/x86/dct-32.asm 2019-11-09 05:16:29.000000000 +0000 @@ -1,7 +1,7 @@ ;***************************************************************************** ;* dct-32.asm: x86_32 transform and zigzag ;***************************************************************************** -;* Copyright (C) 2003-2017 x264 project +;* Copyright (C) 2003-2019 x264 project ;* ;* Authors: Loren Merritt ;* Holger Lubitz @@ -161,8 +161,7 @@ %macro SUB8x8_DCT8 0 cglobal sub8x8_dct8, 3,3,8 -global current_function %+ .skip_prologue -.skip_prologue: +cglobal_label .skip_prologue LOAD_DIFF8x4 0,1,2,3, none,none, r1, r2 LOAD_DIFF8x4 4,5,6,7, none,none, r1, r2 @@ -211,8 +210,7 @@ %macro ADD8x8_IDCT8 0 cglobal add8x8_idct8, 2,2 add r1, 128 -global current_function %+ .skip_prologue -.skip_prologue: +cglobal_label .skip_prologue UNSPILL_SHUFFLE r1, 1,2,3,5,6,7, -6,-4,-2,2,4,6 IDCT8_1D d,0,1,2,3,4,5,6,7,[r1-128],[r1+0] mova [r1+0], m4 @@ -443,8 +441,7 @@ %macro DCT_SUB8 0 cglobal sub8x8_dct, 3,3 add r2, 4*FDEC_STRIDE -global current_function %+ .skip_prologue -.skip_prologue: +cglobal_label .skip_prologue %if cpuflag(ssse3) mova m7, [hsub_mul] %endif @@ -476,8 +473,7 @@ ;----------------------------------------------------------------------------- cglobal sub8x8_dct8, 3,3 add r2, 4*FDEC_STRIDE -global current_function %+ .skip_prologue -.skip_prologue: +cglobal_label .skip_prologue %if cpuflag(ssse3) mova m7, [hsub_mul] LOAD_DIFF8x4 0, 1, 2, 3, 4, 7, r1, r2-4*FDEC_STRIDE @@ -525,8 +521,7 @@ %macro ADD8x8 0 cglobal add8x8_idct, 2,2 add r0, 4*FDEC_STRIDE -global current_function %+ .skip_prologue -.skip_prologue: +cglobal_label .skip_prologue UNSPILL_SHUFFLE r1, 0,2,1,3, 0,1,2,3 SBUTTERFLY qdq, 0, 1, 4 SBUTTERFLY qdq, 2, 3, 4 @@ -569,8 +564,7 @@ %macro 
ADD8x8_IDCT8 0 cglobal add8x8_idct8, 2,2 add r0, 4*FDEC_STRIDE -global current_function %+ .skip_prologue -.skip_prologue: +cglobal_label .skip_prologue UNSPILL r1, 1,2,3,5,6,7 IDCT8_1D w,0,1,2,3,4,5,6,7,[r1+0],[r1+64] SPILL r1, 6 diff -Nru x264-0.152.2854+gite9a5903/common/x86/dct-64.asm x264-0.158.2988+git-20191101.7817004/common/x86/dct-64.asm --- x264-0.152.2854+gite9a5903/common/x86/dct-64.asm 2017-12-31 12:50:51.000000000 +0000 +++ x264-0.158.2988+git-20191101.7817004/common/x86/dct-64.asm 2019-11-09 05:16:29.000000000 +0000 @@ -1,7 +1,7 @@ ;***************************************************************************** ;* dct-64.asm: x86_64 transform and zigzag ;***************************************************************************** -;* Copyright (C) 2003-2017 x264 project +;* Copyright (C) 2003-2019 x264 project ;* ;* Authors: Loren Merritt ;* Holger Lubitz @@ -142,8 +142,7 @@ %macro SUB8x8_DCT8 0 cglobal sub8x8_dct8, 3,3,14 TAIL_CALL .skip_prologue, 0 -global current_function %+ .skip_prologue -.skip_prologue: +cglobal_label .skip_prologue LOAD_DIFF8x4 0,1,2,3, none,none, r1, r2 LOAD_DIFF8x4 4,5,6,7, none,none, r1, r2 @@ -192,8 +191,7 @@ cglobal add8x8_idct8, 2,2,16 add r1, 128 TAIL_CALL .skip_prologue, 0 -global current_function %+ .skip_prologue -.skip_prologue: +cglobal_label .skip_prologue mova m0, [r1-128] mova m1, [r1-96] mova m2, [r1-64] @@ -255,8 +253,7 @@ mova m7, [hsub_mul] %endif TAIL_CALL .skip_prologue, 0 -global current_function %+ .skip_prologue -.skip_prologue: +cglobal_label .skip_prologue SWAP 7, 9 LOAD_DIFF8x4 0, 1, 2, 3, 8, 9, r1, r2-4*FDEC_STRIDE LOAD_DIFF8x4 4, 5, 6, 7, 8, 9, r1, r2-4*FDEC_STRIDE @@ -279,8 +276,7 @@ mova m7, [hsub_mul] %endif TAIL_CALL .skip_prologue, 0 -global current_function %+ .skip_prologue -.skip_prologue: +cglobal_label .skip_prologue SWAP 7, 10 LOAD_DIFF8x4 0, 1, 2, 3, 4, 10, r1, r2-4*FDEC_STRIDE LOAD_DIFF8x4 4, 5, 6, 7, 8, 10, r1, r2-4*FDEC_STRIDE @@ -355,8 +351,7 @@ add r0, 4*FDEC_STRIDE pxor m7, m7 
TAIL_CALL .skip_prologue, 0 -global current_function %+ .skip_prologue -.skip_prologue: +cglobal_label .skip_prologue SWAP 7, 9 movdqa m0, [r1+0x00] movdqa m1, [r1+0x10] @@ -391,8 +386,7 @@ add r0, 4*FDEC_STRIDE pxor m7, m7 TAIL_CALL .skip_prologue, 0 -global current_function %+ .skip_prologue -.skip_prologue: +cglobal_label .skip_prologue SWAP 7, 9 mova m0, [r1+ 0] mova m2, [r1+16] diff -Nru x264-0.152.2854+gite9a5903/common/x86/dct-a.asm x264-0.158.2988+git-20191101.7817004/common/x86/dct-a.asm --- x264-0.152.2854+gite9a5903/common/x86/dct-a.asm 2017-12-31 12:50:51.000000000 +0000 +++ x264-0.158.2988+git-20191101.7817004/common/x86/dct-a.asm 2019-11-09 05:16:29.000000000 +0000 @@ -1,7 +1,7 @@ ;***************************************************************************** ;* dct-a.asm: x86 transform and zigzag ;***************************************************************************** -;* Copyright (C) 2003-2017 x264 project +;* Copyright (C) 2003-2019 x264 project ;* ;* Authors: Holger Lubitz ;* Loren Merritt @@ -510,8 +510,7 @@ add r0, 4*FDEC_STRIDE pxor m7, m7 TAIL_CALL .skip_prologue, 0 -global current_function %+ .skip_prologue -.skip_prologue: +cglobal_label .skip_prologue ; TRANSPOSE4x4Q mova xm0, [r1+ 0] mova xm1, [r1+32] @@ -622,8 +621,8 @@ SBUTTERFLY wd, 1, 0, 2 paddw m2, m1, m0 psubw m3, m1, m0 - paddw m2 {k1}, m1 ; 0+1+2+3 0<<1+1-2-3<<1 - psubw m3 {k1}, m0 ; 0-1-2+3 0-1<<1+2<<1-3 + vpaddw m2 {k1}, m1 ; 0+1+2+3 0<<1+1-2-3<<1 + vpsubw m3 {k1}, m0 ; 0-1-2+3 0-1<<1+2<<1-3 shufps m1, m2, m3, q2323 ; a3 b3 a2 b2 c3 d3 c2 d2 punpcklqdq m2, m3 ; a0 b0 a1 b1 c0 d0 c1 d1 SUMSUB_BA w, 1, 2, 3 @@ -631,8 +630,8 @@ shufps m1, m2, q2020 ; a0+a3 b0+b3 c0+c3 d0+d3 a0-a3 b0-b3 c0-c3 d0-d3 paddw m2, m1, m3 psubw m0, m1, m3 - paddw m2 {k2}, m1 ; 0'+1'+2'+3' 0'<<1+1'-2'-3'<<1 - psubw m0 {k2}, m3 ; 0'-1'-2'+3' 0'-1'<<1+2'<<1-3' + vpaddw m2 {k2}, m1 ; 0'+1'+2'+3' 0'<<1+1'-2'-3'<<1 + vpsubw m0 {k2}, m3 ; 0'-1'-2'+3' 0'-1'<<1+2'<<1-3' %endmacro INIT_XMM avx512 @@ -744,7 
+743,7 @@ paddw xmm0, xmm2 ; 0+1 0+1 2+3 2+3 punpckldq xmm0, xmm1 ; 0+1 0+1 0-1 0-1 2+3 2+3 2-3 2-3 punpcklqdq xmm1, xmm0, xmm0 - psubw xmm0 {k1}, xm3, xmm0 + vpsubw xmm0 {k1}, xm3, xmm0 paddw xmm0, xmm1 ; 0+1+2+3 0+1-2-3 0-1+2-3 0-1-2+3 movhps [r0], xmm0 RET diff -Nru x264-0.152.2854+gite9a5903/common/x86/dct.h x264-0.158.2988+git-20191101.7817004/common/x86/dct.h --- x264-0.152.2854+gite9a5903/common/x86/dct.h 2017-12-31 12:50:51.000000000 +0000 +++ x264-0.158.2988+git-20191101.7817004/common/x86/dct.h 2019-11-09 05:16:29.000000000 +0000 @@ -1,7 +1,7 @@ /***************************************************************************** * dct.h: x86 transform and zigzag ***************************************************************************** - * Copyright (C) 2003-2017 x264 project + * Copyright (C) 2003-2019 x264 project * * Authors: Loren Merritt * Laurent Aimar @@ -25,120 +25,225 @@ * For more information, contact us at licensing@x264.com. *****************************************************************************/ -#ifndef X264_I386_DCT_H -#define X264_I386_DCT_H +#ifndef X264_X86_DCT_H +#define X264_X86_DCT_H +#define x264_sub4x4_dct_mmx x264_template(sub4x4_dct_mmx) void x264_sub4x4_dct_mmx ( dctcoef dct [16], pixel *pix1, pixel *pix2 ); +#define x264_sub8x8_dct_mmx x264_template(sub8x8_dct_mmx) void x264_sub8x8_dct_mmx ( dctcoef dct[ 4][16], pixel *pix1, pixel *pix2 ); +#define x264_sub16x16_dct_mmx x264_template(sub16x16_dct_mmx) void x264_sub16x16_dct_mmx ( dctcoef dct[16][16], pixel *pix1, pixel *pix2 ); +#define x264_sub8x8_dct_sse2 x264_template(sub8x8_dct_sse2) void x264_sub8x8_dct_sse2 ( int16_t dct[ 4][16], uint8_t *pix1, uint8_t *pix2 ); +#define x264_sub16x16_dct_sse2 x264_template(sub16x16_dct_sse2) void x264_sub16x16_dct_sse2 ( int16_t dct[16][16], uint8_t *pix1, uint8_t *pix2 ); +#define x264_sub4x4_dct_ssse3 x264_template(sub4x4_dct_ssse3) void x264_sub4x4_dct_ssse3 ( int16_t dct [16], uint8_t *pix1, uint8_t *pix2 ); +#define 
x264_sub4x4_dct_avx512 x264_template(sub4x4_dct_avx512) void x264_sub4x4_dct_avx512 ( int16_t dct [16], uint8_t *pix1, uint8_t *pix2 ); +#define x264_sub8x8_dct_ssse3 x264_template(sub8x8_dct_ssse3) void x264_sub8x8_dct_ssse3 ( int16_t dct[ 4][16], uint8_t *pix1, uint8_t *pix2 ); +#define x264_sub16x16_dct_ssse3 x264_template(sub16x16_dct_ssse3) void x264_sub16x16_dct_ssse3( int16_t dct[16][16], uint8_t *pix1, uint8_t *pix2 ); +#define x264_sub8x8_dct_avx x264_template(sub8x8_dct_avx) void x264_sub8x8_dct_avx ( int16_t dct[ 4][16], uint8_t *pix1, uint8_t *pix2 ); +#define x264_sub16x16_dct_avx x264_template(sub16x16_dct_avx) void x264_sub16x16_dct_avx ( int16_t dct[16][16], uint8_t *pix1, uint8_t *pix2 ); +#define x264_sub8x8_dct_xop x264_template(sub8x8_dct_xop) void x264_sub8x8_dct_xop ( int16_t dct[ 4][16], uint8_t *pix1, uint8_t *pix2 ); +#define x264_sub16x16_dct_xop x264_template(sub16x16_dct_xop) void x264_sub16x16_dct_xop ( int16_t dct[16][16], uint8_t *pix1, uint8_t *pix2 ); +#define x264_sub8x8_dct_avx2 x264_template(sub8x8_dct_avx2) void x264_sub8x8_dct_avx2 ( int16_t dct[ 4][16], uint8_t *pix1, uint8_t *pix2 ); +#define x264_sub8x8_dct_avx512 x264_template(sub8x8_dct_avx512) void x264_sub8x8_dct_avx512 ( int16_t dct[ 4][16], uint8_t *pix1, uint8_t *pix2 ); +#define x264_sub16x16_dct_avx2 x264_template(sub16x16_dct_avx2) void x264_sub16x16_dct_avx2 ( int16_t dct[16][16], uint8_t *pix1, uint8_t *pix2 ); +#define x264_sub16x16_dct_avx512 x264_template(sub16x16_dct_avx512) void x264_sub16x16_dct_avx512( int16_t dct[16][16], uint8_t *pix1, uint8_t *pix2 ); +#define x264_sub8x8_dct_dc_mmx2 x264_template(sub8x8_dct_dc_mmx2) void x264_sub8x8_dct_dc_mmx2 ( int16_t dct [ 4], uint8_t *pix1, uint8_t *pix2 ); +#define x264_sub8x8_dct_dc_sse2 x264_template(sub8x8_dct_dc_sse2) void x264_sub8x8_dct_dc_sse2 ( dctcoef dct [ 4], pixel *pix1, pixel *pix2 ); +#define x264_sub8x8_dct_dc_avx512 x264_template(sub8x8_dct_dc_avx512) void x264_sub8x8_dct_dc_avx512 ( int16_t dct [ 
4], uint8_t *pix1, uint8_t *pix2 ); +#define x264_sub8x16_dct_dc_sse2 x264_template(sub8x16_dct_dc_sse2) void x264_sub8x16_dct_dc_sse2 ( dctcoef dct [ 8], pixel *pix1, pixel *pix2 ); +#define x264_sub8x16_dct_dc_ssse3 x264_template(sub8x16_dct_dc_ssse3) void x264_sub8x16_dct_dc_ssse3 ( int16_t dct [ 8], uint8_t *pix1, uint8_t *pix2 ); +#define x264_sub8x16_dct_dc_avx x264_template(sub8x16_dct_dc_avx) void x264_sub8x16_dct_dc_avx ( dctcoef dct [ 8], pixel *pix1, pixel *pix2 ); +#define x264_sub8x16_dct_dc_avx512 x264_template(sub8x16_dct_dc_avx512) void x264_sub8x16_dct_dc_avx512( int16_t dct [ 8], uint8_t *pix1, uint8_t *pix2 ); +#define x264_add4x4_idct_mmx x264_template(add4x4_idct_mmx) void x264_add4x4_idct_mmx ( uint8_t *p_dst, int16_t dct [16] ); +#define x264_add4x4_idct_sse2 x264_template(add4x4_idct_sse2) void x264_add4x4_idct_sse2 ( uint16_t *p_dst, int32_t dct [16] ); +#define x264_add4x4_idct_sse4 x264_template(add4x4_idct_sse4) void x264_add4x4_idct_sse4 ( uint8_t *p_dst, int16_t dct [16] ); +#define x264_add4x4_idct_avx x264_template(add4x4_idct_avx) void x264_add4x4_idct_avx ( pixel *p_dst, dctcoef dct [16] ); +#define x264_add8x8_idct_mmx x264_template(add8x8_idct_mmx) void x264_add8x8_idct_mmx ( uint8_t *p_dst, int16_t dct[ 4][16] ); +#define x264_add8x8_idct_dc_mmx2 x264_template(add8x8_idct_dc_mmx2) void x264_add8x8_idct_dc_mmx2 ( uint8_t *p_dst, int16_t dct [ 4] ); +#define x264_add16x16_idct_mmx x264_template(add16x16_idct_mmx) void x264_add16x16_idct_mmx ( uint8_t *p_dst, int16_t dct[16][16] ); +#define x264_add16x16_idct_dc_mmx2 x264_template(add16x16_idct_dc_mmx2) void x264_add16x16_idct_dc_mmx2 ( uint8_t *p_dst, int16_t dct [16] ); +#define x264_add8x8_idct_sse2 x264_template(add8x8_idct_sse2) void x264_add8x8_idct_sse2 ( pixel *p_dst, dctcoef dct[ 4][16] ); +#define x264_add8x8_idct_avx x264_template(add8x8_idct_avx) void x264_add8x8_idct_avx ( pixel *p_dst, dctcoef dct[ 4][16] ); +#define x264_add8x8_idct_avx2 
x264_template(add8x8_idct_avx2) void x264_add8x8_idct_avx2 ( pixel *p_dst, dctcoef dct[ 4][16] ); +#define x264_add8x8_idct_avx512 x264_template(add8x8_idct_avx512) void x264_add8x8_idct_avx512 ( uint8_t *p_dst, int16_t dct[ 4][16] ); +#define x264_add16x16_idct_sse2 x264_template(add16x16_idct_sse2) void x264_add16x16_idct_sse2 ( pixel *p_dst, dctcoef dct[16][16] ); +#define x264_add16x16_idct_avx x264_template(add16x16_idct_avx) void x264_add16x16_idct_avx ( pixel *p_dst, dctcoef dct[16][16] ); +#define x264_add16x16_idct_avx2 x264_template(add16x16_idct_avx2) void x264_add16x16_idct_avx2 ( pixel *p_dst, dctcoef dct[16][16] ); +#define x264_add8x8_idct_dc_sse2 x264_template(add8x8_idct_dc_sse2) void x264_add8x8_idct_dc_sse2 ( pixel *p_dst, dctcoef dct [ 4] ); +#define x264_add16x16_idct_dc_sse2 x264_template(add16x16_idct_dc_sse2) void x264_add16x16_idct_dc_sse2 ( pixel *p_dst, dctcoef dct [16] ); +#define x264_add8x8_idct_dc_ssse3 x264_template(add8x8_idct_dc_ssse3) void x264_add8x8_idct_dc_ssse3 ( uint8_t *p_dst, int16_t dct [ 4] ); +#define x264_add16x16_idct_dc_ssse3 x264_template(add16x16_idct_dc_ssse3) void x264_add16x16_idct_dc_ssse3( uint8_t *p_dst, int16_t dct [16] ); +#define x264_add8x8_idct_dc_avx x264_template(add8x8_idct_dc_avx) void x264_add8x8_idct_dc_avx ( pixel *p_dst, dctcoef dct [ 4] ); +#define x264_add16x16_idct_dc_avx x264_template(add16x16_idct_dc_avx) void x264_add16x16_idct_dc_avx ( pixel *p_dst, dctcoef dct [16] ); +#define x264_add16x16_idct_dc_avx2 x264_template(add16x16_idct_dc_avx2) void x264_add16x16_idct_dc_avx2 ( uint8_t *p_dst, int16_t dct [16] ); +#define x264_dct4x4dc_mmx2 x264_template(dct4x4dc_mmx2) void x264_dct4x4dc_mmx2 ( int16_t d[16] ); +#define x264_dct4x4dc_sse2 x264_template(dct4x4dc_sse2) void x264_dct4x4dc_sse2 ( int32_t d[16] ); +#define x264_dct4x4dc_avx x264_template(dct4x4dc_avx) void x264_dct4x4dc_avx ( int32_t d[16] ); +#define x264_idct4x4dc_mmx x264_template(idct4x4dc_mmx) void x264_idct4x4dc_mmx ( int16_t 
d[16] ); +#define x264_idct4x4dc_sse2 x264_template(idct4x4dc_sse2) void x264_idct4x4dc_sse2 ( int32_t d[16] ); +#define x264_idct4x4dc_avx x264_template(idct4x4dc_avx) void x264_idct4x4dc_avx ( int32_t d[16] ); +#define x264_dct2x4dc_mmx2 x264_template(dct2x4dc_mmx2) void x264_dct2x4dc_mmx2( dctcoef dct[8], dctcoef dct4x4[8][16] ); +#define x264_dct2x4dc_sse2 x264_template(dct2x4dc_sse2) void x264_dct2x4dc_sse2( dctcoef dct[8], dctcoef dct4x4[8][16] ); +#define x264_dct2x4dc_avx x264_template(dct2x4dc_avx) void x264_dct2x4dc_avx ( dctcoef dct[8], dctcoef dct4x4[8][16] ); +#define x264_sub8x8_dct8_mmx x264_template(sub8x8_dct8_mmx) void x264_sub8x8_dct8_mmx ( int16_t dct [64], uint8_t *pix1, uint8_t *pix2 ); +#define x264_sub16x16_dct8_mmx x264_template(sub16x16_dct8_mmx) void x264_sub16x16_dct8_mmx ( int16_t dct[4][64], uint8_t *pix1, uint8_t *pix2 ); +#define x264_sub8x8_dct8_sse2 x264_template(sub8x8_dct8_sse2) void x264_sub8x8_dct8_sse2 ( dctcoef dct [64], pixel *pix1, pixel *pix2 ); +#define x264_sub16x16_dct8_sse2 x264_template(sub16x16_dct8_sse2) void x264_sub16x16_dct8_sse2 ( dctcoef dct[4][64], pixel *pix1, pixel *pix2 ); +#define x264_sub8x8_dct8_ssse3 x264_template(sub8x8_dct8_ssse3) void x264_sub8x8_dct8_ssse3 ( int16_t dct [64], uint8_t *pix1, uint8_t *pix2 ); +#define x264_sub16x16_dct8_ssse3 x264_template(sub16x16_dct8_ssse3) void x264_sub16x16_dct8_ssse3( int16_t dct[4][64], uint8_t *pix1, uint8_t *pix2 ); +#define x264_sub8x8_dct8_sse4 x264_template(sub8x8_dct8_sse4) void x264_sub8x8_dct8_sse4 ( int32_t dct [64], uint16_t *pix1, uint16_t *pix2 ); +#define x264_sub16x16_dct8_sse4 x264_template(sub16x16_dct8_sse4) void x264_sub16x16_dct8_sse4 ( int32_t dct[4][64], uint16_t *pix1, uint16_t *pix2 ); +#define x264_sub8x8_dct8_avx x264_template(sub8x8_dct8_avx) void x264_sub8x8_dct8_avx ( dctcoef dct [64], pixel *pix1, pixel *pix2 ); +#define x264_sub16x16_dct8_avx x264_template(sub16x16_dct8_avx) void x264_sub16x16_dct8_avx ( dctcoef dct[4][64], pixel 
*pix1, pixel *pix2 ); +#define x264_sub16x16_dct8_avx2 x264_template(sub16x16_dct8_avx2) void x264_sub16x16_dct8_avx2 ( dctcoef dct[4][64], pixel *pix1, pixel *pix2 ); +#define x264_add8x8_idct8_mmx x264_template(add8x8_idct8_mmx) void x264_add8x8_idct8_mmx ( uint8_t *dst, int16_t dct [64] ); +#define x264_add16x16_idct8_mmx x264_template(add16x16_idct8_mmx) void x264_add16x16_idct8_mmx ( uint8_t *dst, int16_t dct[4][64] ); +#define x264_add8x8_idct8_sse2 x264_template(add8x8_idct8_sse2) void x264_add8x8_idct8_sse2 ( pixel *dst, dctcoef dct [64] ); +#define x264_add16x16_idct8_sse2 x264_template(add16x16_idct8_sse2) void x264_add16x16_idct8_sse2( pixel *dst, dctcoef dct[4][64] ); +#define x264_add8x8_idct8_avx x264_template(add8x8_idct8_avx) void x264_add8x8_idct8_avx ( pixel *dst, dctcoef dct [64] ); +#define x264_add16x16_idct8_avx x264_template(add16x16_idct8_avx) void x264_add16x16_idct8_avx ( pixel *dst, dctcoef dct[4][64] ); +#define x264_zigzag_scan_8x8_frame_mmx2 x264_template(zigzag_scan_8x8_frame_mmx2) void x264_zigzag_scan_8x8_frame_mmx2 ( int16_t level[64], int16_t dct[64] ); +#define x264_zigzag_scan_8x8_frame_sse2 x264_template(zigzag_scan_8x8_frame_sse2) void x264_zigzag_scan_8x8_frame_sse2 ( dctcoef level[64], dctcoef dct[64] ); +#define x264_zigzag_scan_8x8_frame_ssse3 x264_template(zigzag_scan_8x8_frame_ssse3) void x264_zigzag_scan_8x8_frame_ssse3 ( int16_t level[64], int16_t dct[64] ); +#define x264_zigzag_scan_8x8_frame_avx x264_template(zigzag_scan_8x8_frame_avx) void x264_zigzag_scan_8x8_frame_avx ( dctcoef level[64], dctcoef dct[64] ); +#define x264_zigzag_scan_8x8_frame_xop x264_template(zigzag_scan_8x8_frame_xop) void x264_zigzag_scan_8x8_frame_xop ( int16_t level[64], int16_t dct[64] ); +#define x264_zigzag_scan_8x8_frame_avx512 x264_template(zigzag_scan_8x8_frame_avx512) void x264_zigzag_scan_8x8_frame_avx512( dctcoef level[64], dctcoef dct[64] ); +#define x264_zigzag_scan_4x4_frame_mmx x264_template(zigzag_scan_4x4_frame_mmx) void 
x264_zigzag_scan_4x4_frame_mmx ( int16_t level[16], int16_t dct[16] ); +#define x264_zigzag_scan_4x4_frame_sse2 x264_template(zigzag_scan_4x4_frame_sse2) void x264_zigzag_scan_4x4_frame_sse2 ( int32_t level[16], int32_t dct[16] ); +#define x264_zigzag_scan_4x4_frame_ssse3 x264_template(zigzag_scan_4x4_frame_ssse3) void x264_zigzag_scan_4x4_frame_ssse3 ( int16_t level[16], int16_t dct[16] ); +#define x264_zigzag_scan_4x4_frame_avx x264_template(zigzag_scan_4x4_frame_avx) void x264_zigzag_scan_4x4_frame_avx ( dctcoef level[16], dctcoef dct[16] ); +#define x264_zigzag_scan_4x4_frame_xop x264_template(zigzag_scan_4x4_frame_xop) void x264_zigzag_scan_4x4_frame_xop ( dctcoef level[16], dctcoef dct[16] ); +#define x264_zigzag_scan_4x4_frame_avx512 x264_template(zigzag_scan_4x4_frame_avx512) void x264_zigzag_scan_4x4_frame_avx512( dctcoef level[16], dctcoef dct[16] ); +#define x264_zigzag_scan_4x4_field_sse x264_template(zigzag_scan_4x4_field_sse) void x264_zigzag_scan_4x4_field_sse ( int16_t level[16], int16_t dct[16] ); +#define x264_zigzag_scan_4x4_field_sse2 x264_template(zigzag_scan_4x4_field_sse2) void x264_zigzag_scan_4x4_field_sse2 ( int32_t level[16], int32_t dct[16] ); +#define x264_zigzag_scan_4x4_field_avx512 x264_template(zigzag_scan_4x4_field_avx512) void x264_zigzag_scan_4x4_field_avx512( dctcoef level[16], dctcoef dct[16] ); +#define x264_zigzag_scan_8x8_field_mmx2 x264_template(zigzag_scan_8x8_field_mmx2) void x264_zigzag_scan_8x8_field_mmx2 ( int16_t level[64], int16_t dct[64] ); +#define x264_zigzag_scan_8x8_field_sse4 x264_template(zigzag_scan_8x8_field_sse4) void x264_zigzag_scan_8x8_field_sse4 ( int32_t level[64], int32_t dct[64] ); +#define x264_zigzag_scan_8x8_field_avx x264_template(zigzag_scan_8x8_field_avx) void x264_zigzag_scan_8x8_field_avx ( int32_t level[64], int32_t dct[64] ); +#define x264_zigzag_scan_8x8_field_xop x264_template(zigzag_scan_8x8_field_xop) void x264_zigzag_scan_8x8_field_xop ( int16_t level[64], int16_t dct[64] ); +#define 
x264_zigzag_scan_8x8_field_avx512 x264_template(zigzag_scan_8x8_field_avx512) void x264_zigzag_scan_8x8_field_avx512( dctcoef level[64], dctcoef dct[64] ); +#define x264_zigzag_sub_4x4_frame_avx x264_template(zigzag_sub_4x4_frame_avx) int x264_zigzag_sub_4x4_frame_avx ( int16_t level[16], const uint8_t *src, uint8_t *dst ); +#define x264_zigzag_sub_4x4_frame_ssse3 x264_template(zigzag_sub_4x4_frame_ssse3) int x264_zigzag_sub_4x4_frame_ssse3 ( int16_t level[16], const uint8_t *src, uint8_t *dst ); +#define x264_zigzag_sub_4x4ac_frame_avx x264_template(zigzag_sub_4x4ac_frame_avx) int x264_zigzag_sub_4x4ac_frame_avx ( int16_t level[16], const uint8_t *src, uint8_t *dst, int16_t *dc ); +#define x264_zigzag_sub_4x4ac_frame_ssse3 x264_template(zigzag_sub_4x4ac_frame_ssse3) int x264_zigzag_sub_4x4ac_frame_ssse3( int16_t level[16], const uint8_t *src, uint8_t *dst, int16_t *dc ); +#define x264_zigzag_sub_4x4_field_avx x264_template(zigzag_sub_4x4_field_avx) int x264_zigzag_sub_4x4_field_avx ( int16_t level[16], const uint8_t *src, uint8_t *dst ); +#define x264_zigzag_sub_4x4_field_ssse3 x264_template(zigzag_sub_4x4_field_ssse3) int x264_zigzag_sub_4x4_field_ssse3 ( int16_t level[16], const uint8_t *src, uint8_t *dst ); +#define x264_zigzag_sub_4x4ac_field_avx x264_template(zigzag_sub_4x4ac_field_avx) int x264_zigzag_sub_4x4ac_field_avx ( int16_t level[16], const uint8_t *src, uint8_t *dst, int16_t *dc ); +#define x264_zigzag_sub_4x4ac_field_ssse3 x264_template(zigzag_sub_4x4ac_field_ssse3) int x264_zigzag_sub_4x4ac_field_ssse3( int16_t level[16], const uint8_t *src, uint8_t *dst, int16_t *dc ); +#define x264_zigzag_interleave_8x8_cavlc_mmx x264_template(zigzag_interleave_8x8_cavlc_mmx) void x264_zigzag_interleave_8x8_cavlc_mmx ( int16_t *dst, int16_t *src, uint8_t *nnz ); +#define x264_zigzag_interleave_8x8_cavlc_sse2 x264_template(zigzag_interleave_8x8_cavlc_sse2) void x264_zigzag_interleave_8x8_cavlc_sse2 ( dctcoef *dst, dctcoef *src, uint8_t *nnz ); +#define 
x264_zigzag_interleave_8x8_cavlc_avx x264_template(zigzag_interleave_8x8_cavlc_avx) void x264_zigzag_interleave_8x8_cavlc_avx ( dctcoef *dst, dctcoef *src, uint8_t *nnz ); +#define x264_zigzag_interleave_8x8_cavlc_avx2 x264_template(zigzag_interleave_8x8_cavlc_avx2) void x264_zigzag_interleave_8x8_cavlc_avx2 ( int16_t *dst, int16_t *src, uint8_t *nnz ); +#define x264_zigzag_interleave_8x8_cavlc_avx512 x264_template(zigzag_interleave_8x8_cavlc_avx512) void x264_zigzag_interleave_8x8_cavlc_avx512( dctcoef *dst, dctcoef *src, uint8_t *nnz ); #endif diff -Nru x264-0.152.2854+gite9a5903/common/x86/deblock-a.asm x264-0.158.2988+git-20191101.7817004/common/x86/deblock-a.asm --- x264-0.152.2854+gite9a5903/common/x86/deblock-a.asm 2017-12-31 12:50:51.000000000 +0000 +++ x264-0.158.2988+git-20191101.7817004/common/x86/deblock-a.asm 2019-11-09 05:16:29.000000000 +0000 @@ -1,7 +1,7 @@ ;***************************************************************************** ;* deblock-a.asm: x86 deblocking ;***************************************************************************** -;* Copyright (C) 2005-2017 x264 project +;* Copyright (C) 2005-2019 x264 project ;* ;* Authors: Loren Merritt ;* Fiona Glaser diff -Nru x264-0.152.2854+gite9a5903/common/x86/deblock.h x264-0.158.2988+git-20191101.7817004/common/x86/deblock.h --- x264-0.152.2854+gite9a5903/common/x86/deblock.h 1970-01-01 00:00:00.000000000 +0000 +++ x264-0.158.2988+git-20191101.7817004/common/x86/deblock.h 2019-11-09 05:16:29.000000000 +0000 @@ -0,0 +1,146 @@ +/***************************************************************************** + * deblock.h: x86 deblocking + ***************************************************************************** + * Copyright (C) 2017-2019 x264 project + * + * Authors: Anton Mitrofanov + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the 
License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02111, USA. + * + * This program is also available under a commercial proprietary license. + * For more information, contact us at licensing@x264.com. + *****************************************************************************/ + +#ifndef X264_X86_DEBLOCK_H +#define X264_X86_DEBLOCK_H + +#define x264_deblock_v_luma_sse2 x264_template(deblock_v_luma_sse2) +void x264_deblock_v_luma_sse2( pixel *pix, intptr_t stride, int alpha, int beta, int8_t *tc0 ); +#define x264_deblock_v_luma_avx x264_template(deblock_v_luma_avx) +void x264_deblock_v_luma_avx ( pixel *pix, intptr_t stride, int alpha, int beta, int8_t *tc0 ); +#define x264_deblock_h_luma_sse2 x264_template(deblock_h_luma_sse2) +void x264_deblock_h_luma_sse2( pixel *pix, intptr_t stride, int alpha, int beta, int8_t *tc0 ); +#define x264_deblock_h_luma_avx x264_template(deblock_h_luma_avx) +void x264_deblock_h_luma_avx ( pixel *pix, intptr_t stride, int alpha, int beta, int8_t *tc0 ); +#define x264_deblock_v_chroma_sse2 x264_template(deblock_v_chroma_sse2) +void x264_deblock_v_chroma_sse2( pixel *pix, intptr_t stride, int alpha, int beta, int8_t *tc0 ); +#define x264_deblock_v_chroma_avx x264_template(deblock_v_chroma_avx) +void x264_deblock_v_chroma_avx ( pixel *pix, intptr_t stride, int alpha, int beta, int8_t *tc0 ); +#define x264_deblock_h_chroma_sse2 x264_template(deblock_h_chroma_sse2) +void x264_deblock_h_chroma_sse2( pixel *pix, intptr_t stride, int alpha, int beta, int8_t *tc0 ); +#define 
x264_deblock_h_chroma_avx x264_template(deblock_h_chroma_avx) +void x264_deblock_h_chroma_avx ( pixel *pix, intptr_t stride, int alpha, int beta, int8_t *tc0 ); +#define x264_deblock_h_chroma_mbaff_sse2 x264_template(deblock_h_chroma_mbaff_sse2) +void x264_deblock_h_chroma_mbaff_sse2( pixel *pix, intptr_t stride, int alpha, int beta, int8_t *tc0 ); +#define x264_deblock_h_chroma_mbaff_avx x264_template(deblock_h_chroma_mbaff_avx) +void x264_deblock_h_chroma_mbaff_avx ( pixel *pix, intptr_t stride, int alpha, int beta, int8_t *tc0 ); +#define x264_deblock_h_chroma_422_mmx2 x264_template(deblock_h_chroma_422_mmx2) +void x264_deblock_h_chroma_422_mmx2( pixel *pix, intptr_t stride, int alpha, int beta, int8_t *tc0 ); +#define x264_deblock_h_chroma_422_sse2 x264_template(deblock_h_chroma_422_sse2) +void x264_deblock_h_chroma_422_sse2( pixel *pix, intptr_t stride, int alpha, int beta, int8_t *tc0 ); +#define x264_deblock_h_chroma_422_avx x264_template(deblock_h_chroma_422_avx) +void x264_deblock_h_chroma_422_avx ( pixel *pix, intptr_t stride, int alpha, int beta, int8_t *tc0 ); +#define x264_deblock_v_luma_intra_sse2 x264_template(deblock_v_luma_intra_sse2) +void x264_deblock_v_luma_intra_sse2( pixel *pix, intptr_t stride, int alpha, int beta ); +#define x264_deblock_v_luma_intra_avx x264_template(deblock_v_luma_intra_avx) +void x264_deblock_v_luma_intra_avx ( pixel *pix, intptr_t stride, int alpha, int beta ); +#define x264_deblock_h_luma_intra_sse2 x264_template(deblock_h_luma_intra_sse2) +void x264_deblock_h_luma_intra_sse2( pixel *pix, intptr_t stride, int alpha, int beta ); +#define x264_deblock_h_luma_intra_avx x264_template(deblock_h_luma_intra_avx) +void x264_deblock_h_luma_intra_avx ( pixel *pix, intptr_t stride, int alpha, int beta ); +#define x264_deblock_v_chroma_intra_sse2 x264_template(deblock_v_chroma_intra_sse2) +void x264_deblock_v_chroma_intra_sse2( pixel *pix, intptr_t stride, int alpha, int beta ); +#define x264_deblock_v_chroma_intra_avx 
x264_template(deblock_v_chroma_intra_avx) +void x264_deblock_v_chroma_intra_avx ( pixel *pix, intptr_t stride, int alpha, int beta ); +#define x264_deblock_h_chroma_intra_sse2 x264_template(deblock_h_chroma_intra_sse2) +void x264_deblock_h_chroma_intra_sse2( pixel *pix, intptr_t stride, int alpha, int beta ); +#define x264_deblock_h_chroma_intra_avx x264_template(deblock_h_chroma_intra_avx) +void x264_deblock_h_chroma_intra_avx ( pixel *pix, intptr_t stride, int alpha, int beta ); +#define x264_deblock_h_chroma_422_intra_mmx2 x264_template(deblock_h_chroma_422_intra_mmx2) +void x264_deblock_h_chroma_422_intra_mmx2( pixel *pix, intptr_t stride, int alpha, int beta ); +#define x264_deblock_h_chroma_422_intra_sse2 x264_template(deblock_h_chroma_422_intra_sse2) +void x264_deblock_h_chroma_422_intra_sse2( pixel *pix, intptr_t stride, int alpha, int beta ); +#define x264_deblock_h_chroma_422_intra_avx x264_template(deblock_h_chroma_422_intra_avx) +void x264_deblock_h_chroma_422_intra_avx ( pixel *pix, intptr_t stride, int alpha, int beta ); +#define x264_deblock_strength_sse2 x264_template(deblock_strength_sse2) +void x264_deblock_strength_sse2 ( uint8_t nnz[X264_SCAN8_SIZE], int8_t ref[2][X264_SCAN8_LUMA_SIZE], + int16_t mv[2][X264_SCAN8_LUMA_SIZE][2], uint8_t bs[2][8][4], + int mvy_limit, int bframe ); +#define x264_deblock_strength_ssse3 x264_template(deblock_strength_ssse3) +void x264_deblock_strength_ssse3 ( uint8_t nnz[X264_SCAN8_SIZE], int8_t ref[2][X264_SCAN8_LUMA_SIZE], + int16_t mv[2][X264_SCAN8_LUMA_SIZE][2], uint8_t bs[2][8][4], + int mvy_limit, int bframe ); +#define x264_deblock_strength_avx x264_template(deblock_strength_avx) +void x264_deblock_strength_avx ( uint8_t nnz[X264_SCAN8_SIZE], int8_t ref[2][X264_SCAN8_LUMA_SIZE], + int16_t mv[2][X264_SCAN8_LUMA_SIZE][2], uint8_t bs[2][8][4], + int mvy_limit, int bframe ); +#define x264_deblock_strength_avx2 x264_template(deblock_strength_avx2) +void x264_deblock_strength_avx2 ( uint8_t nnz[X264_SCAN8_SIZE], 
int8_t ref[2][X264_SCAN8_LUMA_SIZE], + int16_t mv[2][X264_SCAN8_LUMA_SIZE][2], uint8_t bs[2][8][4], + int mvy_limit, int bframe ); +#define x264_deblock_strength_avx512 x264_template(deblock_strength_avx512) +void x264_deblock_strength_avx512( uint8_t nnz[X264_SCAN8_SIZE], int8_t ref[2][X264_SCAN8_LUMA_SIZE], + int16_t mv[2][X264_SCAN8_LUMA_SIZE][2], uint8_t bs[2][8][4], + int mvy_limit, int bframe ); + +#define x264_deblock_h_chroma_intra_mbaff_mmx2 x264_template(deblock_h_chroma_intra_mbaff_mmx2) +void x264_deblock_h_chroma_intra_mbaff_mmx2( pixel *pix, intptr_t stride, int alpha, int beta ); +#define x264_deblock_h_chroma_intra_mbaff_sse2 x264_template(deblock_h_chroma_intra_mbaff_sse2) +void x264_deblock_h_chroma_intra_mbaff_sse2( pixel *pix, intptr_t stride, int alpha, int beta ); +#define x264_deblock_h_chroma_intra_mbaff_avx x264_template(deblock_h_chroma_intra_mbaff_avx) +void x264_deblock_h_chroma_intra_mbaff_avx ( pixel *pix, intptr_t stride, int alpha, int beta ); +#if ARCH_X86 +#define x264_deblock_h_luma_mmx2 x264_template(deblock_h_luma_mmx2) +void x264_deblock_h_luma_mmx2( pixel *pix, intptr_t stride, int alpha, int beta, int8_t *tc0 ); +#define x264_deblock_v8_luma_mmx2 x264_template(deblock_v8_luma_mmx2) +void x264_deblock_v8_luma_mmx2( uint8_t *pix, intptr_t stride, int alpha, int beta, int8_t *tc0 ); +#define x264_deblock_v_chroma_mmx2 x264_template(deblock_v_chroma_mmx2) +void x264_deblock_v_chroma_mmx2( pixel *pix, intptr_t stride, int alpha, int beta, int8_t *tc0 ); +#define x264_deblock_h_chroma_mmx2 x264_template(deblock_h_chroma_mmx2) +void x264_deblock_h_chroma_mmx2( pixel *pix, intptr_t stride, int alpha, int beta, int8_t *tc0 ); +#define x264_deblock_h_chroma_mbaff_mmx2 x264_template(deblock_h_chroma_mbaff_mmx2) +void x264_deblock_h_chroma_mbaff_mmx2( pixel *pix, intptr_t stride, int alpha, int beta, int8_t *tc0 ); +#define x264_deblock_h_luma_intra_mmx2 x264_template(deblock_h_luma_intra_mmx2) +void x264_deblock_h_luma_intra_mmx2( pixel 
*pix, intptr_t stride, int alpha, int beta ); +#define x264_deblock_v8_luma_intra_mmx2 x264_template(deblock_v8_luma_intra_mmx2) +void x264_deblock_v8_luma_intra_mmx2( uint8_t *pix, intptr_t stride, int alpha, int beta ); +#define x264_deblock_v_chroma_intra_mmx2 x264_template(deblock_v_chroma_intra_mmx2) +void x264_deblock_v_chroma_intra_mmx2( pixel *pix, intptr_t stride, int alpha, int beta ); +#define x264_deblock_h_chroma_intra_mmx2 x264_template(deblock_h_chroma_intra_mmx2) +void x264_deblock_h_chroma_intra_mmx2( pixel *pix, intptr_t stride, int alpha, int beta ); +#define x264_deblock_h_chroma_intra_mbaff_mmx2 x264_template(deblock_h_chroma_intra_mbaff_mmx2) +void x264_deblock_h_chroma_intra_mbaff_mmx2( pixel *pix, intptr_t stride, int alpha, int beta ); + +#define x264_deblock_v_luma_mmx2 x264_template(deblock_v_luma_mmx2) +#define x264_deblock_v_luma_intra_mmx2 x264_template(deblock_v_luma_intra_mmx2) +#if HIGH_BIT_DEPTH +void x264_deblock_v_luma_mmx2( pixel *pix, intptr_t stride, int alpha, int beta, int8_t *tc0 ); +void x264_deblock_v_luma_intra_mmx2( pixel *pix, intptr_t stride, int alpha, int beta ); +#else +// FIXME this wrapper has a significant cpu cost +static ALWAYS_INLINE void x264_deblock_v_luma_mmx2( uint8_t *pix, intptr_t stride, int alpha, int beta, int8_t *tc0 ) +{ + x264_deblock_v8_luma_mmx2( pix, stride, alpha, beta, tc0 ); + x264_deblock_v8_luma_mmx2( pix+8, stride, alpha, beta, tc0+2 ); +} +static ALWAYS_INLINE void x264_deblock_v_luma_intra_mmx2( uint8_t *pix, intptr_t stride, int alpha, int beta ) +{ + x264_deblock_v8_luma_intra_mmx2( pix, stride, alpha, beta ); + x264_deblock_v8_luma_intra_mmx2( pix+8, stride, alpha, beta ); +} +#endif // HIGH_BIT_DEPTH +#endif + +#endif diff -Nru x264-0.152.2854+gite9a5903/common/x86/mc-a2.asm x264-0.158.2988+git-20191101.7817004/common/x86/mc-a2.asm --- x264-0.152.2854+gite9a5903/common/x86/mc-a2.asm 2017-12-31 12:50:51.000000000 +0000 +++ x264-0.158.2988+git-20191101.7817004/common/x86/mc-a2.asm
2019-11-09 05:16:29.000000000 +0000 @@ -1,7 +1,7 @@ ;***************************************************************************** ;* mc-a2.asm: x86 motion compensation ;***************************************************************************** -;* Copyright (C) 2005-2017 x264 project +;* Copyright (C) 2005-2019 x264 project ;* ;* Authors: Loren Merritt ;* Fiona Glaser @@ -64,10 +64,11 @@ mbtree_prop_list_avx512_shuf: dw 0, 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7 mbtree_fix8_unpack_shuf: db -1,-1, 1, 0,-1,-1, 3, 2,-1,-1, 5, 4,-1,-1, 7, 6 db -1,-1, 9, 8,-1,-1,11,10,-1,-1,13,12,-1,-1,15,14 -mbtree_fix8_pack_shuf: db 1, 0, 3, 2, 5, 4, 7, 6, 9, 8,11,10,13,12,15,14 +; bits 0-3: pshufb, bits 4-7: AVX-512 vpermq +mbtree_fix8_pack_shuf: db 0x01,0x20,0x43,0x62,0x15,0x34,0x57,0x76,0x09,0x08,0x0b,0x0a,0x0d,0x0c,0x0f,0x0e -pf_256: times 4 dd 256.0 -pf_inv256: times 4 dd 0.00390625 +pf_256: times 4 dd 256.0 +pf_inv16777216: times 4 dd 0x1p-24 pd_16: times 4 dd 16 @@ -1016,6 +1017,143 @@ INIT_YMM avx2 PLANE_COPY_CORE 1 +%macro PLANE_COPY_AVX512 1 ; swap +%if %1 +cglobal plane_copy_swap, 6,7 + vbroadcasti32x4 m4, [copy_swap_shuf] +%else +cglobal plane_copy, 6,7 +%endif + movsxdifnidn r4, r4d +%if %1 && HIGH_BIT_DEPTH + %define %%mload vmovdqu32 + lea r2, [r2+4*r4-64] + lea r0, [r0+4*r4-64] + neg r4 + mov r6d, r4d + shl r4, 2 + or r6d, 0xffff0010 + shrx r6d, r6d, r6d ; (1 << (w & 15)) - 1 + kmovw k1, r6d +%elif %1 || HIGH_BIT_DEPTH + %define %%mload vmovdqu16 + lea r2, [r2+2*r4-64] + lea r0, [r0+2*r4-64] + mov r6d, -1 + neg r4 + shrx r6d, r6d, r4d + add r4, r4 + kmovd k1, r6d +%else + %define %%mload vmovdqu8 + lea r2, [r2+1*r4-64] + lea r0, [r0+1*r4-64] + mov r6, -1 + neg r4 + shrx r6, r6, r4 +%if ARCH_X86_64 + kmovq k1, r6 +%else + kmovd k1, r6d + test r4d, 32 + jnz .l32 + kxnord k2, k2, k2 + kunpckdq k1, k1, k2 +.l32: +%endif +%endif + FIX_STRIDES r3, r1 + add r4, 4*64 + jge .small + mov r6, r4 + +.loop: ; >256 bytes/row + PREFETCHNT_ITER r2+r4+64, 4*64 + movu m0, 
[r2+r4-3*64] + movu m1, [r2+r4-2*64] + movu m2, [r2+r4-1*64] + movu m3, [r2+r4-0*64] +%if %1 + pshufb m0, m4 + pshufb m1, m4 + pshufb m2, m4 + pshufb m3, m4 +%endif + movnta [r0+r4-3*64], m0 + movnta [r0+r4-2*64], m1 + movnta [r0+r4-1*64], m2 + movnta [r0+r4-0*64], m3 + add r4, 4*64 + jl .loop + PREFETCHNT_ITER r2+r4+64, 4*64 + sub r4, 3*64 + jge .tail +.loop2: + movu m0, [r2+r4] +%if %1 + pshufb m0, m4 +%endif + movnta [r0+r4], m0 + add r4, 64 + jl .loop2 +.tail: + %%mload m0 {k1}{z}, [r2+r4] +%if %1 + pshufb m0, m4 +%endif + movnta [r0+r4], m0 + add r2, r3 + add r0, r1 + mov r4, r6 + dec r5d + jg .loop + sfence + RET + +.small: ; 65-256 bytes/row. skip non-temporal stores + sub r4, 3*64 + jge .tiny + mov r6, r4 +.small_loop: + PREFETCHNT_ITER r2+r4+64, 64 + movu m0, [r2+r4] +%if %1 + pshufb m0, m4 +%endif + mova [r0+r4], m0 + add r4, 64 + jl .small_loop + PREFETCHNT_ITER r2+r4+64, 64 + %%mload m0 {k1}{z}, [r2+r4] +%if %1 + pshufb m0, m4 +%endif + mova [r0+r4], m0 + add r2, r3 + add r0, r1 + mov r4, r6 + dec r5d + jg .small_loop + RET + +.tiny: ; 1-64 bytes/row. 
skip non-temporal stores + PREFETCHNT_ITER r2+r4+64, 64 + %%mload m0 {k1}{z}, [r2+r4] +%if %1 + pshufb m0, m4 +%endif + mova [r0+r4], m0 + add r2, r3 + add r0, r1 + dec r5d + jg .tiny + RET +%endmacro + +INIT_ZMM avx512 +PLANE_COPY_AVX512 0 +PLANE_COPY_AVX512 1 + %macro INTERLEAVE 4-5 ; dst, srcu, srcv, is_aligned, nt_hint %if HIGH_BIT_DEPTH %assign x 0 @@ -1258,22 +1396,55 @@ RET %endmacro ; LOAD_DEINTERLEAVE_CHROMA +%macro LOAD_DEINTERLEAVE_CHROMA_FDEC_AVX512 0 +cglobal load_deinterleave_chroma_fdec, 4,5 + vbroadcasti32x8 m0, [deinterleave_shuf32a] + mov r4d, 0x3333ff00 + kmovd k1, r4d + lea r4, [r2*3] + kshiftrd k2, k1, 16 +.loop: + vbroadcasti128 ym1, [r1] + vbroadcasti32x4 m1 {k1}, [r1+r2] + vbroadcasti128 ym2, [r1+r2*2] + vbroadcasti32x4 m2 {k1}, [r1+r4] + lea r1, [r1+r2*4] + pshufb m1, m0 + pshufb m2, m0 + vmovdqa32 [r0] {k2}, m1 + vmovdqa32 [r0+mmsize] {k2}, m2 + add r0, 2*mmsize + sub r3d, 4 + jg .loop + RET +%endmacro + %macro LOAD_DEINTERLEAVE_CHROMA_FENC_AVX2 0 cglobal load_deinterleave_chroma_fenc, 4,5 vbroadcasti128 m0, [deinterleave_shuf] lea r4, [r2*3] .loop: - mova xm1, [r1] - vinserti128 m1, m1, [r1+r2], 1 - mova xm2, [r1+r2*2] - vinserti128 m2, m2, [r1+r4], 1 + mova xm1, [r1] ; 0 + vinserti128 ym1, [r1+r2], 1 ; 1 +%if mmsize == 64 + mova xm2, [r1+r2*4] ; 4 + vinserti32x4 m1, [r1+r2*2], 2 ; 2 + vinserti32x4 m2, [r1+r4*2], 2 ; 6 + vinserti32x4 m1, [r1+r4], 3 ; 3 + lea r1, [r1+r2*4] + vinserti32x4 m2, [r1+r2], 1 ; 5 + vinserti32x4 m2, [r1+r4], 3 ; 7 +%else + mova xm2, [r1+r2*2] ; 2 + vinserti128 m2, [r1+r4], 1 ; 3 +%endif + lea r1, [r1+r2*4] pshufb m1, m0 pshufb m2, m0 - mova [r0+0*FENC_STRIDE], m1 - mova [r0+2*FENC_STRIDE], m2 - lea r1, [r1+r2*4] - add r0, 4*FENC_STRIDE - sub r3d, 4 + mova [r0], m1 + mova [r0+mmsize], m2 + add r0, 2*mmsize + sub r3d, mmsize/8 jg .loop RET %endmacro ; LOAD_DEINTERLEAVE_CHROMA_FENC_AVX2 @@ -1498,6 +1669,9 @@ INIT_YMM avx2 LOAD_DEINTERLEAVE_CHROMA_FENC_AVX2 PLANE_DEINTERLEAVE_RGB +INIT_ZMM avx512 
+LOAD_DEINTERLEAVE_CHROMA_FDEC_AVX512 +LOAD_DEINTERLEAVE_CHROMA_FENC_AVX2 %endif ; These functions are not general-use; not only do they require aligned input, but memcpy @@ -2481,8 +2655,8 @@ paddd m6, m7 ; i_mb_x += 8 pand m3, m8 ; {x, y} vprold m1, m3, 20 ; {y, x} << 4 - psubw m3 {k4}, m9, m3 ; {32-x, 32-y}, {32-x, y} - psubw m1 {k5}, m10, m1 ; ({32-y, x}, {y, x}) << 4 + vpsubw m3 {k4}, m9, m3 ; {32-x, 32-y}, {32-x, y} + vpsubw m1 {k5}, m10, m1 ; ({32-y, x}, {y, x}) << 4 pmullw m3, m1 paddsw m3, m3 ; prevent signed overflow in idx0 (32*32<<5 == 0x8000) pmulhrsw m2, m3, m4 ; idx01weight idx23weightp @@ -2493,11 +2667,11 @@ vpcmpuw k2, ym1, ym20, 1 ; {mbx, mbx+1} < width kunpckwd k2, k2, k2 psrad m1, m0, 16 - paddd m1 {k6}, m11 + vpaddd m1 {k6}, m11 vpcmpud k1 {k1}, m1, m13, 1 ; mby < height | mby+1 < height pmaddwd m0, m15 - paddd m0 {k6}, m14 ; idx0 | idx2 + vpaddd m0 {k6}, m14 ; idx0 | idx2 vmovdqu16 m2 {k2}{z}, m2 ; idx01weight | idx23weight vptestmd k1 {k1}, m2, m2 ; mask out offsets with no changes @@ -2589,9 +2763,9 @@ ;----------------------------------------------------------------------------- cglobal mbtree_fix8_unpack, 3,4 %if mmsize == 32 - vbroadcastf128 m2, [pf_inv256] + vbroadcastf128 m2, [pf_inv16777216] %else - movaps m2, [pf_inv256] + movaps m2, [pf_inv16777216] mova m4, [mbtree_fix8_unpack_shuf+16] %endif mova m3, [mbtree_fix8_unpack_shuf] @@ -2612,8 +2786,6 @@ pshufb m0, m1, m3 pshufb m1, m4 %endif - psrad m0, 16 ; sign-extend - psrad m1, 16 cvtdq2ps m0, m0 cvtdq2ps m1, m1 mulps m0, m2 @@ -2627,8 +2799,7 @@ jz .end .scalar: movzx r3d, word [r1+2*r2+mmsize] - rol r3w, 8 - movsx r3d, r3w + bswap r3d ; Use 3-arg cvtsi2ss as a workaround for the fact that the instruction has a stupid dependency on ; dst which causes terrible performance when used in a loop otherwise. Blame Intel for poor design. 
cvtsi2ss xm0, xm2, r3d @@ -2644,3 +2815,69 @@ MBTREE_FIX8 INIT_YMM avx2 MBTREE_FIX8 + +%macro MBTREE_FIX8_AVX512_END 0 + add r2, mmsize/2 + jle .loop + cmp r2d, mmsize/2 + jl .tail + RET +.tail: + ; Do the final loop iteration with partial masking to handle the remaining elements. + shrx r3d, r3d, r2d ; (1 << count) - 1 + kmovd k1, r3d + kshiftrd k2, k1, 16 + jmp .loop +%endmacro + +INIT_ZMM avx512 +cglobal mbtree_fix8_pack, 3,4 + vbroadcastf32x4 m2, [pf_256] + vbroadcasti32x4 m3, [mbtree_fix8_pack_shuf] + psrld xm4, xm3, 4 + pmovzxbq m4, xm4 + sub r2d, mmsize/2 + mov r3d, -1 + movsxdifnidn r2, r2d + lea r1, [r1+4*r2] + lea r0, [r0+2*r2] + neg r2 + jg .tail + kmovd k1, r3d + kmovw k2, k1 +.loop: + vmulps m0 {k1}{z}, m2, [r1+4*r2] + vmulps m1 {k2}{z}, m2, [r1+4*r2+mmsize] + cvttps2dq m0, m0 + cvttps2dq m1, m1 + packssdw m0, m1 + pshufb m0, m3 + vpermq m0, m4, m0 + vmovdqu16 [r0+2*r2] {k1}, m0 + MBTREE_FIX8_AVX512_END + +cglobal mbtree_fix8_unpack, 3,4 + vbroadcasti32x8 m3, [mbtree_fix8_unpack_shuf] + vbroadcastf32x4 m2, [pf_inv16777216] + sub r2d, mmsize/2 + mov r3d, -1 + movsxdifnidn r2, r2d + lea r1, [r1+2*r2] + lea r0, [r0+4*r2] + neg r2 + jg .tail + kmovw k1, r3d + kmovw k2, k1 +.loop: + mova m1, [r1+2*r2] + vshufi32x4 m0, m1, m1, q1100 + vshufi32x4 m1, m1, m1, q3322 + pshufb m0, m3 + pshufb m1, m3 + cvtdq2ps m0, m0 + cvtdq2ps m1, m1 + mulps m0, m2 + mulps m1, m2 + vmovaps [r0+4*r2] {k1}, m0 + vmovaps [r0+4*r2+mmsize] {k2}, m1 + MBTREE_FIX8_AVX512_END diff -Nru x264-0.152.2854+gite9a5903/common/x86/mc-a.asm x264-0.158.2988+git-20191101.7817004/common/x86/mc-a.asm --- x264-0.152.2854+gite9a5903/common/x86/mc-a.asm 2017-12-31 12:50:51.000000000 +0000 +++ x264-0.158.2988+git-20191101.7817004/common/x86/mc-a.asm 2019-11-09 05:16:29.000000000 +0000 @@ -1,7 +1,7 @@ ;***************************************************************************** ;* mc-a.asm: x86 motion compensation ;***************************************************************************** -;* 
Copyright (C) 2003-2017 x264 project +;* Copyright (C) 2003-2019 x264 project ;* ;* Authors: Loren Merritt ;* Fiona Glaser @@ -1331,7 +1331,7 @@ sub r4, r2 shl r6, 4 ;jump = (offset + align*2)*48 %define avg_w16_addr avg_w16_align1_1_ssse3-(avg_w16_align2_2_ssse3-avg_w16_align1_1_ssse3) -%ifdef PIC +%if ARCH_X86_64 lea r7, [avg_w16_addr] add r6, r7 %else @@ -1515,6 +1515,25 @@ PREFETCH_FENC 420 PREFETCH_FENC 422 +%if ARCH_X86_64 + DECLARE_REG_TMP 4 +%else + DECLARE_REG_TMP 2 +%endif + +cglobal prefetch_fenc_400, 2,3 + movifnidn t0d, r4m + FIX_STRIDES r1 + and t0d, 3 + imul t0d, r1d + lea r0, [r0+t0*4+64*SIZEOF_PIXEL] + prefetcht0 [r0] + prefetcht0 [r0+r1] + lea r0, [r0+r1*2] + prefetcht0 [r0] + prefetcht0 [r0+r1] + RET + ;----------------------------------------------------------------------------- ; void prefetch_ref( pixel *pix, intptr_t stride, int parity ) ;----------------------------------------------------------------------------- @@ -2001,7 +2020,7 @@ %if cpuflag(cache64) mov t0d, r3d and t0d, 7 -%ifdef PIC +%if ARCH_X86_64 lea t1, [ch_shuf_adj] movddup xm5, [t1 + t0*4] %else diff -Nru x264-0.152.2854+gite9a5903/common/x86/mc-c.c x264-0.158.2988+git-20191101.7817004/common/x86/mc-c.c --- x264-0.152.2854+gite9a5903/common/x86/mc-c.c 2017-12-31 12:50:51.000000000 +0000 +++ x264-0.158.2988+git-20191101.7817004/common/x86/mc-c.c 2019-11-09 05:16:29.000000000 +0000 @@ -1,7 +1,7 @@ /***************************************************************************** * mc-c.c: x86 motion compensation ***************************************************************************** - * Copyright (C) 2003-2017 x264 project + * Copyright (C) 2003-2019 x264 project * * Authors: Laurent Aimar * Loren Merritt @@ -28,6 +28,40 @@ #include "common/common.h" #include "mc.h" +#define x264_pixel_avg_16x16_avx2 x264_template(pixel_avg_16x16_avx2) +#define x264_pixel_avg_16x16_avx512 x264_template(pixel_avg_16x16_avx512) +#define x264_pixel_avg_16x16_mmx2 
x264_template(pixel_avg_16x16_mmx2) +#define x264_pixel_avg_16x16_sse2 x264_template(pixel_avg_16x16_sse2) +#define x264_pixel_avg_16x16_ssse3 x264_template(pixel_avg_16x16_ssse3) +#define x264_pixel_avg_16x8_avx2 x264_template(pixel_avg_16x8_avx2) +#define x264_pixel_avg_16x8_avx512 x264_template(pixel_avg_16x8_avx512) +#define x264_pixel_avg_16x8_mmx2 x264_template(pixel_avg_16x8_mmx2) +#define x264_pixel_avg_16x8_sse2 x264_template(pixel_avg_16x8_sse2) +#define x264_pixel_avg_16x8_ssse3 x264_template(pixel_avg_16x8_ssse3) +#define x264_pixel_avg_4x16_mmx2 x264_template(pixel_avg_4x16_mmx2) +#define x264_pixel_avg_4x16_sse2 x264_template(pixel_avg_4x16_sse2) +#define x264_pixel_avg_4x16_ssse3 x264_template(pixel_avg_4x16_ssse3) +#define x264_pixel_avg_4x2_mmx2 x264_template(pixel_avg_4x2_mmx2) +#define x264_pixel_avg_4x2_sse2 x264_template(pixel_avg_4x2_sse2) +#define x264_pixel_avg_4x2_ssse3 x264_template(pixel_avg_4x2_ssse3) +#define x264_pixel_avg_4x4_mmx2 x264_template(pixel_avg_4x4_mmx2) +#define x264_pixel_avg_4x4_sse2 x264_template(pixel_avg_4x4_sse2) +#define x264_pixel_avg_4x4_ssse3 x264_template(pixel_avg_4x4_ssse3) +#define x264_pixel_avg_4x8_mmx2 x264_template(pixel_avg_4x8_mmx2) +#define x264_pixel_avg_4x8_sse2 x264_template(pixel_avg_4x8_sse2) +#define x264_pixel_avg_4x8_ssse3 x264_template(pixel_avg_4x8_ssse3) +#define x264_pixel_avg_8x16_avx512 x264_template(pixel_avg_8x16_avx512) +#define x264_pixel_avg_8x16_mmx2 x264_template(pixel_avg_8x16_mmx2) +#define x264_pixel_avg_8x16_sse2 x264_template(pixel_avg_8x16_sse2) +#define x264_pixel_avg_8x16_ssse3 x264_template(pixel_avg_8x16_ssse3) +#define x264_pixel_avg_8x4_avx512 x264_template(pixel_avg_8x4_avx512) +#define x264_pixel_avg_8x4_mmx2 x264_template(pixel_avg_8x4_mmx2) +#define x264_pixel_avg_8x4_sse2 x264_template(pixel_avg_8x4_sse2) +#define x264_pixel_avg_8x4_ssse3 x264_template(pixel_avg_8x4_ssse3) +#define x264_pixel_avg_8x8_avx512 x264_template(pixel_avg_8x8_avx512) +#define 
x264_pixel_avg_8x8_mmx2 x264_template(pixel_avg_8x8_mmx2) +#define x264_pixel_avg_8x8_sse2 x264_template(pixel_avg_8x8_sse2) +#define x264_pixel_avg_8x8_ssse3 x264_template(pixel_avg_8x8_ssse3) #define DECL_SUF( func, args )\ void func##_mmx2 args;\ void func##_sse2 args;\ @@ -44,10 +78,43 @@ DECL_SUF( x264_pixel_avg_4x8, ( pixel *, intptr_t, pixel *, intptr_t, pixel *, intptr_t, int )) DECL_SUF( x264_pixel_avg_4x4, ( pixel *, intptr_t, pixel *, intptr_t, pixel *, intptr_t, int )) DECL_SUF( x264_pixel_avg_4x2, ( pixel *, intptr_t, pixel *, intptr_t, pixel *, intptr_t, int )) +#undef DECL_SUF +#define x264_mc_weight_w12_mmx2 x264_template(mc_weight_w12_mmx2) +#define x264_mc_weight_w12_sse2 x264_template(mc_weight_w12_sse2) +#define x264_mc_weight_w16_avx2 x264_template(mc_weight_w16_avx2) +#define x264_mc_weight_w16_mmx2 x264_template(mc_weight_w16_mmx2) +#define x264_mc_weight_w16_sse2 x264_template(mc_weight_w16_sse2) +#define x264_mc_weight_w16_ssse3 x264_template(mc_weight_w16_ssse3) +#define x264_mc_weight_w20_avx2 x264_template(mc_weight_w20_avx2) +#define x264_mc_weight_w20_mmx2 x264_template(mc_weight_w20_mmx2) +#define x264_mc_weight_w20_sse2 x264_template(mc_weight_w20_sse2) +#define x264_mc_weight_w20_ssse3 x264_template(mc_weight_w20_ssse3) +#define x264_mc_weight_w4_mmx2 x264_template(mc_weight_w4_mmx2) +#define x264_mc_weight_w4_ssse3 x264_template(mc_weight_w4_ssse3) +#define x264_mc_weight_w8_avx2 x264_template(mc_weight_w8_avx2) +#define x264_mc_weight_w8_mmx2 x264_template(mc_weight_w8_mmx2) +#define x264_mc_weight_w8_sse2 x264_template(mc_weight_w8_sse2) +#define x264_mc_weight_w8_ssse3 x264_template(mc_weight_w8_ssse3) #define MC_WEIGHT(w,type) \ void x264_mc_weight_w##w##_##type( pixel *, intptr_t, pixel *, intptr_t, const x264_weight_t *, int ); +#define x264_mc_offsetadd_w12_mmx2 x264_template(mc_offsetadd_w12_mmx2) +#define x264_mc_offsetadd_w16_mmx2 x264_template(mc_offsetadd_w16_mmx2) +#define x264_mc_offsetadd_w16_sse2 
x264_template(mc_offsetadd_w16_sse2) +#define x264_mc_offsetadd_w20_mmx2 x264_template(mc_offsetadd_w20_mmx2) +#define x264_mc_offsetadd_w20_sse2 x264_template(mc_offsetadd_w20_sse2) +#define x264_mc_offsetadd_w4_mmx2 x264_template(mc_offsetadd_w4_mmx2) +#define x264_mc_offsetadd_w8_mmx2 x264_template(mc_offsetadd_w8_mmx2) +#define x264_mc_offsetadd_w8_sse2 x264_template(mc_offsetadd_w8_sse2) +#define x264_mc_offsetsub_w12_mmx2 x264_template(mc_offsetsub_w12_mmx2) +#define x264_mc_offsetsub_w16_mmx2 x264_template(mc_offsetsub_w16_mmx2) +#define x264_mc_offsetsub_w16_sse2 x264_template(mc_offsetsub_w16_sse2) +#define x264_mc_offsetsub_w20_mmx2 x264_template(mc_offsetsub_w20_mmx2) +#define x264_mc_offsetsub_w20_sse2 x264_template(mc_offsetsub_w20_sse2) +#define x264_mc_offsetsub_w4_mmx2 x264_template(mc_offsetsub_w4_mmx2) +#define x264_mc_offsetsub_w8_mmx2 x264_template(mc_offsetsub_w8_mmx2) +#define x264_mc_offsetsub_w8_sse2 x264_template(mc_offsetsub_w8_sse2) #define MC_WEIGHT_OFFSET(w,type) \ void x264_mc_offsetadd_w##w##_##type( pixel *, intptr_t, pixel *, intptr_t, const x264_weight_t *, int ); \ void x264_mc_offsetsub_w##w##_##type( pixel *, intptr_t, pixel *, intptr_t, const x264_weight_t *, int ); \ @@ -73,113 +140,200 @@ MC_WEIGHT( 8, avx2 ) MC_WEIGHT( 16, avx2 ) MC_WEIGHT( 20, avx2 ) -#undef MC_OFFSET +#undef MC_WEIGHT_OFFSET #undef MC_WEIGHT +#define x264_mc_copy_w4_mmx x264_template(mc_copy_w4_mmx) void x264_mc_copy_w4_mmx ( pixel *, intptr_t, pixel *, intptr_t, int ); +#define x264_mc_copy_w8_mmx x264_template(mc_copy_w8_mmx) void x264_mc_copy_w8_mmx ( pixel *, intptr_t, pixel *, intptr_t, int ); +#define x264_mc_copy_w8_sse x264_template(mc_copy_w8_sse) void x264_mc_copy_w8_sse ( pixel *, intptr_t, pixel *, intptr_t, int ); +#define x264_mc_copy_w16_mmx x264_template(mc_copy_w16_mmx) void x264_mc_copy_w16_mmx( pixel *, intptr_t, pixel *, intptr_t, int ); +#define x264_mc_copy_w16_sse x264_template(mc_copy_w16_sse) void x264_mc_copy_w16_sse( pixel *, 
intptr_t, pixel *, intptr_t, int ); +#define x264_mc_copy_w16_aligned_sse x264_template(mc_copy_w16_aligned_sse) void x264_mc_copy_w16_aligned_sse( pixel *, intptr_t, pixel *, intptr_t, int ); +#define x264_mc_copy_w16_avx x264_template(mc_copy_w16_avx) void x264_mc_copy_w16_avx( uint16_t *, intptr_t, uint16_t *, intptr_t, int ); +#define x264_mc_copy_w16_aligned_avx x264_template(mc_copy_w16_aligned_avx) void x264_mc_copy_w16_aligned_avx( uint16_t *, intptr_t, uint16_t *, intptr_t, int ); +#define x264_prefetch_fenc_400_mmx2 x264_template(prefetch_fenc_400_mmx2) +void x264_prefetch_fenc_400_mmx2( pixel *, intptr_t, pixel *, intptr_t, int ); +#define x264_prefetch_fenc_420_mmx2 x264_template(prefetch_fenc_420_mmx2) void x264_prefetch_fenc_420_mmx2( pixel *, intptr_t, pixel *, intptr_t, int ); +#define x264_prefetch_fenc_422_mmx2 x264_template(prefetch_fenc_422_mmx2) void x264_prefetch_fenc_422_mmx2( pixel *, intptr_t, pixel *, intptr_t, int ); +#define x264_prefetch_ref_mmx2 x264_template(prefetch_ref_mmx2) void x264_prefetch_ref_mmx2( pixel *, intptr_t, int ); +#define x264_plane_copy_core_sse x264_template(plane_copy_core_sse) void x264_plane_copy_core_sse( pixel *, intptr_t, pixel *, intptr_t, int w, int h ); +#define x264_plane_copy_core_avx x264_template(plane_copy_core_avx) void x264_plane_copy_core_avx( pixel *, intptr_t, pixel *, intptr_t, int w, int h ); +#define x264_plane_copy_avx512 x264_template(plane_copy_avx512) +void x264_plane_copy_avx512( pixel *, intptr_t, pixel *, intptr_t, int w, int h ); +#define x264_plane_copy_swap_core_ssse3 x264_template(plane_copy_swap_core_ssse3) void x264_plane_copy_swap_core_ssse3( pixel *, intptr_t, pixel *, intptr_t, int w, int h ); +#define x264_plane_copy_swap_core_avx2 x264_template(plane_copy_swap_core_avx2) void x264_plane_copy_swap_core_avx2 ( pixel *, intptr_t, pixel *, intptr_t, int w, int h ); +#define x264_plane_copy_swap_avx512 x264_template(plane_copy_swap_avx512) +void x264_plane_copy_swap_avx512( pixel 
*, intptr_t, pixel *, intptr_t, int w, int h ); +#define x264_plane_copy_interleave_core_mmx2 x264_template(plane_copy_interleave_core_mmx2) void x264_plane_copy_interleave_core_mmx2( pixel *dst, intptr_t i_dst, pixel *srcu, intptr_t i_srcu, pixel *srcv, intptr_t i_srcv, int w, int h ); +#define x264_plane_copy_interleave_core_sse2 x264_template(plane_copy_interleave_core_sse2) void x264_plane_copy_interleave_core_sse2( pixel *dst, intptr_t i_dst, pixel *srcu, intptr_t i_srcu, pixel *srcv, intptr_t i_srcv, int w, int h ); +#define x264_plane_copy_interleave_core_avx x264_template(plane_copy_interleave_core_avx) void x264_plane_copy_interleave_core_avx( pixel *dst, intptr_t i_dst, pixel *srcu, intptr_t i_srcu, pixel *srcv, intptr_t i_srcv, int w, int h ); +#define x264_plane_copy_deinterleave_sse2 x264_template(plane_copy_deinterleave_sse2) void x264_plane_copy_deinterleave_sse2( pixel *dsta, intptr_t i_dsta, pixel *dstb, intptr_t i_dstb, pixel *src, intptr_t i_src, int w, int h ); +#define x264_plane_copy_deinterleave_ssse3 x264_template(plane_copy_deinterleave_ssse3) void x264_plane_copy_deinterleave_ssse3( uint8_t *dsta, intptr_t i_dsta, uint8_t *dstb, intptr_t i_dstb, uint8_t *src, intptr_t i_src, int w, int h ); +#define x264_plane_copy_deinterleave_avx x264_template(plane_copy_deinterleave_avx) void x264_plane_copy_deinterleave_avx( uint16_t *dsta, intptr_t i_dsta, uint16_t *dstb, intptr_t i_dstb, uint16_t *src, intptr_t i_src, int w, int h ); +#define x264_plane_copy_deinterleave_avx2 x264_template(plane_copy_deinterleave_avx2) void x264_plane_copy_deinterleave_avx2( pixel *dsta, intptr_t i_dsta, pixel *dstb, intptr_t i_dstb, pixel *src, intptr_t i_src, int w, int h ); +#define x264_plane_copy_deinterleave_rgb_sse2 x264_template(plane_copy_deinterleave_rgb_sse2) void x264_plane_copy_deinterleave_rgb_sse2 ( pixel *dsta, intptr_t i_dsta, pixel *dstb, intptr_t i_dstb, pixel *dstc, intptr_t i_dstc, pixel *src, intptr_t i_src, int pw, int w, int h ); +#define 
x264_plane_copy_deinterleave_rgb_ssse3 x264_template(plane_copy_deinterleave_rgb_ssse3) void x264_plane_copy_deinterleave_rgb_ssse3( pixel *dsta, intptr_t i_dsta, pixel *dstb, intptr_t i_dstb, pixel *dstc, intptr_t i_dstc, pixel *src, intptr_t i_src, int pw, int w, int h ); +#define x264_plane_copy_deinterleave_rgb_avx2 x264_template(plane_copy_deinterleave_rgb_avx2) void x264_plane_copy_deinterleave_rgb_avx2 ( pixel *dsta, intptr_t i_dsta, pixel *dstb, intptr_t i_dstb, pixel *dstc, intptr_t i_dstc, pixel *src, intptr_t i_src, int pw, int w, int h ); +#define x264_plane_copy_deinterleave_v210_ssse3 x264_template(plane_copy_deinterleave_v210_ssse3) void x264_plane_copy_deinterleave_v210_ssse3 ( uint16_t *dstu, intptr_t i_dstu, uint16_t *dstv, intptr_t i_dstv, uint32_t *src, intptr_t i_src, int w, int h ); +#define x264_plane_copy_deinterleave_v210_avx x264_template(plane_copy_deinterleave_v210_avx) void x264_plane_copy_deinterleave_v210_avx ( uint16_t *dstu, intptr_t i_dstu, uint16_t *dstv, intptr_t i_dstv, uint32_t *src, intptr_t i_src, int w, int h ); +#define x264_plane_copy_deinterleave_v210_avx2 x264_template(plane_copy_deinterleave_v210_avx2) void x264_plane_copy_deinterleave_v210_avx2 ( uint16_t *dstu, intptr_t i_dstu, uint16_t *dstv, intptr_t i_dstv, uint32_t *src, intptr_t i_src, int w, int h ); +#define x264_plane_copy_deinterleave_v210_avx512 x264_template(plane_copy_deinterleave_v210_avx512) void x264_plane_copy_deinterleave_v210_avx512( uint16_t *dstu, intptr_t i_dstu, uint16_t *dstv, intptr_t i_dstv, uint32_t *src, intptr_t i_src, int w, int h ); +#define x264_store_interleave_chroma_mmx2 x264_template(store_interleave_chroma_mmx2) void x264_store_interleave_chroma_mmx2( pixel *dst, intptr_t i_dst, pixel *srcu, pixel *srcv, int height ); +#define x264_store_interleave_chroma_sse2 x264_template(store_interleave_chroma_sse2) void x264_store_interleave_chroma_sse2( pixel *dst, intptr_t i_dst, pixel *srcu, pixel *srcv, int height ); +#define 
x264_store_interleave_chroma_avx x264_template(store_interleave_chroma_avx) void x264_store_interleave_chroma_avx ( pixel *dst, intptr_t i_dst, pixel *srcu, pixel *srcv, int height ); +#define x264_load_deinterleave_chroma_fenc_sse2 x264_template(load_deinterleave_chroma_fenc_sse2) void x264_load_deinterleave_chroma_fenc_sse2( pixel *dst, pixel *src, intptr_t i_src, int height ); +#define x264_load_deinterleave_chroma_fenc_ssse3 x264_template(load_deinterleave_chroma_fenc_ssse3) void x264_load_deinterleave_chroma_fenc_ssse3( uint8_t *dst, uint8_t *src, intptr_t i_src, int height ); +#define x264_load_deinterleave_chroma_fenc_avx x264_template(load_deinterleave_chroma_fenc_avx) void x264_load_deinterleave_chroma_fenc_avx( uint16_t *dst, uint16_t *src, intptr_t i_src, int height ); +#define x264_load_deinterleave_chroma_fenc_avx2 x264_template(load_deinterleave_chroma_fenc_avx2) void x264_load_deinterleave_chroma_fenc_avx2( pixel *dst, pixel *src, intptr_t i_src, int height ); +#define x264_load_deinterleave_chroma_fenc_avx512 x264_template(load_deinterleave_chroma_fenc_avx512) +void x264_load_deinterleave_chroma_fenc_avx512( uint8_t *dst, uint8_t *src, intptr_t i_src, int height ); +#define x264_load_deinterleave_chroma_fdec_sse2 x264_template(load_deinterleave_chroma_fdec_sse2) void x264_load_deinterleave_chroma_fdec_sse2( pixel *dst, pixel *src, intptr_t i_src, int height ); +#define x264_load_deinterleave_chroma_fdec_ssse3 x264_template(load_deinterleave_chroma_fdec_ssse3) void x264_load_deinterleave_chroma_fdec_ssse3( uint8_t *dst, uint8_t *src, intptr_t i_src, int height ); +#define x264_load_deinterleave_chroma_fdec_avx x264_template(load_deinterleave_chroma_fdec_avx) void x264_load_deinterleave_chroma_fdec_avx( uint16_t *dst, uint16_t *src, intptr_t i_src, int height ); +#define x264_load_deinterleave_chroma_fdec_avx2 x264_template(load_deinterleave_chroma_fdec_avx2) void x264_load_deinterleave_chroma_fdec_avx2( uint16_t *dst, uint16_t *src, intptr_t i_src, 
int height ); +#define x264_load_deinterleave_chroma_fdec_avx512 x264_template(load_deinterleave_chroma_fdec_avx512) +void x264_load_deinterleave_chroma_fdec_avx512( uint8_t *dst, uint8_t *src, intptr_t i_src, int height ); +#define x264_memcpy_aligned_sse x264_template(memcpy_aligned_sse) void *x264_memcpy_aligned_sse ( void *dst, const void *src, size_t n ); +#define x264_memcpy_aligned_avx x264_template(memcpy_aligned_avx) void *x264_memcpy_aligned_avx ( void *dst, const void *src, size_t n ); +#define x264_memcpy_aligned_avx512 x264_template(memcpy_aligned_avx512) void *x264_memcpy_aligned_avx512( void *dst, const void *src, size_t n ); +#define x264_memzero_aligned_sse x264_template(memzero_aligned_sse) void x264_memzero_aligned_sse ( void *dst, size_t n ); +#define x264_memzero_aligned_avx x264_template(memzero_aligned_avx) void x264_memzero_aligned_avx ( void *dst, size_t n ); +#define x264_memzero_aligned_avx512 x264_template(memzero_aligned_avx512) void x264_memzero_aligned_avx512( void *dst, size_t n ); +#define x264_integral_init4h_sse4 x264_template(integral_init4h_sse4) void x264_integral_init4h_sse4( uint16_t *sum, uint8_t *pix, intptr_t stride ); +#define x264_integral_init4h_avx2 x264_template(integral_init4h_avx2) void x264_integral_init4h_avx2( uint16_t *sum, uint8_t *pix, intptr_t stride ); +#define x264_integral_init8h_sse4 x264_template(integral_init8h_sse4) void x264_integral_init8h_sse4( uint16_t *sum, uint8_t *pix, intptr_t stride ); +#define x264_integral_init8h_avx x264_template(integral_init8h_avx) void x264_integral_init8h_avx ( uint16_t *sum, uint8_t *pix, intptr_t stride ); +#define x264_integral_init8h_avx2 x264_template(integral_init8h_avx2) void x264_integral_init8h_avx2( uint16_t *sum, uint8_t *pix, intptr_t stride ); +#define x264_integral_init4v_mmx x264_template(integral_init4v_mmx) void x264_integral_init4v_mmx ( uint16_t *sum8, uint16_t *sum4, intptr_t stride ); +#define x264_integral_init4v_sse2 
x264_template(integral_init4v_sse2) void x264_integral_init4v_sse2 ( uint16_t *sum8, uint16_t *sum4, intptr_t stride ); +#define x264_integral_init4v_ssse3 x264_template(integral_init4v_ssse3) void x264_integral_init4v_ssse3( uint16_t *sum8, uint16_t *sum4, intptr_t stride ); +#define x264_integral_init4v_avx2 x264_template(integral_init4v_avx2) void x264_integral_init4v_avx2( uint16_t *sum8, uint16_t *sum4, intptr_t stride ); +#define x264_integral_init8v_mmx x264_template(integral_init8v_mmx) void x264_integral_init8v_mmx ( uint16_t *sum8, intptr_t stride ); +#define x264_integral_init8v_sse2 x264_template(integral_init8v_sse2) void x264_integral_init8v_sse2( uint16_t *sum8, intptr_t stride ); +#define x264_integral_init8v_avx2 x264_template(integral_init8v_avx2) void x264_integral_init8v_avx2( uint16_t *sum8, intptr_t stride ); +#define x264_mbtree_propagate_cost_sse2 x264_template(mbtree_propagate_cost_sse2) void x264_mbtree_propagate_cost_sse2 ( int16_t *dst, uint16_t *propagate_in, uint16_t *intra_costs, uint16_t *inter_costs, uint16_t *inv_qscales, float *fps_factor, int len ); +#define x264_mbtree_propagate_cost_avx x264_template(mbtree_propagate_cost_avx) void x264_mbtree_propagate_cost_avx ( int16_t *dst, uint16_t *propagate_in, uint16_t *intra_costs, uint16_t *inter_costs, uint16_t *inv_qscales, float *fps_factor, int len ); +#define x264_mbtree_propagate_cost_fma4 x264_template(mbtree_propagate_cost_fma4) void x264_mbtree_propagate_cost_fma4 ( int16_t *dst, uint16_t *propagate_in, uint16_t *intra_costs, uint16_t *inter_costs, uint16_t *inv_qscales, float *fps_factor, int len ); +#define x264_mbtree_propagate_cost_avx2 x264_template(mbtree_propagate_cost_avx2) void x264_mbtree_propagate_cost_avx2 ( int16_t *dst, uint16_t *propagate_in, uint16_t *intra_costs, uint16_t *inter_costs, uint16_t *inv_qscales, float *fps_factor, int len ); +#define x264_mbtree_propagate_cost_avx512 x264_template(mbtree_propagate_cost_avx512) void 
x264_mbtree_propagate_cost_avx512( int16_t *dst, uint16_t *propagate_in, uint16_t *intra_costs, uint16_t *inter_costs, uint16_t *inv_qscales, float *fps_factor, int len ); +#define x264_mbtree_fix8_pack_ssse3 x264_template(mbtree_fix8_pack_ssse3) void x264_mbtree_fix8_pack_ssse3( uint16_t *dst, float *src, int count ); +#define x264_mbtree_fix8_pack_avx2 x264_template(mbtree_fix8_pack_avx2) void x264_mbtree_fix8_pack_avx2 ( uint16_t *dst, float *src, int count ); +#define x264_mbtree_fix8_pack_avx512 x264_template(mbtree_fix8_pack_avx512) +void x264_mbtree_fix8_pack_avx512( uint16_t *dst, float *src, int count ); +#define x264_mbtree_fix8_unpack_ssse3 x264_template(mbtree_fix8_unpack_ssse3) void x264_mbtree_fix8_unpack_ssse3( float *dst, uint16_t *src, int count ); +#define x264_mbtree_fix8_unpack_avx2 x264_template(mbtree_fix8_unpack_avx2) void x264_mbtree_fix8_unpack_avx2 ( float *dst, uint16_t *src, int count ); +#define x264_mbtree_fix8_unpack_avx512 x264_template(mbtree_fix8_unpack_avx512) +void x264_mbtree_fix8_unpack_avx512( float *dst, uint16_t *src, int count ); +#define x264_mc_chroma_avx x264_template(mc_chroma_avx) +#define x264_mc_chroma_avx2 x264_template(mc_chroma_avx2) +#define x264_mc_chroma_cache64_ssse3 x264_template(mc_chroma_cache64_ssse3) +#define x264_mc_chroma_mmx2 x264_template(mc_chroma_mmx2) +#define x264_mc_chroma_sse2 x264_template(mc_chroma_sse2) +#define x264_mc_chroma_ssse3 x264_template(mc_chroma_ssse3) #define MC_CHROMA(cpu)\ void x264_mc_chroma_##cpu( pixel *dstu, pixel *dstv, intptr_t i_dst, pixel *src, intptr_t i_src,\ int dx, int dy, int i_width, int i_height ); @@ -189,7 +343,15 @@ MC_CHROMA(cache64_ssse3) MC_CHROMA(avx) MC_CHROMA(avx2) +#undef MC_CHROMA +#define x264_frame_init_lowres_core_avx x264_template(frame_init_lowres_core_avx) +#define x264_frame_init_lowres_core_avx2 x264_template(frame_init_lowres_core_avx2) +#define x264_frame_init_lowres_core_mmx2 x264_template(frame_init_lowres_core_mmx2) +#define 
x264_frame_init_lowres_core_cache32_mmx2 x264_template(frame_init_lowres_core_cache32_mmx2) +#define x264_frame_init_lowres_core_sse2 x264_template(frame_init_lowres_core_sse2) +#define x264_frame_init_lowres_core_ssse3 x264_template(frame_init_lowres_core_ssse3) +#define x264_frame_init_lowres_core_xop x264_template(frame_init_lowres_core_xop) #define LOWRES(cpu)\ void x264_frame_init_lowres_core_##cpu( pixel *src0, pixel *dst0, pixel *dsth, pixel *dstv, pixel *dstc,\ intptr_t src_stride, intptr_t dst_stride, int width, int height ); @@ -200,7 +362,34 @@ LOWRES(avx) LOWRES(xop) LOWRES(avx2) +#undef LOWRES +#define x264_pixel_avg2_w10_mmx2 x264_template(pixel_avg2_w10_mmx2) +#define x264_pixel_avg2_w10_sse2 x264_template(pixel_avg2_w10_sse2) +#define x264_pixel_avg2_w12_cache32_mmx2 x264_template(pixel_avg2_w12_cache32_mmx2) +#define x264_pixel_avg2_w12_cache64_mmx2 x264_template(pixel_avg2_w12_cache64_mmx2) +#define x264_pixel_avg2_w12_mmx2 x264_template(pixel_avg2_w12_mmx2) +#define x264_pixel_avg2_w16_avx2 x264_template(pixel_avg2_w16_avx2) +#define x264_pixel_avg2_w16_cache32_mmx2 x264_template(pixel_avg2_w16_cache32_mmx2) +#define x264_pixel_avg2_w16_cache64_mmx2 x264_template(pixel_avg2_w16_cache64_mmx2) +#define x264_pixel_avg2_w16_cache64_sse2 x264_template(pixel_avg2_w16_cache64_sse2) +#define x264_pixel_avg2_w16_cache64_ssse3 x264_template(pixel_avg2_w16_cache64_ssse3) +#define x264_pixel_avg2_w16_mmx2 x264_template(pixel_avg2_w16_mmx2) +#define x264_pixel_avg2_w16_sse2 x264_template(pixel_avg2_w16_sse2) +#define x264_pixel_avg2_w18_avx2 x264_template(pixel_avg2_w18_avx2) +#define x264_pixel_avg2_w18_mmx2 x264_template(pixel_avg2_w18_mmx2) +#define x264_pixel_avg2_w18_sse2 x264_template(pixel_avg2_w18_sse2) +#define x264_pixel_avg2_w20_avx2 x264_template(pixel_avg2_w20_avx2) +#define x264_pixel_avg2_w20_cache32_mmx2 x264_template(pixel_avg2_w20_cache32_mmx2) +#define x264_pixel_avg2_w20_cache64_mmx2 x264_template(pixel_avg2_w20_cache64_mmx2) +#define 
x264_pixel_avg2_w20_cache64_sse2 x264_template(pixel_avg2_w20_cache64_sse2) +#define x264_pixel_avg2_w20_mmx2 x264_template(pixel_avg2_w20_mmx2) +#define x264_pixel_avg2_w20_sse2 x264_template(pixel_avg2_w20_sse2) +#define x264_pixel_avg2_w4_mmx2 x264_template(pixel_avg2_w4_mmx2) +#define x264_pixel_avg2_w8_cache32_mmx2 x264_template(pixel_avg2_w8_cache32_mmx2) +#define x264_pixel_avg2_w8_cache64_mmx2 x264_template(pixel_avg2_w8_cache64_mmx2) +#define x264_pixel_avg2_w8_mmx2 x264_template(pixel_avg2_w8_mmx2) +#define x264_pixel_avg2_w8_sse2 x264_template(pixel_avg2_w8_sse2) #define PIXEL_AVG_W(width,cpu)\ void x264_pixel_avg2_w##width##_##cpu( pixel *, intptr_t, pixel *, intptr_t, pixel *, intptr_t ); /* This declares some functions that don't exist, but that isn't a problem. */ @@ -214,9 +403,11 @@ PIXEL_AVG_WALL(sse2) PIXEL_AVG_WALL(cache64_ssse3) PIXEL_AVG_WALL(avx2) +#undef PIXEL_AVG_W +#undef PIXEL_AVG_WALL #define PIXEL_AVG_WTAB(instr, name1, name2, name3, name4, name5)\ -static void (* const x264_pixel_avg_wtab_##instr[6])( pixel *, intptr_t, pixel *, intptr_t, pixel *, intptr_t ) =\ +static void (* const pixel_avg_wtab_##instr[6])( pixel *, intptr_t, pixel *, intptr_t, pixel *, intptr_t ) =\ {\ NULL,\ x264_pixel_avg2_w4_##name1,\ @@ -228,6 +419,10 @@ #if HIGH_BIT_DEPTH /* we can replace w12/w20 with w10/w18 as only 9/17 pixels in fact are important */ +#undef x264_pixel_avg2_w12_mmx2 +#undef x264_pixel_avg2_w20_mmx2 +#undef x264_pixel_avg2_w20_sse2 +#undef x264_pixel_avg2_w20_avx2 #define x264_pixel_avg2_w12_mmx2 x264_pixel_avg2_w10_mmx2 #define x264_pixel_avg2_w20_mmx2 x264_pixel_avg2_w18_mmx2 #define x264_pixel_avg2_w12_sse2 x264_pixel_avg2_w10_sse2 @@ -259,7 +454,7 @@ #endif // HIGH_BIT_DEPTH #define MC_COPY_WTAB(instr, name1, name2, name3)\ -static void (* const x264_mc_copy_wtab_##instr[5])( pixel *, intptr_t, pixel *, intptr_t, int ) =\ +static void (* const mc_copy_wtab_##instr[5])( pixel *, intptr_t, pixel *, intptr_t, int ) =\ {\ NULL,\ 
x264_mc_copy_w4_##name1,\ @@ -277,7 +472,7 @@ #endif #define MC_WEIGHT_WTAB(function, instr, name1, name2, w12version)\ - static void (* x264_mc_##function##_wtab_##instr[6])( pixel *, intptr_t, pixel *, intptr_t, const x264_weight_t *, int ) =\ +static void (* mc_##function##_wtab_##instr[6])( pixel *, intptr_t, pixel *, intptr_t, const x264_weight_t *, int ) =\ {\ x264_mc_##function##_w4_##name1,\ x264_mc_##function##_w4_##name1,\ @@ -295,7 +490,7 @@ MC_WEIGHT_WTAB(offsetadd,sse2,mmx2,sse2,16) MC_WEIGHT_WTAB(offsetsub,sse2,mmx2,sse2,16) -static void x264_weight_cache_mmx2( x264_t *h, x264_weight_t *w ) +static void weight_cache_mmx2( x264_t *h, x264_weight_t *w ) { if( w->i_scale == 1<i_denom ) { @@ -327,7 +522,7 @@ MC_WEIGHT_WTAB(weight,ssse3,ssse3,ssse3,16) MC_WEIGHT_WTAB(weight,avx2,ssse3,avx2,16) -static void x264_weight_cache_mmx2( x264_t *h, x264_weight_t *w ) +static void weight_cache_mmx2( x264_t *h, x264_weight_t *w ) { int i; int16_t den1; @@ -350,7 +545,7 @@ } } -static void x264_weight_cache_ssse3( x264_t *h, x264_weight_t *w ) +static void weight_cache_ssse3( x264_t *h, x264_weight_t *w ) { int i, den1; if( w->i_scale == 1<i_denom ) @@ -385,7 +580,7 @@ if( qpel_idx & 5 ) /* qpel interpolation needed */\ {\ pixel *src2 = src[x264_hpel_ref1[qpel_idx]] + offset + ((mvx&3) == 3);\ - x264_pixel_avg_wtab_##instr1[i_width>>2](\ + pixel_avg_wtab_##instr1[i_width>>2](\ dst, i_dst_stride, src1, i_src_stride,\ src2, i_height );\ if( weight->weightfn )\ @@ -394,7 +589,7 @@ else if( weight->weightfn )\ weight->weightfn[i_width>>2]( dst, i_dst_stride, src1, i_src_stride, weight, i_height );\ else\ - x264_mc_copy_wtab_##instr2[i_width>>2](dst, i_dst_stride, src1, i_src_stride, i_height );\ + mc_copy_wtab_##instr2[i_width>>2](dst, i_dst_stride, src1, i_src_stride, i_height );\ } MC_LUMA(mmx2,mmx2,mmx) @@ -423,7 +618,7 @@ if( qpel_idx & 5 ) /* qpel interpolation needed */\ {\ pixel *src2 = src[x264_hpel_ref1[qpel_idx]] + offset + ((mvx&3) == 3);\ - 
x264_pixel_avg_wtab_##name[i_width>>2](\ + pixel_avg_wtab_##name[i_width>>2](\ dst, *i_dst_stride, src1, i_src_stride,\ src2, i_height );\ if( weight->weightfn )\ @@ -455,6 +650,25 @@ GET_REF(cache64_ssse3_atom) #endif // !HIGH_BIT_DEPTH +#define x264_hpel_filter_avx x264_template(hpel_filter_avx) +#define x264_hpel_filter_avx2 x264_template(hpel_filter_avx2) +#define x264_hpel_filter_c_mmx2 x264_template(hpel_filter_c_mmx2) +#define x264_hpel_filter_c_sse2 x264_template(hpel_filter_c_sse2) +#define x264_hpel_filter_c_ssse3 x264_template(hpel_filter_c_ssse3) +#define x264_hpel_filter_c_avx x264_template(hpel_filter_c_avx) +#define x264_hpel_filter_c_avx2 x264_template(hpel_filter_c_avx2) +#define x264_hpel_filter_h_mmx2 x264_template(hpel_filter_h_mmx2) +#define x264_hpel_filter_h_sse2 x264_template(hpel_filter_h_sse2) +#define x264_hpel_filter_h_ssse3 x264_template(hpel_filter_h_ssse3) +#define x264_hpel_filter_h_avx x264_template(hpel_filter_h_avx) +#define x264_hpel_filter_h_avx2 x264_template(hpel_filter_h_avx2) +#define x264_hpel_filter_sse2 x264_template(hpel_filter_sse2) +#define x264_hpel_filter_ssse3 x264_template(hpel_filter_ssse3) +#define x264_hpel_filter_v_mmx2 x264_template(hpel_filter_v_mmx2) +#define x264_hpel_filter_v_sse2 x264_template(hpel_filter_v_sse2) +#define x264_hpel_filter_v_ssse3 x264_template(hpel_filter_v_ssse3) +#define x264_hpel_filter_v_avx x264_template(hpel_filter_v_avx) +#define x264_hpel_filter_v_avx2 x264_template(hpel_filter_v_avx2) #define HPEL(align, cpu, cpuv, cpuc, cpuh)\ void x264_hpel_filter_v_##cpuv( pixel *dst, pixel *src, int16_t *buf, intptr_t stride, intptr_t width);\ void x264_hpel_filter_c_##cpuc( pixel *dst, int16_t *buf, intptr_t width );\ @@ -550,18 +764,22 @@ } while( 0 ) #endif +#define x264_mbtree_propagate_list_internal_ssse3 x264_template(mbtree_propagate_list_internal_ssse3) PROPAGATE_LIST(ssse3) +#define x264_mbtree_propagate_list_internal_avx x264_template(mbtree_propagate_list_internal_avx) 
PROPAGATE_LIST(avx) +#define x264_mbtree_propagate_list_internal_avx2 x264_template(mbtree_propagate_list_internal_avx2) PROPAGATE_LIST(avx2) #if ARCH_X86_64 +#define x264_mbtree_propagate_list_internal_avx512 x264_template(mbtree_propagate_list_internal_avx512) void x264_mbtree_propagate_list_internal_avx512( size_t len, uint16_t *ref_costs, int16_t (*mvs)[2], int16_t *propagate_amount, uint16_t *lowres_costs, int bipred_weight, int mb_y, int width, int height, int stride, int list_mask ); -static void x264_mbtree_propagate_list_avx512( x264_t *h, uint16_t *ref_costs, int16_t (*mvs)[2], - int16_t *propagate_amount, uint16_t *lowres_costs, - int bipred_weight, int mb_y, int len, int list ) +static void mbtree_propagate_list_avx512( x264_t *h, uint16_t *ref_costs, int16_t (*mvs)[2], + int16_t *propagate_amount, uint16_t *lowres_costs, + int bipred_weight, int mb_y, int len, int list ) { x264_mbtree_propagate_list_internal_avx512( len, ref_costs, mvs, propagate_amount, lowres_costs, bipred_weight << 9, mb_y << 16, h->mb.i_mb_width, h->mb.i_mb_height, h->mb.i_mb_stride, @@ -584,11 +802,12 @@ if( !(cpu&X264_CPU_MMX2) ) return; + pf->prefetch_fenc_400 = x264_prefetch_fenc_400_mmx2; pf->prefetch_fenc_420 = x264_prefetch_fenc_420_mmx2; pf->prefetch_fenc_422 = x264_prefetch_fenc_422_mmx2; pf->prefetch_ref = x264_prefetch_ref_mmx2; - pf->plane_copy_interleave = x264_plane_copy_interleave_mmx2; + pf->plane_copy_interleave = plane_copy_interleave_mmx2; pf->store_interleave_chroma = x264_store_interleave_chroma_mmx2; pf->avg[PIXEL_16x16] = x264_pixel_avg_16x16_mmx2; @@ -605,10 +824,10 @@ pf->get_ref = get_ref_mmx2; pf->mc_chroma = x264_mc_chroma_mmx2; pf->hpel_filter = x264_hpel_filter_mmx2; - pf->weight = x264_mc_weight_wtab_mmx2; - pf->weight_cache = x264_weight_cache_mmx2; - pf->offsetadd = x264_mc_offsetadd_wtab_mmx2; - pf->offsetsub = x264_mc_offsetsub_wtab_mmx2; + pf->weight = mc_weight_wtab_mmx2; + pf->weight_cache = weight_cache_mmx2; + pf->offsetadd = 
mc_offsetadd_wtab_mmx2; + pf->offsetsub = mc_offsetsub_wtab_mmx2; pf->frame_init_lowres_core = x264_frame_init_lowres_core_mmx2; @@ -616,7 +835,7 @@ { pf->memcpy_aligned = x264_memcpy_aligned_sse; pf->memzero_aligned = x264_memzero_aligned_sse; - pf->plane_copy = x264_plane_copy_sse; + pf->plane_copy = plane_copy_sse; } #if HIGH_BIT_DEPTH @@ -633,9 +852,9 @@ pf->load_deinterleave_chroma_fenc = x264_load_deinterleave_chroma_fenc_sse2; pf->load_deinterleave_chroma_fdec = x264_load_deinterleave_chroma_fdec_sse2; - pf->plane_copy_interleave = x264_plane_copy_interleave_sse2; + pf->plane_copy_interleave = plane_copy_interleave_sse2; pf->plane_copy_deinterleave = x264_plane_copy_deinterleave_sse2; - pf->plane_copy_deinterleave_yuyv = x264_plane_copy_deinterleave_yuyv_sse2; + pf->plane_copy_deinterleave_yuyv = plane_copy_deinterleave_yuyv_sse2; if( cpu&X264_CPU_SSE2_IS_FAST ) { @@ -648,8 +867,8 @@ pf->integral_init8v = x264_integral_init8v_sse2; pf->mbtree_propagate_cost = x264_mbtree_propagate_cost_sse2; pf->store_interleave_chroma = x264_store_interleave_chroma_sse2; - pf->offsetadd = x264_mc_offsetadd_wtab_sse2; - pf->offsetsub = x264_mc_offsetsub_wtab_sse2; + pf->offsetadd = mc_offsetadd_wtab_sse2; + pf->offsetsub = mc_offsetsub_wtab_sse2; if( cpu&X264_CPU_SSE2_IS_SLOW ) return; @@ -665,7 +884,7 @@ pf->avg[PIXEL_4x2] = x264_pixel_avg_4x2_sse2; pf->copy[PIXEL_16x16] = x264_mc_copy_w16_aligned_sse; - pf->weight = x264_mc_weight_wtab_sse2; + pf->weight = mc_weight_wtab_sse2; if( !(cpu&X264_CPU_STACK_MOD4) ) pf->mc_chroma = x264_mc_chroma_sse2; @@ -674,9 +893,9 @@ return; pf->frame_init_lowres_core = x264_frame_init_lowres_core_ssse3; - pf->plane_copy_swap = x264_plane_copy_swap_ssse3; + pf->plane_copy_swap = plane_copy_swap_ssse3; pf->plane_copy_deinterleave_v210 = x264_plane_copy_deinterleave_v210_ssse3; - pf->mbtree_propagate_list = x264_mbtree_propagate_list_ssse3; + pf->mbtree_propagate_list = mbtree_propagate_list_ssse3; pf->mbtree_fix8_pack = 
x264_mbtree_fix8_pack_ssse3; pf->mbtree_fix8_unpack = x264_mbtree_fix8_unpack_ssse3; @@ -689,9 +908,9 @@ pf->frame_init_lowres_core = x264_frame_init_lowres_core_avx; pf->load_deinterleave_chroma_fenc = x264_load_deinterleave_chroma_fenc_avx; pf->load_deinterleave_chroma_fdec = x264_load_deinterleave_chroma_fdec_avx; - pf->plane_copy_interleave = x264_plane_copy_interleave_avx; + pf->plane_copy_interleave = plane_copy_interleave_avx; pf->plane_copy_deinterleave = x264_plane_copy_deinterleave_avx; - pf->plane_copy_deinterleave_yuyv = x264_plane_copy_deinterleave_yuyv_avx; + pf->plane_copy_deinterleave_yuyv = plane_copy_deinterleave_yuyv_avx; pf->plane_copy_deinterleave_v210 = x264_plane_copy_deinterleave_v210_avx; pf->store_interleave_chroma = x264_store_interleave_chroma_avx; pf->copy[PIXEL_16x16] = x264_mc_copy_w16_aligned_avx; @@ -706,6 +925,7 @@ { pf->mc_luma = mc_luma_avx2; pf->load_deinterleave_chroma_fdec = x264_load_deinterleave_chroma_fdec_avx2; + pf->load_deinterleave_chroma_fenc = x264_load_deinterleave_chroma_fenc_avx2; pf->plane_copy_deinterleave_v210 = x264_plane_copy_deinterleave_v210_avx2; } @@ -738,18 +958,18 @@ pf->hpel_filter = x264_hpel_filter_sse2_amd; pf->mbtree_propagate_cost = x264_mbtree_propagate_cost_sse2; pf->plane_copy_deinterleave = x264_plane_copy_deinterleave_sse2; - pf->plane_copy_deinterleave_yuyv = x264_plane_copy_deinterleave_yuyv_sse2; + pf->plane_copy_deinterleave_yuyv = plane_copy_deinterleave_yuyv_sse2; pf->load_deinterleave_chroma_fenc = x264_load_deinterleave_chroma_fenc_sse2; pf->load_deinterleave_chroma_fdec = x264_load_deinterleave_chroma_fdec_sse2; pf->plane_copy_deinterleave_rgb = x264_plane_copy_deinterleave_rgb_sse2; if( !(cpu&X264_CPU_SSE2_IS_SLOW) ) { - pf->weight = x264_mc_weight_wtab_sse2; + pf->weight = mc_weight_wtab_sse2; if( !(cpu&X264_CPU_SLOW_ATOM) ) { - pf->offsetadd = x264_mc_offsetadd_wtab_sse2; - pf->offsetsub = x264_mc_offsetsub_wtab_sse2; + pf->offsetadd = mc_offsetadd_wtab_sse2; + pf->offsetsub = 
mc_offsetsub_wtab_sse2; } pf->copy[PIXEL_16x16] = x264_mc_copy_w16_aligned_sse; @@ -766,7 +986,7 @@ if( cpu&X264_CPU_SSE2_IS_FAST ) { pf->store_interleave_chroma = x264_store_interleave_chroma_sse2; // FIXME sse2fast? sse2medium? - pf->plane_copy_interleave = x264_plane_copy_interleave_sse2; + pf->plane_copy_interleave = plane_copy_interleave_sse2; pf->mc_luma = mc_luma_sse2; pf->get_ref = get_ref_sse2; if( cpu&X264_CPU_CACHELINE_64 ) @@ -789,9 +1009,9 @@ pf->avg[PIXEL_4x8] = x264_pixel_avg_4x8_ssse3; pf->avg[PIXEL_4x4] = x264_pixel_avg_4x4_ssse3; pf->avg[PIXEL_4x2] = x264_pixel_avg_4x2_ssse3; - pf->plane_copy_swap = x264_plane_copy_swap_ssse3; + pf->plane_copy_swap = plane_copy_swap_ssse3; pf->plane_copy_deinterleave_rgb = x264_plane_copy_deinterleave_rgb_ssse3; - pf->mbtree_propagate_list = x264_mbtree_propagate_list_ssse3; + pf->mbtree_propagate_list = mbtree_propagate_list_ssse3; pf->mbtree_fix8_pack = x264_mbtree_fix8_pack_ssse3; pf->mbtree_fix8_unpack = x264_mbtree_fix8_unpack_ssse3; @@ -800,7 +1020,7 @@ pf->load_deinterleave_chroma_fenc = x264_load_deinterleave_chroma_fenc_ssse3; pf->load_deinterleave_chroma_fdec = x264_load_deinterleave_chroma_fdec_ssse3; pf->plane_copy_deinterleave = x264_plane_copy_deinterleave_ssse3; - pf->plane_copy_deinterleave_yuyv = x264_plane_copy_deinterleave_yuyv_ssse3; + pf->plane_copy_deinterleave_yuyv = plane_copy_deinterleave_yuyv_ssse3; } if( !(cpu&X264_CPU_SLOW_PALIGNR) ) @@ -827,8 +1047,8 @@ } } - pf->weight_cache = x264_weight_cache_ssse3; - pf->weight = x264_mc_weight_wtab_ssse3; + pf->weight_cache = weight_cache_ssse3; + pf->weight = mc_weight_wtab_ssse3; if( !(cpu&(X264_CPU_SLOW_SHUFFLE|X264_CPU_SLOW_ATOM|X264_CPU_SLOW_PALIGNR)) ) pf->integral_init4v = x264_integral_init4v_ssse3; @@ -856,7 +1076,7 @@ { pf->hpel_filter = x264_hpel_filter_avx2; pf->mc_chroma = x264_mc_chroma_avx2; - pf->weight = x264_mc_weight_wtab_avx2; + pf->weight = mc_weight_wtab_avx2; pf->avg[PIXEL_16x16] = x264_pixel_avg_16x16_avx2; 
pf->avg[PIXEL_16x8] = x264_pixel_avg_16x8_avx2; pf->integral_init8v = x264_integral_init8v_avx2; @@ -865,6 +1085,7 @@ pf->integral_init4h = x264_integral_init4h_avx2; pf->frame_init_lowres_core = x264_frame_init_lowres_core_avx2; pf->plane_copy_deinterleave_rgb = x264_plane_copy_deinterleave_rgb_avx2; + pf->load_deinterleave_chroma_fenc = x264_load_deinterleave_chroma_fenc_avx2; } if( cpu&X264_CPU_AVX512 ) @@ -874,6 +1095,8 @@ pf->avg[PIXEL_8x16] = x264_pixel_avg_8x16_avx512; pf->avg[PIXEL_8x8] = x264_pixel_avg_8x8_avx512; pf->avg[PIXEL_8x4] = x264_pixel_avg_8x4_avx512; + pf->load_deinterleave_chroma_fdec = x264_load_deinterleave_chroma_fdec_avx512; + pf->load_deinterleave_chroma_fenc = x264_load_deinterleave_chroma_fenc_avx512; } #endif // HIGH_BIT_DEPTH @@ -881,22 +1104,21 @@ return; pf->memcpy_aligned = x264_memcpy_aligned_avx; pf->memzero_aligned = x264_memzero_aligned_avx; - pf->plane_copy = x264_plane_copy_avx; + pf->plane_copy = plane_copy_avx; pf->mbtree_propagate_cost = x264_mbtree_propagate_cost_avx; - pf->mbtree_propagate_list = x264_mbtree_propagate_list_avx; + pf->mbtree_propagate_list = mbtree_propagate_list_avx; if( cpu&X264_CPU_FMA4 ) pf->mbtree_propagate_cost = x264_mbtree_propagate_cost_fma4; if( !(cpu&X264_CPU_AVX2) ) return; - pf->plane_copy_swap = x264_plane_copy_swap_avx2; + pf->plane_copy_swap = plane_copy_swap_avx2; pf->plane_copy_deinterleave = x264_plane_copy_deinterleave_avx2; - pf->plane_copy_deinterleave_yuyv = x264_plane_copy_deinterleave_yuyv_avx2; - pf->load_deinterleave_chroma_fenc = x264_load_deinterleave_chroma_fenc_avx2; + pf->plane_copy_deinterleave_yuyv = plane_copy_deinterleave_yuyv_avx2; pf->get_ref = get_ref_avx2; pf->mbtree_propagate_cost = x264_mbtree_propagate_cost_avx2; - pf->mbtree_propagate_list = x264_mbtree_propagate_list_avx2; + pf->mbtree_propagate_list = mbtree_propagate_list_avx2; pf->mbtree_fix8_pack = x264_mbtree_fix8_pack_avx2; pf->mbtree_fix8_unpack = x264_mbtree_fix8_unpack_avx2; @@ -904,8 +1126,12 @@ 
return; pf->memcpy_aligned = x264_memcpy_aligned_avx512; pf->memzero_aligned = x264_memzero_aligned_avx512; + pf->plane_copy = x264_plane_copy_avx512; + pf->plane_copy_swap = x264_plane_copy_swap_avx512; pf->mbtree_propagate_cost = x264_mbtree_propagate_cost_avx512; #if ARCH_X86_64 - pf->mbtree_propagate_list = x264_mbtree_propagate_list_avx512; + pf->mbtree_propagate_list = mbtree_propagate_list_avx512; #endif + pf->mbtree_fix8_pack = x264_mbtree_fix8_pack_avx512; + pf->mbtree_fix8_unpack = x264_mbtree_fix8_unpack_avx512; } diff -Nru x264-0.152.2854+gite9a5903/common/x86/mc.h x264-0.158.2988+git-20191101.7817004/common/x86/mc.h --- x264-0.152.2854+gite9a5903/common/x86/mc.h 2017-12-31 12:50:51.000000000 +0000 +++ x264-0.158.2988+git-20191101.7817004/common/x86/mc.h 2019-11-09 05:16:29.000000000 +0000 @@ -1,7 +1,7 @@ /***************************************************************************** * mc.h: x86 motion compensation ***************************************************************************** - * Copyright (C) 2003-2017 x264 project + * Copyright (C) 2003-2019 x264 project * * Authors: Loren Merritt * Laurent Aimar @@ -24,9 +24,10 @@ * For more information, contact us at licensing@x264.com. 
*****************************************************************************/ -#ifndef X264_I386_MC_H -#define X264_I386_MC_H +#ifndef X264_X86_MC_H +#define X264_X86_MC_H +#define x264_mc_init_mmx x264_template(mc_init_mmx) void x264_mc_init_mmx( int cpu, x264_mc_functions_t *pf ); #endif diff -Nru x264-0.152.2854+gite9a5903/common/x86/pixel-32.asm x264-0.158.2988+git-20191101.7817004/common/x86/pixel-32.asm --- x264-0.152.2854+gite9a5903/common/x86/pixel-32.asm 2017-12-31 12:50:51.000000000 +0000 +++ x264-0.158.2988+git-20191101.7817004/common/x86/pixel-32.asm 2019-11-09 05:16:29.000000000 +0000 @@ -1,7 +1,7 @@ ;***************************************************************************** ;* pixel-32.asm: x86_32 pixel metrics ;***************************************************************************** -;* Copyright (C) 2003-2017 x264 project +;* Copyright (C) 2003-2019 x264 project ;* ;* Authors: Loren Merritt ;* Laurent Aimar @@ -33,6 +33,8 @@ SECTION .text INIT_MMX mmx2 +%if HIGH_BIT_DEPTH == 0 + %macro LOAD_DIFF_4x8P 1 ; dx LOAD_DIFF m0, m7, none, [r0+%1], [r2+%1] LOAD_DIFF m1, m6, none, [r0+%1+r1], [r2+%1+r3] @@ -418,3 +420,4 @@ emms RET +%endif ; !HIGH_BIT_DEPTH diff -Nru x264-0.152.2854+gite9a5903/common/x86/pixel-a.asm x264-0.158.2988+git-20191101.7817004/common/x86/pixel-a.asm --- x264-0.152.2854+gite9a5903/common/x86/pixel-a.asm 2017-12-31 12:50:51.000000000 +0000 +++ x264-0.158.2988+git-20191101.7817004/common/x86/pixel-a.asm 2019-11-09 05:16:29.000000000 +0000 @@ -1,7 +1,7 @@ ;***************************************************************************** ;* pixel.asm: x86 pixel metrics ;***************************************************************************** -;* Copyright (C) 2003-2017 x264 project +;* Copyright (C) 2003-2019 x264 project ;* ;* Authors: Loren Merritt ;* Holger Lubitz @@ -131,6 +131,7 @@ sw_f0: dq 0xfff0, 0 pd_f0: times 4 dd 0xffff0000 +pd_2: times 4 dd 2 pw_76543210: dw 0, 1, 2, 3, 4, 5, 6, 7 @@ -2223,7 +2224,7 @@ 
;----------------------------------------------------------------------------- ; void intra_sa8d_x3_8x8( uint8_t *fenc, uint8_t edge[36], int *res ) ;----------------------------------------------------------------------------- -cglobal intra_sa8d_x3_8x8, 3,3,14 +cglobal intra_sa8d_x3_8x8, 3,3,13 ; 8x8 hadamard pxor m8, m8 movq m0, [r0+0*FENC_STRIDE] @@ -2245,77 +2246,80 @@ HADAMARD8_2D 0, 1, 2, 3, 4, 5, 6, 7, 8 - ABSW2 m8, m9, m2, m3, m2, m3 - ABSW2 m10, m11, m4, m5, m4, m5 - paddusw m8, m10 - paddusw m9, m11 - ABSW2 m10, m11, m6, m7, m6, m7 - ABSW m13, m1, m1 - paddusw m10, m11 - paddusw m8, m9 - paddusw m13, m10 - paddusw m13, m8 + ABSW2 m8, m9, m2, m3, m2, m3 + ABSW2 m10, m11, m4, m5, m4, m5 + paddw m8, m10 + paddw m9, m11 + ABSW2 m10, m11, m6, m7, m6, m7 + ABSW m12, m1, m1 + paddw m10, m11 + paddw m8, m9 + paddw m12, m10 + paddw m12, m8 ; 1D hadamard of edges - movq m8, [r1+7] - movq m9, [r1+16] - pxor m10, m10 - punpcklbw m8, m10 - punpcklbw m9, m10 + movq m8, [r1+7] + movq m9, [r1+16] + pxor m10, m10 + punpcklbw m8, m10 + punpcklbw m9, m10 HSUMSUB2 pmullw, m8, m9, m10, m11, m11, q1032, [pw_ppppmmmm] HSUMSUB2 pmullw, m8, m9, m10, m11, m11, q2301, [pw_ppmmppmm] - pshuflw m10, m8, q2301 - pshuflw m11, m9, q2301 - pshufhw m10, m10, q2301 - pshufhw m11, m11, q2301 - pmullw m8, [pw_pmpmpmpm] - pmullw m11, [pw_pmpmpmpm] - paddw m8, m10 - paddw m9, m11 + pshuflw m10, m8, q2301 + pshuflw m11, m9, q2301 + pshufhw m10, m10, q2301 + pshufhw m11, m11, q2301 + pmullw m8, [pw_pmpmpmpm] + pmullw m11, [pw_pmpmpmpm] + paddw m8, m10 + paddw m9, m11 ; differences - paddw m10, m8, m9 - paddw m10, [pw_8] - pand m10, [sw_f0] - psllw m10, 2 ; dc - - psllw m8, 3 ; left edge - psubw m8, m0 - psubw m10, m0 - ABSW2 m8, m10, m8, m10, m11, m12 ; 1x8 sum - paddusw m8, m13 - paddusw m13, m10 - punpcklwd m0, m1 - punpcklwd m2, m3 - punpcklwd m4, m5 - punpcklwd m6, m7 - punpckldq m0, m2 - punpckldq m4, m6 - punpcklqdq m0, m4 ; transpose - psllw m9, 3 ; top edge - psrldq m2, m13, 2 ; 8x7 sum 
- psubw m0, m9 ; 8x1 sum - ABSW m0, m0, m9 - paddusw m2, m0 + paddw m10, m8, m9 + paddw m10, [pw_8] + pand m10, [sw_f0] + psllw m8, 3 ; left edge + psllw m10, 2 ; dc + psubw m8, m0 + psubw m10, m0 + punpcklwd m0, m1 + punpcklwd m2, m3 + punpcklwd m4, m5 + punpcklwd m6, m7 + ABSW m10, m10, m1 + paddw m10, m12 + punpckldq m0, m2 + punpckldq m4, m6 + punpcklqdq m0, m4 ; transpose + psllw m9, 3 ; top edge + psrldq m2, m10, 2 ; 8x7 sum + psubw m0, m9 ; 8x1 sum + ABSW2 m8, m0, m8, m0, m1, m3 ; 1x8 sum + paddw m8, m12 + paddusw m2, m0 ; 3x HADDW - movdqa m7, [pw_1] - pmaddwd m2, m7 - pmaddwd m8, m7 - pmaddwd m13, m7 - punpckhdq m3, m2, m8 - punpckldq m2, m8 - pshufd m5, m13, q3311 - paddd m2, m3 - paddd m5, m13 - punpckhqdq m0, m2, m5 - punpcklqdq m2, m5 - pavgw m0, m2 - pxor m1, m1 - pavgw m0, m1 - movq [r2], m0 ; i8x8_v, i8x8_h - psrldq m0, 8 - movd [r2+8], m0 ; i8x8_dc + mova m7, [pd_f0] + pandn m0, m7, m10 + psrld m10, 16 + pandn m1, m7, m8 + psrld m8, 16 + pandn m7, m2 + psrld m2, 16 + paddd m0, m10 + paddd m1, m8 + paddd m2, m7 + pshufd m3, m0, q2301 + punpckhdq m4, m2, m1 + punpckldq m2, m1 + paddd m3, m0 + paddd m2, m4 + punpckhqdq m0, m2, m3 + punpcklqdq m2, m3 + paddd m0, [pd_2] + paddd m0, m2 + psrld m0, 2 + mova [r2], m0 RET %endif ; ARCH_X86_64 %endmacro ; INTRA_SA8D_SSE2 @@ -2862,7 +2866,7 @@ ; output the predicted samples mov r3d, eax shr r3d, 16 -%ifdef PIC +%if ARCH_X86_64 lea r2, [%2_lut] movzx r2d, byte [r2+r3] %else @@ -4280,7 +4284,7 @@ ; instantiate satds -%if ARCH_X86_64 == 0 +%if ARCH_X86_64 == 0 && HIGH_BIT_DEPTH == 0 cextern pixel_sa8d_8x8_internal_mmx2 INIT_MMX mmx2 SA8D @@ -4744,7 +4748,7 @@ %endmacro %macro SATD_AVX512_END 0-1 0 ; sa8d - paddw m0 {k1}{z}, m1 ; zero-extend to dwords + vpaddw m0 {k1}{z}, m1 ; zero-extend to dwords %if ARCH_X86_64 %if mmsize == 64 vextracti32x8 ym1, m0, 1 @@ -5099,7 +5103,7 @@ je .skip ; faster only if this is the common case; remove branch if we use ssim on a macroblock level neg r2 -%ifdef PIC +%if ARCH_X86_64 
lea r3, [mask_ff + 16] %xdefine %%mask r3 %else @@ -5549,7 +5553,7 @@ add r5, r6 xor r0d, r0d ; nmv mov [r5], r0d -%ifdef PIC +%if ARCH_X86_64 lea r1, [$$] %define GLOBAL +r1-$$ %else diff -Nru x264-0.152.2854+gite9a5903/common/x86/pixel.h x264-0.158.2988+git-20191101.7817004/common/x86/pixel.h --- x264-0.152.2854+gite9a5903/common/x86/pixel.h 2017-12-31 12:50:51.000000000 +0000 +++ x264-0.158.2988+git-20191101.7817004/common/x86/pixel.h 2019-11-09 05:16:29.000000000 +0000 @@ -1,7 +1,7 @@ /***************************************************************************** * pixel.h: x86 pixel metrics ***************************************************************************** - * Copyright (C) 2003-2017 x264 project + * Copyright (C) 2003-2019 x264 project * * Authors: Laurent Aimar * Loren Merritt @@ -25,9 +25,345 @@ * For more information, contact us at licensing@x264.com. *****************************************************************************/ -#ifndef X264_I386_PIXEL_H -#define X264_I386_PIXEL_H +#ifndef X264_X86_PIXEL_H +#define X264_X86_PIXEL_H +#define x264_pixel_ads1_avx x264_template(pixel_ads1_avx) +#define x264_pixel_ads1_avx2 x264_template(pixel_ads1_avx2) +#define x264_pixel_ads1_mmx2 x264_template(pixel_ads1_mmx2) +#define x264_pixel_ads1_sse2 x264_template(pixel_ads1_sse2) +#define x264_pixel_ads1_ssse3 x264_template(pixel_ads1_ssse3) +#define x264_pixel_ads2_avx x264_template(pixel_ads2_avx) +#define x264_pixel_ads2_avx2 x264_template(pixel_ads2_avx2) +#define x264_pixel_ads2_mmx2 x264_template(pixel_ads2_mmx2) +#define x264_pixel_ads2_sse2 x264_template(pixel_ads2_sse2) +#define x264_pixel_ads2_ssse3 x264_template(pixel_ads2_ssse3) +#define x264_pixel_ads4_avx x264_template(pixel_ads4_avx) +#define x264_pixel_ads4_avx2 x264_template(pixel_ads4_avx2) +#define x264_pixel_ads4_mmx2 x264_template(pixel_ads4_mmx2) +#define x264_pixel_ads4_sse2 x264_template(pixel_ads4_sse2) +#define x264_pixel_ads4_ssse3 x264_template(pixel_ads4_ssse3) +#define 
x264_pixel_hadamard_ac_16x16_avx x264_template(pixel_hadamard_ac_16x16_avx) +#define x264_pixel_hadamard_ac_16x16_avx2 x264_template(pixel_hadamard_ac_16x16_avx2) +#define x264_pixel_hadamard_ac_16x16_mmx2 x264_template(pixel_hadamard_ac_16x16_mmx2) +#define x264_pixel_hadamard_ac_16x16_sse2 x264_template(pixel_hadamard_ac_16x16_sse2) +#define x264_pixel_hadamard_ac_16x16_sse4 x264_template(pixel_hadamard_ac_16x16_sse4) +#define x264_pixel_hadamard_ac_16x16_ssse3 x264_template(pixel_hadamard_ac_16x16_ssse3) +#define x264_pixel_hadamard_ac_16x16_ssse3_atom x264_template(pixel_hadamard_ac_16x16_ssse3_atom) +#define x264_pixel_hadamard_ac_16x16_xop x264_template(pixel_hadamard_ac_16x16_xop) +#define x264_pixel_hadamard_ac_16x8_avx x264_template(pixel_hadamard_ac_16x8_avx) +#define x264_pixel_hadamard_ac_16x8_avx2 x264_template(pixel_hadamard_ac_16x8_avx2) +#define x264_pixel_hadamard_ac_16x8_mmx2 x264_template(pixel_hadamard_ac_16x8_mmx2) +#define x264_pixel_hadamard_ac_16x8_sse2 x264_template(pixel_hadamard_ac_16x8_sse2) +#define x264_pixel_hadamard_ac_16x8_sse4 x264_template(pixel_hadamard_ac_16x8_sse4) +#define x264_pixel_hadamard_ac_16x8_ssse3 x264_template(pixel_hadamard_ac_16x8_ssse3) +#define x264_pixel_hadamard_ac_16x8_ssse3_atom x264_template(pixel_hadamard_ac_16x8_ssse3_atom) +#define x264_pixel_hadamard_ac_16x8_xop x264_template(pixel_hadamard_ac_16x8_xop) +#define x264_pixel_hadamard_ac_8x16_avx x264_template(pixel_hadamard_ac_8x16_avx) +#define x264_pixel_hadamard_ac_8x16_mmx2 x264_template(pixel_hadamard_ac_8x16_mmx2) +#define x264_pixel_hadamard_ac_8x16_sse2 x264_template(pixel_hadamard_ac_8x16_sse2) +#define x264_pixel_hadamard_ac_8x16_sse4 x264_template(pixel_hadamard_ac_8x16_sse4) +#define x264_pixel_hadamard_ac_8x16_ssse3 x264_template(pixel_hadamard_ac_8x16_ssse3) +#define x264_pixel_hadamard_ac_8x16_ssse3_atom x264_template(pixel_hadamard_ac_8x16_ssse3_atom) +#define x264_pixel_hadamard_ac_8x16_xop x264_template(pixel_hadamard_ac_8x16_xop) 
+#define x264_pixel_hadamard_ac_8x8_avx x264_template(pixel_hadamard_ac_8x8_avx) +#define x264_pixel_hadamard_ac_8x8_mmx2 x264_template(pixel_hadamard_ac_8x8_mmx2) +#define x264_pixel_hadamard_ac_8x8_sse2 x264_template(pixel_hadamard_ac_8x8_sse2) +#define x264_pixel_hadamard_ac_8x8_sse4 x264_template(pixel_hadamard_ac_8x8_sse4) +#define x264_pixel_hadamard_ac_8x8_ssse3 x264_template(pixel_hadamard_ac_8x8_ssse3) +#define x264_pixel_hadamard_ac_8x8_ssse3_atom x264_template(pixel_hadamard_ac_8x8_ssse3_atom) +#define x264_pixel_hadamard_ac_8x8_xop x264_template(pixel_hadamard_ac_8x8_xop) +#define x264_pixel_sa8d_16x16_mmx2 x264_template(pixel_sa8d_16x16_mmx2) +#define x264_pixel_sa8d_16x16_avx x264_template(pixel_sa8d_16x16_avx) +#define x264_pixel_sa8d_16x16_sse2 x264_template(pixel_sa8d_16x16_sse2) +#define x264_pixel_sa8d_16x16_sse4 x264_template(pixel_sa8d_16x16_sse4) +#define x264_pixel_sa8d_16x16_ssse3 x264_template(pixel_sa8d_16x16_ssse3) +#define x264_pixel_sa8d_16x16_ssse3_atom x264_template(pixel_sa8d_16x16_ssse3_atom) +#define x264_pixel_sa8d_16x16_xop x264_template(pixel_sa8d_16x16_xop) +#define x264_pixel_sa8d_8x8_mmx2 x264_template(pixel_sa8d_8x8_mmx2) +#define x264_pixel_sa8d_8x8_avx x264_template(pixel_sa8d_8x8_avx) +#define x264_pixel_sa8d_8x8_avx2 x264_template(pixel_sa8d_8x8_avx2) +#define x264_pixel_sa8d_8x8_avx512 x264_template(pixel_sa8d_8x8_avx512) +#define x264_pixel_sa8d_8x8_sse2 x264_template(pixel_sa8d_8x8_sse2) +#define x264_pixel_sa8d_8x8_sse4 x264_template(pixel_sa8d_8x8_sse4) +#define x264_pixel_sa8d_8x8_ssse3 x264_template(pixel_sa8d_8x8_ssse3) +#define x264_pixel_sa8d_8x8_ssse3_atom x264_template(pixel_sa8d_8x8_ssse3_atom) +#define x264_pixel_sa8d_8x8_xop x264_template(pixel_sa8d_8x8_xop) +#define x264_pixel_sad_16x16_avx2 x264_template(pixel_sad_16x16_avx2) +#define x264_pixel_sad_16x16_avx512 x264_template(pixel_sad_16x16_avx512) +#define x264_pixel_sad_16x16_cache32_mmx2 x264_template(pixel_sad_16x16_cache32_mmx2) +#define 
x264_pixel_sad_16x16_cache64_mmx2 x264_template(pixel_sad_16x16_cache64_mmx2) +#define x264_pixel_sad_16x16_cache64_sse2 x264_template(pixel_sad_16x16_cache64_sse2) +#define x264_pixel_sad_16x16_cache64_ssse3 x264_template(pixel_sad_16x16_cache64_ssse3) +#define x264_pixel_sad_16x16_mmx2 x264_template(pixel_sad_16x16_mmx2) +#define x264_pixel_sad_16x16_sse2 x264_template(pixel_sad_16x16_sse2) +#define x264_pixel_sad_16x16_sse2_aligned x264_template(pixel_sad_16x16_sse2_aligned) +#define x264_pixel_sad_16x16_sse3 x264_template(pixel_sad_16x16_sse3) +#define x264_pixel_sad_16x16_ssse3 x264_template(pixel_sad_16x16_ssse3) +#define x264_pixel_sad_16x16_ssse3_aligned x264_template(pixel_sad_16x16_ssse3_aligned) +#define x264_pixel_sad_16x8_avx2 x264_template(pixel_sad_16x8_avx2) +#define x264_pixel_sad_16x8_avx512 x264_template(pixel_sad_16x8_avx512) +#define x264_pixel_sad_16x8_cache32_mmx2 x264_template(pixel_sad_16x8_cache32_mmx2) +#define x264_pixel_sad_16x8_cache64_mmx2 x264_template(pixel_sad_16x8_cache64_mmx2) +#define x264_pixel_sad_16x8_cache64_sse2 x264_template(pixel_sad_16x8_cache64_sse2) +#define x264_pixel_sad_16x8_cache64_ssse3 x264_template(pixel_sad_16x8_cache64_ssse3) +#define x264_pixel_sad_16x8_mmx2 x264_template(pixel_sad_16x8_mmx2) +#define x264_pixel_sad_16x8_sse2 x264_template(pixel_sad_16x8_sse2) +#define x264_pixel_sad_16x8_sse2_aligned x264_template(pixel_sad_16x8_sse2_aligned) +#define x264_pixel_sad_16x8_sse3 x264_template(pixel_sad_16x8_sse3) +#define x264_pixel_sad_16x8_ssse3 x264_template(pixel_sad_16x8_ssse3) +#define x264_pixel_sad_16x8_ssse3_aligned x264_template(pixel_sad_16x8_ssse3_aligned) +#define x264_pixel_sad_4x16_avx512 x264_template(pixel_sad_4x16_avx512) +#define x264_pixel_sad_4x16_mmx2 x264_template(pixel_sad_4x16_mmx2) +#define x264_pixel_sad_4x4_avx512 x264_template(pixel_sad_4x4_avx512) +#define x264_pixel_sad_4x4_mmx2 x264_template(pixel_sad_4x4_mmx2) +#define x264_pixel_sad_4x4_ssse3 x264_template(pixel_sad_4x4_ssse3) 
+#define x264_pixel_sad_4x8_avx512 x264_template(pixel_sad_4x8_avx512) +#define x264_pixel_sad_4x8_mmx2 x264_template(pixel_sad_4x8_mmx2) +#define x264_pixel_sad_4x8_ssse3 x264_template(pixel_sad_4x8_ssse3) +#define x264_pixel_sad_8x16_avx512 x264_template(pixel_sad_8x16_avx512) +#define x264_pixel_sad_8x16_cache32_mmx2 x264_template(pixel_sad_8x16_cache32_mmx2) +#define x264_pixel_sad_8x16_cache64_mmx2 x264_template(pixel_sad_8x16_cache64_mmx2) +#define x264_pixel_sad_8x16_mmx2 x264_template(pixel_sad_8x16_mmx2) +#define x264_pixel_sad_8x16_sse2 x264_template(pixel_sad_8x16_sse2) +#define x264_pixel_sad_8x16_sse2_aligned x264_template(pixel_sad_8x16_sse2_aligned) +#define x264_pixel_sad_8x16_ssse3 x264_template(pixel_sad_8x16_ssse3) +#define x264_pixel_sad_8x16_ssse3_aligned x264_template(pixel_sad_8x16_ssse3_aligned) +#define x264_pixel_sad_8x4_avx512 x264_template(pixel_sad_8x4_avx512) +#define x264_pixel_sad_8x4_cache32_mmx2 x264_template(pixel_sad_8x4_cache32_mmx2) +#define x264_pixel_sad_8x4_cache64_mmx2 x264_template(pixel_sad_8x4_cache64_mmx2) +#define x264_pixel_sad_8x4_mmx2 x264_template(pixel_sad_8x4_mmx2) +#define x264_pixel_sad_8x4_sse2 x264_template(pixel_sad_8x4_sse2) +#define x264_pixel_sad_8x4_ssse3 x264_template(pixel_sad_8x4_ssse3) +#define x264_pixel_sad_8x8_avx512 x264_template(pixel_sad_8x8_avx512) +#define x264_pixel_sad_8x8_cache32_mmx2 x264_template(pixel_sad_8x8_cache32_mmx2) +#define x264_pixel_sad_8x8_cache64_mmx2 x264_template(pixel_sad_8x8_cache64_mmx2) +#define x264_pixel_sad_8x8_mmx2 x264_template(pixel_sad_8x8_mmx2) +#define x264_pixel_sad_8x8_sse2 x264_template(pixel_sad_8x8_sse2) +#define x264_pixel_sad_8x8_sse2_aligned x264_template(pixel_sad_8x8_sse2_aligned) +#define x264_pixel_sad_8x8_ssse3 x264_template(pixel_sad_8x8_ssse3) +#define x264_pixel_sad_8x8_ssse3_aligned x264_template(pixel_sad_8x8_ssse3_aligned) +#define x264_pixel_sad_x3_16x16_avx x264_template(pixel_sad_x3_16x16_avx) +#define x264_pixel_sad_x3_16x16_avx2 
x264_template(pixel_sad_x3_16x16_avx2) +#define x264_pixel_sad_x3_16x16_avx512 x264_template(pixel_sad_x3_16x16_avx512) +#define x264_pixel_sad_x3_16x16_cache32_mmx2 x264_template(pixel_sad_x3_16x16_cache32_mmx2) +#define x264_pixel_sad_x3_16x16_cache64_mmx2 x264_template(pixel_sad_x3_16x16_cache64_mmx2) +#define x264_pixel_sad_x3_16x16_cache64_sse2 x264_template(pixel_sad_x3_16x16_cache64_sse2) +#define x264_pixel_sad_x3_16x16_cache64_ssse3 x264_template(pixel_sad_x3_16x16_cache64_ssse3) +#define x264_pixel_sad_x3_16x16_mmx2 x264_template(pixel_sad_x3_16x16_mmx2) +#define x264_pixel_sad_x3_16x16_sse2 x264_template(pixel_sad_x3_16x16_sse2) +#define x264_pixel_sad_x3_16x16_sse3 x264_template(pixel_sad_x3_16x16_sse3) +#define x264_pixel_sad_x3_16x16_ssse3 x264_template(pixel_sad_x3_16x16_ssse3) +#define x264_pixel_sad_x3_16x16_xop x264_template(pixel_sad_x3_16x16_xop) +#define x264_pixel_sad_x3_16x8_avx x264_template(pixel_sad_x3_16x8_avx) +#define x264_pixel_sad_x3_16x8_avx2 x264_template(pixel_sad_x3_16x8_avx2) +#define x264_pixel_sad_x3_16x8_avx512 x264_template(pixel_sad_x3_16x8_avx512) +#define x264_pixel_sad_x3_16x8_cache32_mmx2 x264_template(pixel_sad_x3_16x8_cache32_mmx2) +#define x264_pixel_sad_x3_16x8_cache64_mmx2 x264_template(pixel_sad_x3_16x8_cache64_mmx2) +#define x264_pixel_sad_x3_16x8_cache64_sse2 x264_template(pixel_sad_x3_16x8_cache64_sse2) +#define x264_pixel_sad_x3_16x8_cache64_ssse3 x264_template(pixel_sad_x3_16x8_cache64_ssse3) +#define x264_pixel_sad_x3_16x8_mmx2 x264_template(pixel_sad_x3_16x8_mmx2) +#define x264_pixel_sad_x3_16x8_sse2 x264_template(pixel_sad_x3_16x8_sse2) +#define x264_pixel_sad_x3_16x8_sse3 x264_template(pixel_sad_x3_16x8_sse3) +#define x264_pixel_sad_x3_16x8_ssse3 x264_template(pixel_sad_x3_16x8_ssse3) +#define x264_pixel_sad_x3_16x8_xop x264_template(pixel_sad_x3_16x8_xop) +#define x264_pixel_sad_x3_4x4_avx512 x264_template(pixel_sad_x3_4x4_avx512) +#define x264_pixel_sad_x3_4x4_mmx2 x264_template(pixel_sad_x3_4x4_mmx2) 
+#define x264_pixel_sad_x3_4x4_ssse3 x264_template(pixel_sad_x3_4x4_ssse3) +#define x264_pixel_sad_x3_4x8_avx512 x264_template(pixel_sad_x3_4x8_avx512) +#define x264_pixel_sad_x3_4x8_mmx2 x264_template(pixel_sad_x3_4x8_mmx2) +#define x264_pixel_sad_x3_4x8_ssse3 x264_template(pixel_sad_x3_4x8_ssse3) +#define x264_pixel_sad_x3_8x16_avx512 x264_template(pixel_sad_x3_8x16_avx512) +#define x264_pixel_sad_x3_8x16_cache32_mmx2 x264_template(pixel_sad_x3_8x16_cache32_mmx2) +#define x264_pixel_sad_x3_8x16_cache64_mmx2 x264_template(pixel_sad_x3_8x16_cache64_mmx2) +#define x264_pixel_sad_x3_8x16_cache64_sse2 x264_template(pixel_sad_x3_8x16_cache64_sse2) +#define x264_pixel_sad_x3_8x16_mmx2 x264_template(pixel_sad_x3_8x16_mmx2) +#define x264_pixel_sad_x3_8x16_sse2 x264_template(pixel_sad_x3_8x16_sse2) +#define x264_pixel_sad_x3_8x16_ssse3 x264_template(pixel_sad_x3_8x16_ssse3) +#define x264_pixel_sad_x3_8x16_xop x264_template(pixel_sad_x3_8x16_xop) +#define x264_pixel_sad_x3_8x4_avx512 x264_template(pixel_sad_x3_8x4_avx512) +#define x264_pixel_sad_x3_8x4_mmx2 x264_template(pixel_sad_x3_8x4_mmx2) +#define x264_pixel_sad_x3_8x4_sse2 x264_template(pixel_sad_x3_8x4_sse2) +#define x264_pixel_sad_x3_8x4_ssse3 x264_template(pixel_sad_x3_8x4_ssse3) +#define x264_pixel_sad_x3_8x4_xop x264_template(pixel_sad_x3_8x4_xop) +#define x264_pixel_sad_x3_8x8_avx512 x264_template(pixel_sad_x3_8x8_avx512) +#define x264_pixel_sad_x3_8x8_cache32_mmx2 x264_template(pixel_sad_x3_8x8_cache32_mmx2) +#define x264_pixel_sad_x3_8x8_cache64_mmx2 x264_template(pixel_sad_x3_8x8_cache64_mmx2) +#define x264_pixel_sad_x3_8x8_mmx2 x264_template(pixel_sad_x3_8x8_mmx2) +#define x264_pixel_sad_x3_8x8_sse2 x264_template(pixel_sad_x3_8x8_sse2) +#define x264_pixel_sad_x3_8x8_ssse3 x264_template(pixel_sad_x3_8x8_ssse3) +#define x264_pixel_sad_x3_8x8_xop x264_template(pixel_sad_x3_8x8_xop) +#define x264_pixel_sad_x4_16x16_avx x264_template(pixel_sad_x4_16x16_avx) +#define x264_pixel_sad_x4_16x16_avx2 
x264_template(pixel_sad_x4_16x16_avx2) +#define x264_pixel_sad_x4_16x16_avx512 x264_template(pixel_sad_x4_16x16_avx512) +#define x264_pixel_sad_x4_16x16_cache32_mmx2 x264_template(pixel_sad_x4_16x16_cache32_mmx2) +#define x264_pixel_sad_x4_16x16_cache64_mmx2 x264_template(pixel_sad_x4_16x16_cache64_mmx2) +#define x264_pixel_sad_x4_16x16_cache64_sse2 x264_template(pixel_sad_x4_16x16_cache64_sse2) +#define x264_pixel_sad_x4_16x16_cache64_ssse3 x264_template(pixel_sad_x4_16x16_cache64_ssse3) +#define x264_pixel_sad_x4_16x16_mmx2 x264_template(pixel_sad_x4_16x16_mmx2) +#define x264_pixel_sad_x4_16x16_sse2 x264_template(pixel_sad_x4_16x16_sse2) +#define x264_pixel_sad_x4_16x16_sse3 x264_template(pixel_sad_x4_16x16_sse3) +#define x264_pixel_sad_x4_16x16_ssse3 x264_template(pixel_sad_x4_16x16_ssse3) +#define x264_pixel_sad_x4_16x16_xop x264_template(pixel_sad_x4_16x16_xop) +#define x264_pixel_sad_x4_16x8_avx x264_template(pixel_sad_x4_16x8_avx) +#define x264_pixel_sad_x4_16x8_avx2 x264_template(pixel_sad_x4_16x8_avx2) +#define x264_pixel_sad_x4_16x8_avx512 x264_template(pixel_sad_x4_16x8_avx512) +#define x264_pixel_sad_x4_16x8_cache32_mmx2 x264_template(pixel_sad_x4_16x8_cache32_mmx2) +#define x264_pixel_sad_x4_16x8_cache64_mmx2 x264_template(pixel_sad_x4_16x8_cache64_mmx2) +#define x264_pixel_sad_x4_16x8_cache64_sse2 x264_template(pixel_sad_x4_16x8_cache64_sse2) +#define x264_pixel_sad_x4_16x8_cache64_ssse3 x264_template(pixel_sad_x4_16x8_cache64_ssse3) +#define x264_pixel_sad_x4_16x8_mmx2 x264_template(pixel_sad_x4_16x8_mmx2) +#define x264_pixel_sad_x4_16x8_sse2 x264_template(pixel_sad_x4_16x8_sse2) +#define x264_pixel_sad_x4_16x8_sse3 x264_template(pixel_sad_x4_16x8_sse3) +#define x264_pixel_sad_x4_16x8_ssse3 x264_template(pixel_sad_x4_16x8_ssse3) +#define x264_pixel_sad_x4_16x8_xop x264_template(pixel_sad_x4_16x8_xop) +#define x264_pixel_sad_x4_4x4_avx512 x264_template(pixel_sad_x4_4x4_avx512) +#define x264_pixel_sad_x4_4x4_mmx2 x264_template(pixel_sad_x4_4x4_mmx2) 
+#define x264_pixel_sad_x4_4x4_ssse3 x264_template(pixel_sad_x4_4x4_ssse3) +#define x264_pixel_sad_x4_4x8_avx512 x264_template(pixel_sad_x4_4x8_avx512) +#define x264_pixel_sad_x4_4x8_mmx2 x264_template(pixel_sad_x4_4x8_mmx2) +#define x264_pixel_sad_x4_4x8_ssse3 x264_template(pixel_sad_x4_4x8_ssse3) +#define x264_pixel_sad_x4_8x16_avx512 x264_template(pixel_sad_x4_8x16_avx512) +#define x264_pixel_sad_x4_8x16_cache32_mmx2 x264_template(pixel_sad_x4_8x16_cache32_mmx2) +#define x264_pixel_sad_x4_8x16_cache64_mmx2 x264_template(pixel_sad_x4_8x16_cache64_mmx2) +#define x264_pixel_sad_x4_8x16_cache64_sse2 x264_template(pixel_sad_x4_8x16_cache64_sse2) +#define x264_pixel_sad_x4_8x16_mmx2 x264_template(pixel_sad_x4_8x16_mmx2) +#define x264_pixel_sad_x4_8x16_sse2 x264_template(pixel_sad_x4_8x16_sse2) +#define x264_pixel_sad_x4_8x16_ssse3 x264_template(pixel_sad_x4_8x16_ssse3) +#define x264_pixel_sad_x4_8x16_xop x264_template(pixel_sad_x4_8x16_xop) +#define x264_pixel_sad_x4_8x4_avx512 x264_template(pixel_sad_x4_8x4_avx512) +#define x264_pixel_sad_x4_8x4_mmx2 x264_template(pixel_sad_x4_8x4_mmx2) +#define x264_pixel_sad_x4_8x4_sse2 x264_template(pixel_sad_x4_8x4_sse2) +#define x264_pixel_sad_x4_8x4_ssse3 x264_template(pixel_sad_x4_8x4_ssse3) +#define x264_pixel_sad_x4_8x4_xop x264_template(pixel_sad_x4_8x4_xop) +#define x264_pixel_sad_x4_8x8_avx512 x264_template(pixel_sad_x4_8x8_avx512) +#define x264_pixel_sad_x4_8x8_cache32_mmx2 x264_template(pixel_sad_x4_8x8_cache32_mmx2) +#define x264_pixel_sad_x4_8x8_cache64_mmx2 x264_template(pixel_sad_x4_8x8_cache64_mmx2) +#define x264_pixel_sad_x4_8x8_mmx2 x264_template(pixel_sad_x4_8x8_mmx2) +#define x264_pixel_sad_x4_8x8_sse2 x264_template(pixel_sad_x4_8x8_sse2) +#define x264_pixel_sad_x4_8x8_ssse3 x264_template(pixel_sad_x4_8x8_ssse3) +#define x264_pixel_sad_x4_8x8_xop x264_template(pixel_sad_x4_8x8_xop) +#define x264_pixel_satd_16x16_avx x264_template(pixel_satd_16x16_avx) +#define x264_pixel_satd_16x16_avx2 
x264_template(pixel_satd_16x16_avx2) +#define x264_pixel_satd_16x16_avx512 x264_template(pixel_satd_16x16_avx512) +#define x264_pixel_satd_16x16_mmx2 x264_template(pixel_satd_16x16_mmx2) +#define x264_pixel_satd_16x16_sse2 x264_template(pixel_satd_16x16_sse2) +#define x264_pixel_satd_16x16_sse4 x264_template(pixel_satd_16x16_sse4) +#define x264_pixel_satd_16x16_ssse3 x264_template(pixel_satd_16x16_ssse3) +#define x264_pixel_satd_16x16_ssse3_atom x264_template(pixel_satd_16x16_ssse3_atom) +#define x264_pixel_satd_16x16_xop x264_template(pixel_satd_16x16_xop) +#define x264_pixel_satd_16x8_avx x264_template(pixel_satd_16x8_avx) +#define x264_pixel_satd_16x8_avx2 x264_template(pixel_satd_16x8_avx2) +#define x264_pixel_satd_16x8_avx512 x264_template(pixel_satd_16x8_avx512) +#define x264_pixel_satd_16x8_mmx2 x264_template(pixel_satd_16x8_mmx2) +#define x264_pixel_satd_16x8_sse2 x264_template(pixel_satd_16x8_sse2) +#define x264_pixel_satd_16x8_sse4 x264_template(pixel_satd_16x8_sse4) +#define x264_pixel_satd_16x8_ssse3 x264_template(pixel_satd_16x8_ssse3) +#define x264_pixel_satd_16x8_ssse3_atom x264_template(pixel_satd_16x8_ssse3_atom) +#define x264_pixel_satd_16x8_xop x264_template(pixel_satd_16x8_xop) +#define x264_pixel_satd_4x16_avx x264_template(pixel_satd_4x16_avx) +#define x264_pixel_satd_4x16_avx512 x264_template(pixel_satd_4x16_avx512) +#define x264_pixel_satd_4x16_mmx2 x264_template(pixel_satd_4x16_mmx2) +#define x264_pixel_satd_4x16_sse2 x264_template(pixel_satd_4x16_sse2) +#define x264_pixel_satd_4x16_sse4 x264_template(pixel_satd_4x16_sse4) +#define x264_pixel_satd_4x16_ssse3 x264_template(pixel_satd_4x16_ssse3) +#define x264_pixel_satd_4x16_ssse3_atom x264_template(pixel_satd_4x16_ssse3_atom) +#define x264_pixel_satd_4x4_avx x264_template(pixel_satd_4x4_avx) +#define x264_pixel_satd_4x4_avx512 x264_template(pixel_satd_4x4_avx512) +#define x264_pixel_satd_4x4_mmx2 x264_template(pixel_satd_4x4_mmx2) +#define x264_pixel_satd_4x4_sse4 
x264_template(pixel_satd_4x4_sse4) +#define x264_pixel_satd_4x4_ssse3 x264_template(pixel_satd_4x4_ssse3) +#define x264_pixel_satd_4x4_xop x264_template(pixel_satd_4x4_xop) +#define x264_pixel_satd_4x8_avx x264_template(pixel_satd_4x8_avx) +#define x264_pixel_satd_4x8_avx512 x264_template(pixel_satd_4x8_avx512) +#define x264_pixel_satd_4x8_mmx2 x264_template(pixel_satd_4x8_mmx2) +#define x264_pixel_satd_4x8_sse2 x264_template(pixel_satd_4x8_sse2) +#define x264_pixel_satd_4x8_sse4 x264_template(pixel_satd_4x8_sse4) +#define x264_pixel_satd_4x8_ssse3 x264_template(pixel_satd_4x8_ssse3) +#define x264_pixel_satd_4x8_ssse3_atom x264_template(pixel_satd_4x8_ssse3_atom) +#define x264_pixel_satd_4x8_xop x264_template(pixel_satd_4x8_xop) +#define x264_pixel_satd_8x16_avx x264_template(pixel_satd_8x16_avx) +#define x264_pixel_satd_8x16_avx2 x264_template(pixel_satd_8x16_avx2) +#define x264_pixel_satd_8x16_avx512 x264_template(pixel_satd_8x16_avx512) +#define x264_pixel_satd_8x16_mmx2 x264_template(pixel_satd_8x16_mmx2) +#define x264_pixel_satd_8x16_sse2 x264_template(pixel_satd_8x16_sse2) +#define x264_pixel_satd_8x16_sse4 x264_template(pixel_satd_8x16_sse4) +#define x264_pixel_satd_8x16_ssse3 x264_template(pixel_satd_8x16_ssse3) +#define x264_pixel_satd_8x16_ssse3_atom x264_template(pixel_satd_8x16_ssse3_atom) +#define x264_pixel_satd_8x16_xop x264_template(pixel_satd_8x16_xop) +#define x264_pixel_satd_8x4_avx x264_template(pixel_satd_8x4_avx) +#define x264_pixel_satd_8x4_avx512 x264_template(pixel_satd_8x4_avx512) +#define x264_pixel_satd_8x4_mmx2 x264_template(pixel_satd_8x4_mmx2) +#define x264_pixel_satd_8x4_sse2 x264_template(pixel_satd_8x4_sse2) +#define x264_pixel_satd_8x4_sse4 x264_template(pixel_satd_8x4_sse4) +#define x264_pixel_satd_8x4_ssse3 x264_template(pixel_satd_8x4_ssse3) +#define x264_pixel_satd_8x4_ssse3_atom x264_template(pixel_satd_8x4_ssse3_atom) +#define x264_pixel_satd_8x4_xop x264_template(pixel_satd_8x4_xop) +#define x264_pixel_satd_8x8_avx 
x264_template(pixel_satd_8x8_avx) +#define x264_pixel_satd_8x8_avx2 x264_template(pixel_satd_8x8_avx2) +#define x264_pixel_satd_8x8_avx512 x264_template(pixel_satd_8x8_avx512) +#define x264_pixel_satd_8x8_mmx2 x264_template(pixel_satd_8x8_mmx2) +#define x264_pixel_satd_8x8_sse2 x264_template(pixel_satd_8x8_sse2) +#define x264_pixel_satd_8x8_sse4 x264_template(pixel_satd_8x8_sse4) +#define x264_pixel_satd_8x8_ssse3 x264_template(pixel_satd_8x8_ssse3) +#define x264_pixel_satd_8x8_ssse3_atom x264_template(pixel_satd_8x8_ssse3_atom) +#define x264_pixel_satd_8x8_xop x264_template(pixel_satd_8x8_xop) +#define x264_pixel_ssd_16x16_avx x264_template(pixel_ssd_16x16_avx) +#define x264_pixel_ssd_16x16_avx2 x264_template(pixel_ssd_16x16_avx2) +#define x264_pixel_ssd_16x16_mmx x264_template(pixel_ssd_16x16_mmx) +#define x264_pixel_ssd_16x16_mmx2 x264_template(pixel_ssd_16x16_mmx2) +#define x264_pixel_ssd_16x16_sse2 x264_template(pixel_ssd_16x16_sse2) +#define x264_pixel_ssd_16x16_sse2slow x264_template(pixel_ssd_16x16_sse2slow) +#define x264_pixel_ssd_16x16_ssse3 x264_template(pixel_ssd_16x16_ssse3) +#define x264_pixel_ssd_16x16_xop x264_template(pixel_ssd_16x16_xop) +#define x264_pixel_ssd_16x8_avx x264_template(pixel_ssd_16x8_avx) +#define x264_pixel_ssd_16x8_avx2 x264_template(pixel_ssd_16x8_avx2) +#define x264_pixel_ssd_16x8_mmx x264_template(pixel_ssd_16x8_mmx) +#define x264_pixel_ssd_16x8_mmx2 x264_template(pixel_ssd_16x8_mmx2) +#define x264_pixel_ssd_16x8_sse2 x264_template(pixel_ssd_16x8_sse2) +#define x264_pixel_ssd_16x8_sse2slow x264_template(pixel_ssd_16x8_sse2slow) +#define x264_pixel_ssd_16x8_ssse3 x264_template(pixel_ssd_16x8_ssse3) +#define x264_pixel_ssd_16x8_xop x264_template(pixel_ssd_16x8_xop) +#define x264_pixel_ssd_4x16_mmx x264_template(pixel_ssd_4x16_mmx) +#define x264_pixel_ssd_4x16_mmx2 x264_template(pixel_ssd_4x16_mmx2) +#define x264_pixel_ssd_4x16_ssse3 x264_template(pixel_ssd_4x16_ssse3) +#define x264_pixel_ssd_4x4_mmx 
x264_template(pixel_ssd_4x4_mmx) +#define x264_pixel_ssd_4x4_mmx2 x264_template(pixel_ssd_4x4_mmx2) +#define x264_pixel_ssd_4x4_ssse3 x264_template(pixel_ssd_4x4_ssse3) +#define x264_pixel_ssd_4x8_mmx x264_template(pixel_ssd_4x8_mmx) +#define x264_pixel_ssd_4x8_mmx2 x264_template(pixel_ssd_4x8_mmx2) +#define x264_pixel_ssd_4x8_ssse3 x264_template(pixel_ssd_4x8_ssse3) +#define x264_pixel_ssd_8x16_avx x264_template(pixel_ssd_8x16_avx) +#define x264_pixel_ssd_8x16_mmx x264_template(pixel_ssd_8x16_mmx) +#define x264_pixel_ssd_8x16_mmx2 x264_template(pixel_ssd_8x16_mmx2) +#define x264_pixel_ssd_8x16_sse2 x264_template(pixel_ssd_8x16_sse2) +#define x264_pixel_ssd_8x16_sse2slow x264_template(pixel_ssd_8x16_sse2slow) +#define x264_pixel_ssd_8x16_ssse3 x264_template(pixel_ssd_8x16_ssse3) +#define x264_pixel_ssd_8x16_xop x264_template(pixel_ssd_8x16_xop) +#define x264_pixel_ssd_8x4_avx x264_template(pixel_ssd_8x4_avx) +#define x264_pixel_ssd_8x4_mmx x264_template(pixel_ssd_8x4_mmx) +#define x264_pixel_ssd_8x4_mmx2 x264_template(pixel_ssd_8x4_mmx2) +#define x264_pixel_ssd_8x4_sse2 x264_template(pixel_ssd_8x4_sse2) +#define x264_pixel_ssd_8x4_sse2slow x264_template(pixel_ssd_8x4_sse2slow) +#define x264_pixel_ssd_8x4_ssse3 x264_template(pixel_ssd_8x4_ssse3) +#define x264_pixel_ssd_8x4_xop x264_template(pixel_ssd_8x4_xop) +#define x264_pixel_ssd_8x8_avx x264_template(pixel_ssd_8x8_avx) +#define x264_pixel_ssd_8x8_mmx x264_template(pixel_ssd_8x8_mmx) +#define x264_pixel_ssd_8x8_mmx2 x264_template(pixel_ssd_8x8_mmx2) +#define x264_pixel_ssd_8x8_sse2 x264_template(pixel_ssd_8x8_sse2) +#define x264_pixel_ssd_8x8_sse2slow x264_template(pixel_ssd_8x8_sse2slow) +#define x264_pixel_ssd_8x8_ssse3 x264_template(pixel_ssd_8x8_ssse3) +#define x264_pixel_ssd_8x8_xop x264_template(pixel_ssd_8x8_xop) +#define x264_pixel_var_16x16_avx x264_template(pixel_var_16x16_avx) +#define x264_pixel_var_16x16_avx2 x264_template(pixel_var_16x16_avx2) +#define x264_pixel_var_16x16_avx512 
x264_template(pixel_var_16x16_avx512) +#define x264_pixel_var_16x16_sse2 x264_template(pixel_var_16x16_sse2) +#define x264_pixel_var_8x16_avx x264_template(pixel_var_8x16_avx) +#define x264_pixel_var_8x16_avx512 x264_template(pixel_var_8x16_avx512) +#define x264_pixel_var_8x16_sse2 x264_template(pixel_var_8x16_sse2) +#define x264_pixel_var_8x8_avx x264_template(pixel_var_8x8_avx) +#define x264_pixel_var_8x8_avx512 x264_template(pixel_var_8x8_avx512) +#define x264_pixel_var_8x8_sse2 x264_template(pixel_var_8x8_sse2) #define DECL_PIXELS( ret, name, suffix, args ) \ ret x264_pixel_##name##_16x16_##suffix args;\ ret x264_pixel_##name##_16x8_##suffix args;\ @@ -110,86 +446,153 @@ DECL_PIXELS( uint64_t, hadamard_ac, avx2, ( pixel *pix, intptr_t i_stride )) +#define x264_intra_satd_x3_4x4_mmx2 x264_template(intra_satd_x3_4x4_mmx2) void x264_intra_satd_x3_4x4_mmx2 ( pixel *, pixel *, int * ); -void x264_intra_sad_x3_4x4_mmx2 ( pixel *, pixel *, int * ); -void x264_intra_sad_x3_4x4_sse2 ( pixel *, pixel *, int * ); -void x264_intra_sad_x3_4x4_ssse3 ( pixel *, pixel *, int * ); -void x264_intra_sad_x3_4x4_avx ( pixel *, pixel *, int * ); +#define x264_intra_sad_x3_4x4_mmx2 x264_template(intra_sad_x3_4x4_mmx2) +void x264_intra_sad_x3_4x4_mmx2 ( uint8_t *, uint8_t *, int * ); +#define x264_intra_sad_x3_4x4_sse2 x264_template(intra_sad_x3_4x4_sse2) +void x264_intra_sad_x3_4x4_sse2 ( uint16_t*, uint16_t*, int * ); +#define x264_intra_sad_x3_4x4_ssse3 x264_template(intra_sad_x3_4x4_ssse3) +void x264_intra_sad_x3_4x4_ssse3 ( uint16_t*, uint16_t*, int * ); +#define x264_intra_sad_x3_4x4_avx x264_template(intra_sad_x3_4x4_avx) +void x264_intra_sad_x3_4x4_avx ( uint16_t*, uint16_t*, int * ); +#define x264_intra_satd_x3_8x8c_mmx2 x264_template(intra_satd_x3_8x8c_mmx2) void x264_intra_satd_x3_8x8c_mmx2 ( pixel *, pixel *, int * ); +#define x264_intra_satd_x3_8x8c_ssse3 x264_template(intra_satd_x3_8x8c_ssse3) void x264_intra_satd_x3_8x8c_ssse3 ( uint8_t *, uint8_t *, int * ); -void 
x264_intra_sad_x3_8x8c_mmx2 ( pixel *, pixel *, int * ); -void x264_intra_sad_x3_8x8c_sse2 ( pixel *, pixel *, int * ); -void x264_intra_sad_x3_8x8c_ssse3 ( pixel *, pixel *, int * ); -void x264_intra_sad_x3_8x8c_avx2 ( pixel *, pixel *, int * ); +#define x264_intra_sad_x3_8x8c_mmx2 x264_template(intra_sad_x3_8x8c_mmx2) +void x264_intra_sad_x3_8x8c_mmx2 ( uint8_t *, uint8_t *, int * ); +#define x264_intra_sad_x3_8x8c_ssse3 x264_template(intra_sad_x3_8x8c_ssse3) +void x264_intra_sad_x3_8x8c_ssse3 ( uint8_t *, uint8_t *, int * ); +#define x264_intra_sad_x3_8x8c_avx2 x264_template(intra_sad_x3_8x8c_avx2) +void x264_intra_sad_x3_8x8c_avx2 ( uint8_t *, uint8_t *, int * ); +#define x264_intra_satd_x3_16x16_mmx2 x264_template(intra_satd_x3_16x16_mmx2) void x264_intra_satd_x3_16x16_mmx2 ( pixel *, pixel *, int * ); +#define x264_intra_satd_x3_16x16_ssse3 x264_template(intra_satd_x3_16x16_ssse3) void x264_intra_satd_x3_16x16_ssse3( uint8_t *, uint8_t *, int * ); -void x264_intra_sad_x3_16x16_mmx2 ( pixel *, pixel *, int * ); -void x264_intra_sad_x3_16x16_sse2 ( pixel *, pixel *, int * ); -void x264_intra_sad_x3_16x16_ssse3 ( pixel *, pixel *, int * ); -void x264_intra_sad_x3_16x16_avx2 ( pixel *, pixel *, int * ); +#define x264_intra_sad_x3_16x16_mmx2 x264_template(intra_sad_x3_16x16_mmx2) +void x264_intra_sad_x3_16x16_mmx2 ( uint8_t *, uint8_t *, int * ); +#define x264_intra_sad_x3_16x16_sse2 x264_template(intra_sad_x3_16x16_sse2) +void x264_intra_sad_x3_16x16_sse2 ( uint8_t *, uint8_t *, int * ); +#define x264_intra_sad_x3_16x16_ssse3 x264_template(intra_sad_x3_16x16_ssse3) +void x264_intra_sad_x3_16x16_ssse3 ( uint8_t *, uint8_t *, int * ); +#define x264_intra_sad_x3_16x16_avx2 x264_template(intra_sad_x3_16x16_avx2) +void x264_intra_sad_x3_16x16_avx2 ( uint8_t *, uint8_t *, int * ); +#define x264_intra_sa8d_x3_8x8_mmx2 x264_template(intra_sa8d_x3_8x8_mmx2) void x264_intra_sa8d_x3_8x8_mmx2 ( uint8_t *, uint8_t *, int * ); -void x264_intra_sa8d_x3_8x8_sse2 ( pixel *, pixel 
*, int * ); -void x264_intra_sad_x3_8x8_mmx2 ( pixel *, pixel *, int * ); -void x264_intra_sad_x3_8x8_sse2 ( pixel *, pixel *, int * ); -void x264_intra_sad_x3_8x8_ssse3 ( pixel *, pixel *, int * ); +#define x264_intra_sa8d_x3_8x8_sse2 x264_template(intra_sa8d_x3_8x8_sse2) +void x264_intra_sa8d_x3_8x8_sse2 ( uint8_t *, uint8_t *, int * ); +#define x264_intra_sad_x3_8x8_mmx2 x264_template(intra_sad_x3_8x8_mmx2) +void x264_intra_sad_x3_8x8_mmx2 ( uint8_t *, uint8_t *, int * ); +#define x264_intra_sad_x3_8x8_sse2 x264_template(intra_sad_x3_8x8_sse2) +void x264_intra_sad_x3_8x8_sse2 ( uint16_t*, uint16_t*, int * ); +#define x264_intra_sad_x3_8x8_ssse3 x264_template(intra_sad_x3_8x8_ssse3) +void x264_intra_sad_x3_8x8_ssse3 ( uint16_t*, uint16_t*, int * ); +#define x264_intra_sad_x3_8x8_avx2 x264_template(intra_sad_x3_8x8_avx2) void x264_intra_sad_x3_8x8_avx2 ( uint16_t*, uint16_t*, int * ); +#define x264_intra_satd_x9_4x4_ssse3 x264_template(intra_satd_x9_4x4_ssse3) int x264_intra_satd_x9_4x4_ssse3( uint8_t *, uint8_t *, uint16_t * ); +#define x264_intra_satd_x9_4x4_sse4 x264_template(intra_satd_x9_4x4_sse4) int x264_intra_satd_x9_4x4_sse4 ( uint8_t *, uint8_t *, uint16_t * ); +#define x264_intra_satd_x9_4x4_avx x264_template(intra_satd_x9_4x4_avx) int x264_intra_satd_x9_4x4_avx ( uint8_t *, uint8_t *, uint16_t * ); +#define x264_intra_satd_x9_4x4_xop x264_template(intra_satd_x9_4x4_xop) int x264_intra_satd_x9_4x4_xop ( uint8_t *, uint8_t *, uint16_t * ); +#define x264_intra_sad_x9_4x4_ssse3 x264_template(intra_sad_x9_4x4_ssse3) int x264_intra_sad_x9_4x4_ssse3 ( uint8_t *, uint8_t *, uint16_t * ); +#define x264_intra_sad_x9_4x4_sse4 x264_template(intra_sad_x9_4x4_sse4) int x264_intra_sad_x9_4x4_sse4 ( uint8_t *, uint8_t *, uint16_t * ); +#define x264_intra_sad_x9_4x4_avx x264_template(intra_sad_x9_4x4_avx) int x264_intra_sad_x9_4x4_avx ( uint8_t *, uint8_t *, uint16_t * ); +#define x264_intra_sa8d_x9_8x8_ssse3 x264_template(intra_sa8d_x9_8x8_ssse3) int 
x264_intra_sa8d_x9_8x8_ssse3( uint8_t *, uint8_t *, uint8_t *, uint16_t *, uint16_t * ); +#define x264_intra_sa8d_x9_8x8_sse4 x264_template(intra_sa8d_x9_8x8_sse4) int x264_intra_sa8d_x9_8x8_sse4 ( uint8_t *, uint8_t *, uint8_t *, uint16_t *, uint16_t * ); +#define x264_intra_sa8d_x9_8x8_avx x264_template(intra_sa8d_x9_8x8_avx) int x264_intra_sa8d_x9_8x8_avx ( uint8_t *, uint8_t *, uint8_t *, uint16_t *, uint16_t * ); +#define x264_intra_sad_x9_8x8_ssse3 x264_template(intra_sad_x9_8x8_ssse3) int x264_intra_sad_x9_8x8_ssse3 ( uint8_t *, uint8_t *, uint8_t *, uint16_t *, uint16_t * ); +#define x264_intra_sad_x9_8x8_sse4 x264_template(intra_sad_x9_8x8_sse4) int x264_intra_sad_x9_8x8_sse4 ( uint8_t *, uint8_t *, uint8_t *, uint16_t *, uint16_t * ); +#define x264_intra_sad_x9_8x8_avx x264_template(intra_sad_x9_8x8_avx) int x264_intra_sad_x9_8x8_avx ( uint8_t *, uint8_t *, uint8_t *, uint16_t *, uint16_t * ); +#define x264_intra_sad_x9_8x8_avx2 x264_template(intra_sad_x9_8x8_avx2) int x264_intra_sad_x9_8x8_avx2 ( uint8_t *, uint8_t *, uint8_t *, uint16_t *, uint16_t * ); +#define x264_pixel_ssd_nv12_core_sse2 x264_template(pixel_ssd_nv12_core_sse2) void x264_pixel_ssd_nv12_core_sse2( pixel *pixuv1, intptr_t stride1, pixel *pixuv2, intptr_t stride2, int width, int height, uint64_t *ssd_u, uint64_t *ssd_v ); +#define x264_pixel_ssd_nv12_core_avx x264_template(pixel_ssd_nv12_core_avx) void x264_pixel_ssd_nv12_core_avx ( pixel *pixuv1, intptr_t stride1, pixel *pixuv2, intptr_t stride2, int width, int height, uint64_t *ssd_u, uint64_t *ssd_v ); +#define x264_pixel_ssd_nv12_core_xop x264_template(pixel_ssd_nv12_core_xop) void x264_pixel_ssd_nv12_core_xop ( pixel *pixuv1, intptr_t stride1, pixel *pixuv2, intptr_t stride2, int width, int height, uint64_t *ssd_u, uint64_t *ssd_v ); +#define x264_pixel_ssd_nv12_core_avx2 x264_template(pixel_ssd_nv12_core_avx2) void x264_pixel_ssd_nv12_core_avx2( pixel *pixuv1, intptr_t stride1, pixel *pixuv2, intptr_t stride2, int width, int 
height, uint64_t *ssd_u, uint64_t *ssd_v ); +#define x264_pixel_ssim_4x4x2_core_mmx2 x264_template(pixel_ssim_4x4x2_core_mmx2) void x264_pixel_ssim_4x4x2_core_mmx2( const uint8_t *pix1, intptr_t stride1, const uint8_t *pix2, intptr_t stride2, int sums[2][4] ); +#define x264_pixel_ssim_4x4x2_core_sse2 x264_template(pixel_ssim_4x4x2_core_sse2) void x264_pixel_ssim_4x4x2_core_sse2( const pixel *pix1, intptr_t stride1, const pixel *pix2, intptr_t stride2, int sums[2][4] ); +#define x264_pixel_ssim_4x4x2_core_avx x264_template(pixel_ssim_4x4x2_core_avx) void x264_pixel_ssim_4x4x2_core_avx ( const pixel *pix1, intptr_t stride1, const pixel *pix2, intptr_t stride2, int sums[2][4] ); +#define x264_pixel_ssim_end4_sse2 x264_template(pixel_ssim_end4_sse2) float x264_pixel_ssim_end4_sse2( int sum0[5][4], int sum1[5][4], int width ); +#define x264_pixel_ssim_end4_avx x264_template(pixel_ssim_end4_avx) float x264_pixel_ssim_end4_avx ( int sum0[5][4], int sum1[5][4], int width ); +#define x264_pixel_var2_8x8_sse2 x264_template(pixel_var2_8x8_sse2) int x264_pixel_var2_8x8_sse2 ( pixel *fenc, pixel *fdec, int ssd[2] ); +#define x264_pixel_var2_8x8_ssse3 x264_template(pixel_var2_8x8_ssse3) int x264_pixel_var2_8x8_ssse3 ( uint8_t *fenc, uint8_t *fdec, int ssd[2] ); +#define x264_pixel_var2_8x8_avx2 x264_template(pixel_var2_8x8_avx2) int x264_pixel_var2_8x8_avx2 ( pixel *fenc, pixel *fdec, int ssd[2] ); +#define x264_pixel_var2_8x8_avx512 x264_template(pixel_var2_8x8_avx512) int x264_pixel_var2_8x8_avx512 ( pixel *fenc, pixel *fdec, int ssd[2] ); +#define x264_pixel_var2_8x16_sse2 x264_template(pixel_var2_8x16_sse2) int x264_pixel_var2_8x16_sse2 ( pixel *fenc, pixel *fdec, int ssd[2] ); +#define x264_pixel_var2_8x16_ssse3 x264_template(pixel_var2_8x16_ssse3) int x264_pixel_var2_8x16_ssse3 ( uint8_t *fenc, uint8_t *fdec, int ssd[2] ); +#define x264_pixel_var2_8x16_avx2 x264_template(pixel_var2_8x16_avx2) int x264_pixel_var2_8x16_avx2 ( pixel *fenc, pixel *fdec, int ssd[2] ); +#define 
x264_pixel_var2_8x16_avx512 x264_template(pixel_var2_8x16_avx512) int x264_pixel_var2_8x16_avx512( pixel *fenc, pixel *fdec, int ssd[2] ); +#define x264_pixel_vsad_mmx2 x264_template(pixel_vsad_mmx2) int x264_pixel_vsad_mmx2 ( pixel *src, intptr_t stride, int height ); +#define x264_pixel_vsad_sse2 x264_template(pixel_vsad_sse2) int x264_pixel_vsad_sse2 ( pixel *src, intptr_t stride, int height ); +#define x264_pixel_vsad_ssse3 x264_template(pixel_vsad_ssse3) int x264_pixel_vsad_ssse3( pixel *src, intptr_t stride, int height ); +#define x264_pixel_vsad_xop x264_template(pixel_vsad_xop) int x264_pixel_vsad_xop ( pixel *src, intptr_t stride, int height ); +#define x264_pixel_vsad_avx2 x264_template(pixel_vsad_avx2) int x264_pixel_vsad_avx2 ( uint16_t *src, intptr_t stride, int height ); +#define x264_pixel_asd8_sse2 x264_template(pixel_asd8_sse2) int x264_pixel_asd8_sse2 ( pixel *pix1, intptr_t stride1, pixel *pix2, intptr_t stride2, int height ); +#define x264_pixel_asd8_ssse3 x264_template(pixel_asd8_ssse3) int x264_pixel_asd8_ssse3( pixel *pix1, intptr_t stride1, pixel *pix2, intptr_t stride2, int height ); +#define x264_pixel_asd8_xop x264_template(pixel_asd8_xop) int x264_pixel_asd8_xop ( pixel *pix1, intptr_t stride1, pixel *pix2, intptr_t stride2, int height ); +#define x264_pixel_sa8d_satd_16x16_sse2 x264_template(pixel_sa8d_satd_16x16_sse2) uint64_t x264_pixel_sa8d_satd_16x16_sse2 ( pixel *pix1, intptr_t stride1, pixel *pix2, intptr_t stride2 ); +#define x264_pixel_sa8d_satd_16x16_ssse3 x264_template(pixel_sa8d_satd_16x16_ssse3) uint64_t x264_pixel_sa8d_satd_16x16_ssse3 ( pixel *pix1, intptr_t stride1, pixel *pix2, intptr_t stride2 ); +#define x264_pixel_sa8d_satd_16x16_ssse3_atom x264_template(pixel_sa8d_satd_16x16_ssse3_atom) uint64_t x264_pixel_sa8d_satd_16x16_ssse3_atom( pixel *pix1, intptr_t stride1, pixel *pix2, intptr_t stride2 ); +#define x264_pixel_sa8d_satd_16x16_sse4 x264_template(pixel_sa8d_satd_16x16_sse4) uint64_t 
x264_pixel_sa8d_satd_16x16_sse4 ( pixel *pix1, intptr_t stride1, pixel *pix2, intptr_t stride2 ); +#define x264_pixel_sa8d_satd_16x16_avx x264_template(pixel_sa8d_satd_16x16_avx) uint64_t x264_pixel_sa8d_satd_16x16_avx ( pixel *pix1, intptr_t stride1, pixel *pix2, intptr_t stride2 ); +#define x264_pixel_sa8d_satd_16x16_xop x264_template(pixel_sa8d_satd_16x16_xop) uint64_t x264_pixel_sa8d_satd_16x16_xop ( pixel *pix1, intptr_t stride1, pixel *pix2, intptr_t stride2 ); +#define x264_pixel_sa8d_satd_16x16_avx2 x264_template(pixel_sa8d_satd_16x16_avx2) uint64_t x264_pixel_sa8d_satd_16x16_avx2 ( pixel *pix1, intptr_t stride1, pixel *pix2, intptr_t stride2 ); diff -Nru x264-0.152.2854+gite9a5903/common/x86/predict-a.asm x264-0.158.2988+git-20191101.7817004/common/x86/predict-a.asm --- x264-0.152.2854+gite9a5903/common/x86/predict-a.asm 2017-12-31 12:50:51.000000000 +0000 +++ x264-0.158.2988+git-20191101.7817004/common/x86/predict-a.asm 2019-11-09 05:16:29.000000000 +0000 @@ -1,7 +1,7 @@ ;***************************************************************************** ;* predict-a.asm: x86 intra prediction ;***************************************************************************** -;* Copyright (C) 2005-2017 x264 project +;* Copyright (C) 2005-2019 x264 project ;* ;* Authors: Loren Merritt ;* Holger Lubitz @@ -688,7 +688,7 @@ je .fix_lt_2 .do_top: and r2d, 4 -%ifdef PIC +%if ARCH_X86_64 lea r3, [shuf_fixtr] pshufb m3, [r3+r2*4] %else diff -Nru x264-0.152.2854+gite9a5903/common/x86/predict-c.c x264-0.158.2988+git-20191101.7817004/common/x86/predict-c.c --- x264-0.152.2854+gite9a5903/common/x86/predict-c.c 2017-12-31 12:50:51.000000000 +0000 +++ x264-0.158.2988+git-20191101.7817004/common/x86/predict-c.c 2019-11-09 05:16:29.000000000 +0000 @@ -1,7 +1,7 @@ /***************************************************************************** * predict-c.c: intra prediction ***************************************************************************** - * Copyright (C) 2003-2017 x264 
project + * Copyright (C) 2003-2019 x264 project * * Authors: Laurent Aimar * Loren Merritt @@ -70,7 +70,7 @@ x264_predict_16x16_p_core_##name( src, i00, b, c ); #define PREDICT_16x16_P(name, name2)\ -static void x264_predict_16x16_p_##name( pixel *src )\ +static void predict_16x16_p_##name( pixel *src )\ {\ PREDICT_16x16_P_CORE\ PREDICT_16x16_P_END(name2)\ @@ -128,7 +128,7 @@ + 1 * ( src[ 8*FDEC_STRIDE-1] - src[ 6*FDEC_STRIDE-1] ); #define PREDICT_16x16_P_INLINE(name, name2)\ -static void x264_predict_16x16_p_##name( pixel *src )\ +static void predict_16x16_p_##name( pixel *src )\ {\ PREDICT_16x16_P_CORE_INLINE\ PREDICT_16x16_P_END(name2)\ @@ -174,7 +174,7 @@ #endif // HIGH_BIT_DEPTH #define PREDICT_8x16C_P(name)\ -static void x264_predict_8x16c_p_##name( pixel *src )\ +static void predict_8x16c_p_##name( pixel *src )\ {\ PREDICT_8x16C_P_CORE\ PREDICT_8x16C_P_END(name)\ @@ -211,7 +211,7 @@ #endif // HIGH_BIT_DEPTH #define PREDICT_8x8C_P(name, name2)\ -static void x264_predict_8x8c_p_##name( pixel *src )\ +static void predict_8x8c_p_##name( pixel *src )\ {\ PREDICT_8x8C_P_CORE\ PREDICT_8x8C_P_END(name2)\ @@ -257,7 +257,7 @@ H += -4 * src[-1*FDEC_STRIDE -1]; #define PREDICT_8x8C_P_INLINE(name, name2)\ -static void x264_predict_8x8c_p_##name( pixel *src )\ +static void predict_8x8c_p_##name( pixel *src )\ {\ PREDICT_8x8C_P_CORE_INLINE\ PREDICT_8x8C_P_END(name2)\ @@ -281,7 +281,7 @@ PREDICT_8x8C_P_INLINE( avx2, avx2 ) #if ARCH_X86_64 && !HIGH_BIT_DEPTH -static void x264_predict_8x8c_dc_left( uint8_t *src ) +static void predict_8x8c_dc_left( uint8_t *src ) { int y; uint32_t s0 = 0, s1 = 0; @@ -327,7 +327,7 @@ pf[I_PRED_16x16_DC_TOP] = x264_predict_16x16_dc_top_sse2; pf[I_PRED_16x16_DC_LEFT] = x264_predict_16x16_dc_left_sse2; pf[I_PRED_16x16_H] = x264_predict_16x16_h_sse2; - pf[I_PRED_16x16_P] = x264_predict_16x16_p_sse2; + pf[I_PRED_16x16_P] = predict_16x16_p_sse2; if( !(cpu&X264_CPU_AVX) ) return; pf[I_PRED_16x16_V] = x264_predict_16x16_v_avx; @@ -336,7 +336,7 @@ 
pf[I_PRED_16x16_H] = x264_predict_16x16_h_avx2; #else #if !ARCH_X86_64 - pf[I_PRED_16x16_P] = x264_predict_16x16_p_mmx2; + pf[I_PRED_16x16_P] = predict_16x16_p_mmx2; #endif if( !(cpu&X264_CPU_SSE) ) return; @@ -348,22 +348,22 @@ return; pf[I_PRED_16x16_DC_TOP] = x264_predict_16x16_dc_top_sse2; pf[I_PRED_16x16_DC_LEFT] = x264_predict_16x16_dc_left_sse2; - pf[I_PRED_16x16_P] = x264_predict_16x16_p_sse2; + pf[I_PRED_16x16_P] = predict_16x16_p_sse2; if( !(cpu&X264_CPU_SSSE3) ) return; if( !(cpu&X264_CPU_SLOW_PSHUFB) ) pf[I_PRED_16x16_H] = x264_predict_16x16_h_ssse3; #if HAVE_X86_INLINE_ASM - pf[I_PRED_16x16_P] = x264_predict_16x16_p_ssse3; + pf[I_PRED_16x16_P] = predict_16x16_p_ssse3; #endif if( !(cpu&X264_CPU_AVX) ) return; - pf[I_PRED_16x16_P] = x264_predict_16x16_p_avx; + pf[I_PRED_16x16_P] = predict_16x16_p_avx; #endif // HIGH_BIT_DEPTH if( cpu&X264_CPU_AVX2 ) { - pf[I_PRED_16x16_P] = x264_predict_16x16_p_avx2; + pf[I_PRED_16x16_P] = predict_16x16_p_avx2; pf[I_PRED_16x16_DC] = x264_predict_16x16_dc_avx2; pf[I_PRED_16x16_DC_TOP] = x264_predict_16x16_dc_top_avx2; pf[I_PRED_16x16_DC_LEFT] = x264_predict_16x16_dc_left_avx2; @@ -388,16 +388,16 @@ pf[I_PRED_CHROMA_DC] = x264_predict_8x8c_dc_sse2; pf[I_PRED_CHROMA_DC_TOP] = x264_predict_8x8c_dc_top_sse2; pf[I_PRED_CHROMA_H] = x264_predict_8x8c_h_sse2; - pf[I_PRED_CHROMA_P] = x264_predict_8x8c_p_sse2; + pf[I_PRED_CHROMA_P] = predict_8x8c_p_sse2; if( !(cpu&X264_CPU_AVX) ) return; - pf[I_PRED_CHROMA_P] = x264_predict_8x8c_p_avx; + pf[I_PRED_CHROMA_P] = predict_8x8c_p_avx; if( !(cpu&X264_CPU_AVX2) ) return; pf[I_PRED_CHROMA_H] = x264_predict_8x8c_h_avx2; #else #if ARCH_X86_64 - pf[I_PRED_CHROMA_DC_LEFT] = x264_predict_8x8c_dc_left; + pf[I_PRED_CHROMA_DC_LEFT] = predict_8x8c_dc_left; #endif pf[I_PRED_CHROMA_V] = x264_predict_8x8c_v_mmx; if( !(cpu&X264_CPU_MMX2) ) @@ -405,26 +405,26 @@ pf[I_PRED_CHROMA_DC_TOP] = x264_predict_8x8c_dc_top_mmx2; pf[I_PRED_CHROMA_H] = x264_predict_8x8c_h_mmx2; #if !ARCH_X86_64 - pf[I_PRED_CHROMA_P] 
= x264_predict_8x8c_p_mmx2; + pf[I_PRED_CHROMA_P] = predict_8x8c_p_mmx2; #endif pf[I_PRED_CHROMA_DC] = x264_predict_8x8c_dc_mmx2; if( !(cpu&X264_CPU_SSE2) ) return; - pf[I_PRED_CHROMA_P] = x264_predict_8x8c_p_sse2; + pf[I_PRED_CHROMA_P] = predict_8x8c_p_sse2; if( !(cpu&X264_CPU_SSSE3) ) return; pf[I_PRED_CHROMA_H] = x264_predict_8x8c_h_ssse3; #if HAVE_X86_INLINE_ASM - pf[I_PRED_CHROMA_P] = x264_predict_8x8c_p_ssse3; + pf[I_PRED_CHROMA_P] = predict_8x8c_p_ssse3; #endif if( !(cpu&X264_CPU_AVX) ) return; - pf[I_PRED_CHROMA_P] = x264_predict_8x8c_p_avx; + pf[I_PRED_CHROMA_P] = predict_8x8c_p_avx; #endif // HIGH_BIT_DEPTH if( cpu&X264_CPU_AVX2 ) { - pf[I_PRED_CHROMA_P] = x264_predict_8x8c_p_avx2; + pf[I_PRED_CHROMA_P] = predict_8x8c_p_avx2; } } @@ -445,10 +445,10 @@ pf[I_PRED_CHROMA_DC_TOP] = x264_predict_8x16c_dc_top_sse2; pf[I_PRED_CHROMA_DC] = x264_predict_8x16c_dc_sse2; pf[I_PRED_CHROMA_H] = x264_predict_8x16c_h_sse2; - pf[I_PRED_CHROMA_P] = x264_predict_8x16c_p_sse2; + pf[I_PRED_CHROMA_P] = predict_8x16c_p_sse2; if( !(cpu&X264_CPU_AVX) ) return; - pf[I_PRED_CHROMA_P] = x264_predict_8x16c_p_avx; + pf[I_PRED_CHROMA_P] = predict_8x16c_p_avx; if( !(cpu&X264_CPU_AVX2) ) return; pf[I_PRED_CHROMA_H] = x264_predict_8x16c_h_avx2; @@ -460,22 +460,22 @@ pf[I_PRED_CHROMA_DC] = x264_predict_8x16c_dc_mmx2; pf[I_PRED_CHROMA_H] = x264_predict_8x16c_h_mmx2; #if !ARCH_X86_64 - pf[I_PRED_CHROMA_P] = x264_predict_8x16c_p_mmx2; + pf[I_PRED_CHROMA_P] = predict_8x16c_p_mmx2; #endif if( !(cpu&X264_CPU_SSE2) ) return; - pf[I_PRED_CHROMA_P] = x264_predict_8x16c_p_sse2; + pf[I_PRED_CHROMA_P] = predict_8x16c_p_sse2; if( !(cpu&X264_CPU_SSSE3) ) return; pf[I_PRED_CHROMA_H] = x264_predict_8x16c_h_ssse3; if( !(cpu&X264_CPU_AVX) ) return; - pf[I_PRED_CHROMA_P] = x264_predict_8x16c_p_avx; + pf[I_PRED_CHROMA_P] = predict_8x16c_p_avx; #endif // HIGH_BIT_DEPTH if( cpu&X264_CPU_AVX2 ) { - pf[I_PRED_CHROMA_P] = x264_predict_8x16c_p_avx2; + pf[I_PRED_CHROMA_P] = predict_8x16c_p_avx2; } } diff -Nru 
x264-0.152.2854+gite9a5903/common/x86/predict.h x264-0.158.2988+git-20191101.7817004/common/x86/predict.h --- x264-0.152.2854+gite9a5903/common/x86/predict.h 2017-12-31 12:50:51.000000000 +0000 +++ x264-0.158.2988+git-20191101.7817004/common/x86/predict.h 2019-11-09 05:16:29.000000000 +0000 @@ -1,7 +1,7 @@ /***************************************************************************** * predict.h: x86 intra prediction ***************************************************************************** - * Copyright (C) 2003-2017 x264 project + * Copyright (C) 2003-2019 x264 project * * Authors: Laurent Aimar * Loren Merritt @@ -24,121 +24,233 @@ * For more information, contact us at licensing@x264.com. *****************************************************************************/ -#ifndef X264_I386_PREDICT_H -#define X264_I386_PREDICT_H +#ifndef X264_X86_PREDICT_H +#define X264_X86_PREDICT_H +#define x264_predict_16x16_init_mmx x264_template(predict_16x16_init_mmx) void x264_predict_16x16_init_mmx( int cpu, x264_predict_t pf[7] ); +#define x264_predict_8x16c_init_mmx x264_template(predict_8x16c_init_mmx) void x264_predict_8x16c_init_mmx( int cpu, x264_predict_t pf[7] ); +#define x264_predict_8x8c_init_mmx x264_template(predict_8x8c_init_mmx) void x264_predict_8x8c_init_mmx ( int cpu, x264_predict_t pf[7] ); +#define x264_predict_4x4_init_mmx x264_template(predict_4x4_init_mmx) void x264_predict_4x4_init_mmx ( int cpu, x264_predict_t pf[12] ); +#define x264_predict_8x8_init_mmx x264_template(predict_8x8_init_mmx) void x264_predict_8x8_init_mmx ( int cpu, x264_predict8x8_t pf[12], x264_predict_8x8_filter_t *predict_8x8_filter ); +#define x264_predict_16x16_v_mmx2 x264_template(predict_16x16_v_mmx2) void x264_predict_16x16_v_mmx2( pixel *src ); +#define x264_predict_16x16_v_sse x264_template(predict_16x16_v_sse) void x264_predict_16x16_v_sse ( pixel *src ); +#define x264_predict_16x16_v_avx x264_template(predict_16x16_v_avx) void x264_predict_16x16_v_avx ( uint16_t *src ); 
+#define x264_predict_16x16_h_mmx2 x264_template(predict_16x16_h_mmx2) void x264_predict_16x16_h_mmx2( pixel *src ); +#define x264_predict_16x16_h_sse2 x264_template(predict_16x16_h_sse2) void x264_predict_16x16_h_sse2( uint16_t *src ); +#define x264_predict_16x16_h_ssse3 x264_template(predict_16x16_h_ssse3) void x264_predict_16x16_h_ssse3( uint8_t *src ); +#define x264_predict_16x16_h_avx2 x264_template(predict_16x16_h_avx2) void x264_predict_16x16_h_avx2( uint16_t *src ); +#define x264_predict_16x16_dc_sse2 x264_template(predict_16x16_dc_sse2) void x264_predict_16x16_dc_sse2( pixel *src ); +#define x264_predict_16x16_dc_avx2 x264_template(predict_16x16_dc_avx2) void x264_predict_16x16_dc_avx2( pixel *src ); +#define x264_predict_16x16_dc_left_sse2 x264_template(predict_16x16_dc_left_sse2) void x264_predict_16x16_dc_left_sse2( pixel *src ); +#define x264_predict_16x16_dc_left_avx2 x264_template(predict_16x16_dc_left_avx2) void x264_predict_16x16_dc_left_avx2( pixel *src ); +#define x264_predict_16x16_dc_top_sse2 x264_template(predict_16x16_dc_top_sse2) void x264_predict_16x16_dc_top_sse2( pixel *src ); +#define x264_predict_16x16_dc_top_avx2 x264_template(predict_16x16_dc_top_avx2) void x264_predict_16x16_dc_top_avx2( pixel *src ); +#define x264_predict_16x16_p_core_mmx2 x264_template(predict_16x16_p_core_mmx2) void x264_predict_16x16_p_core_mmx2( uint8_t *src, int i00, int b, int c ); +#define x264_predict_16x16_p_core_sse2 x264_template(predict_16x16_p_core_sse2) void x264_predict_16x16_p_core_sse2( pixel *src, int i00, int b, int c ); +#define x264_predict_16x16_p_core_avx x264_template(predict_16x16_p_core_avx) void x264_predict_16x16_p_core_avx( pixel *src, int i00, int b, int c ); +#define x264_predict_16x16_p_core_avx2 x264_template(predict_16x16_p_core_avx2) void x264_predict_16x16_p_core_avx2( pixel *src, int i00, int b, int c ); +#define x264_predict_8x16c_dc_mmx2 x264_template(predict_8x16c_dc_mmx2) void x264_predict_8x16c_dc_mmx2( pixel *src ); 
+#define x264_predict_8x16c_dc_sse2 x264_template(predict_8x16c_dc_sse2) void x264_predict_8x16c_dc_sse2( uint16_t *src ); +#define x264_predict_8x16c_dc_top_mmx2 x264_template(predict_8x16c_dc_top_mmx2) void x264_predict_8x16c_dc_top_mmx2( uint8_t *src ); +#define x264_predict_8x16c_dc_top_sse2 x264_template(predict_8x16c_dc_top_sse2) void x264_predict_8x16c_dc_top_sse2( uint16_t *src ); +#define x264_predict_8x16c_v_mmx x264_template(predict_8x16c_v_mmx) void x264_predict_8x16c_v_mmx( uint8_t *src ); +#define x264_predict_8x16c_v_sse x264_template(predict_8x16c_v_sse) void x264_predict_8x16c_v_sse( uint16_t *src ); +#define x264_predict_8x16c_h_mmx2 x264_template(predict_8x16c_h_mmx2) void x264_predict_8x16c_h_mmx2( pixel *src ); +#define x264_predict_8x16c_h_sse2 x264_template(predict_8x16c_h_sse2) void x264_predict_8x16c_h_sse2( uint16_t *src ); +#define x264_predict_8x16c_h_ssse3 x264_template(predict_8x16c_h_ssse3) void x264_predict_8x16c_h_ssse3( uint8_t *src ); +#define x264_predict_8x16c_h_avx2 x264_template(predict_8x16c_h_avx2) void x264_predict_8x16c_h_avx2( uint16_t *src ); +#define x264_predict_8x16c_p_core_mmx2 x264_template(predict_8x16c_p_core_mmx2) void x264_predict_8x16c_p_core_mmx2( uint8_t *src, int i00, int b, int c ); +#define x264_predict_8x16c_p_core_sse2 x264_template(predict_8x16c_p_core_sse2) void x264_predict_8x16c_p_core_sse2( pixel *src, int i00, int b, int c ); +#define x264_predict_8x16c_p_core_avx x264_template(predict_8x16c_p_core_avx) void x264_predict_8x16c_p_core_avx ( pixel *src, int i00, int b, int c ); +#define x264_predict_8x16c_p_core_avx2 x264_template(predict_8x16c_p_core_avx2) void x264_predict_8x16c_p_core_avx2( pixel *src, int i00, int b, int c ); +#define x264_predict_8x8c_p_core_mmx2 x264_template(predict_8x8c_p_core_mmx2) void x264_predict_8x8c_p_core_mmx2( uint8_t *src, int i00, int b, int c ); +#define x264_predict_8x8c_p_core_sse2 x264_template(predict_8x8c_p_core_sse2) void x264_predict_8x8c_p_core_sse2( pixel 
*src, int i00, int b, int c ); +#define x264_predict_8x8c_p_core_avx x264_template(predict_8x8c_p_core_avx) void x264_predict_8x8c_p_core_avx ( pixel *src, int i00, int b, int c ); +#define x264_predict_8x8c_p_core_avx2 x264_template(predict_8x8c_p_core_avx2) void x264_predict_8x8c_p_core_avx2( pixel *src, int i00, int b, int c ); +#define x264_predict_8x8c_dc_mmx2 x264_template(predict_8x8c_dc_mmx2) void x264_predict_8x8c_dc_mmx2( pixel *src ); +#define x264_predict_8x8c_dc_sse2 x264_template(predict_8x8c_dc_sse2) void x264_predict_8x8c_dc_sse2( uint16_t *src ); +#define x264_predict_8x8c_dc_top_mmx2 x264_template(predict_8x8c_dc_top_mmx2) void x264_predict_8x8c_dc_top_mmx2( uint8_t *src ); +#define x264_predict_8x8c_dc_top_sse2 x264_template(predict_8x8c_dc_top_sse2) void x264_predict_8x8c_dc_top_sse2( uint16_t *src ); +#define x264_predict_8x8c_v_mmx x264_template(predict_8x8c_v_mmx) void x264_predict_8x8c_v_mmx( pixel *src ); +#define x264_predict_8x8c_v_sse x264_template(predict_8x8c_v_sse) void x264_predict_8x8c_v_sse( uint16_t *src ); +#define x264_predict_8x8c_h_mmx2 x264_template(predict_8x8c_h_mmx2) void x264_predict_8x8c_h_mmx2( pixel *src ); +#define x264_predict_8x8c_h_sse2 x264_template(predict_8x8c_h_sse2) void x264_predict_8x8c_h_sse2( uint16_t *src ); +#define x264_predict_8x8c_h_ssse3 x264_template(predict_8x8c_h_ssse3) void x264_predict_8x8c_h_ssse3( uint8_t *src ); +#define x264_predict_8x8c_h_avx2 x264_template(predict_8x8c_h_avx2) void x264_predict_8x8c_h_avx2( uint16_t *src ); +#define x264_predict_8x8_v_mmx2 x264_template(predict_8x8_v_mmx2) void x264_predict_8x8_v_mmx2( uint8_t *src, uint8_t edge[36] ); +#define x264_predict_8x8_v_sse x264_template(predict_8x8_v_sse) void x264_predict_8x8_v_sse ( uint16_t *src, uint16_t edge[36] ); +#define x264_predict_8x8_h_mmx2 x264_template(predict_8x8_h_mmx2) void x264_predict_8x8_h_mmx2( uint8_t *src, uint8_t edge[36] ); +#define x264_predict_8x8_h_sse2 x264_template(predict_8x8_h_sse2) void 
x264_predict_8x8_h_sse2( uint16_t *src, uint16_t edge[36] ); +#define x264_predict_8x8_hd_mmx2 x264_template(predict_8x8_hd_mmx2) void x264_predict_8x8_hd_mmx2( uint8_t *src, uint8_t edge[36] ); +#define x264_predict_8x8_hu_mmx2 x264_template(predict_8x8_hu_mmx2) void x264_predict_8x8_hu_mmx2( uint8_t *src, uint8_t edge[36] ); +#define x264_predict_8x8_dc_mmx2 x264_template(predict_8x8_dc_mmx2) void x264_predict_8x8_dc_mmx2( uint8_t *src, uint8_t edge[36] ); +#define x264_predict_8x8_dc_sse2 x264_template(predict_8x8_dc_sse2) void x264_predict_8x8_dc_sse2( uint16_t *src, uint16_t edge[36] ); +#define x264_predict_8x8_dc_top_mmx2 x264_template(predict_8x8_dc_top_mmx2) void x264_predict_8x8_dc_top_mmx2( uint8_t *src, uint8_t edge[36] ); +#define x264_predict_8x8_dc_top_sse2 x264_template(predict_8x8_dc_top_sse2) void x264_predict_8x8_dc_top_sse2( uint16_t *src, uint16_t edge[36] ); +#define x264_predict_8x8_dc_left_mmx2 x264_template(predict_8x8_dc_left_mmx2) void x264_predict_8x8_dc_left_mmx2( uint8_t *src, uint8_t edge[36] ); +#define x264_predict_8x8_dc_left_sse2 x264_template(predict_8x8_dc_left_sse2) void x264_predict_8x8_dc_left_sse2( uint16_t *src, uint16_t edge[36] ); +#define x264_predict_8x8_ddl_mmx2 x264_template(predict_8x8_ddl_mmx2) void x264_predict_8x8_ddl_mmx2( uint8_t *src, uint8_t edge[36] ); +#define x264_predict_8x8_ddl_sse2 x264_template(predict_8x8_ddl_sse2) void x264_predict_8x8_ddl_sse2( pixel *src, pixel edge[36] ); +#define x264_predict_8x8_ddl_ssse3 x264_template(predict_8x8_ddl_ssse3) void x264_predict_8x8_ddl_ssse3( pixel *src, pixel edge[36] ); +#define x264_predict_8x8_ddl_cache64_ssse3 x264_template(predict_8x8_ddl_cache64_ssse3) void x264_predict_8x8_ddl_cache64_ssse3( pixel *src, pixel edge[36] ); +#define x264_predict_8x8_ddl_avx x264_template(predict_8x8_ddl_avx) void x264_predict_8x8_ddl_avx( pixel *src, pixel edge[36] ); +#define x264_predict_8x8_ddr_mmx2 x264_template(predict_8x8_ddr_mmx2) void x264_predict_8x8_ddr_mmx2( uint8_t 
*src, uint8_t edge[36] ); +#define x264_predict_8x8_ddr_sse2 x264_template(predict_8x8_ddr_sse2) void x264_predict_8x8_ddr_sse2( pixel *src, pixel edge[36] ); +#define x264_predict_8x8_ddr_ssse3 x264_template(predict_8x8_ddr_ssse3) void x264_predict_8x8_ddr_ssse3( pixel *src, pixel edge[36] ); +#define x264_predict_8x8_ddr_cache64_ssse3 x264_template(predict_8x8_ddr_cache64_ssse3) void x264_predict_8x8_ddr_cache64_ssse3( pixel *src, pixel edge[36] ); +#define x264_predict_8x8_ddr_avx x264_template(predict_8x8_ddr_avx) void x264_predict_8x8_ddr_avx( pixel *src, pixel edge[36] ); +#define x264_predict_8x8_vl_sse2 x264_template(predict_8x8_vl_sse2) void x264_predict_8x8_vl_sse2( pixel *src, pixel edge[36] ); +#define x264_predict_8x8_vl_ssse3 x264_template(predict_8x8_vl_ssse3) void x264_predict_8x8_vl_ssse3( pixel *src, pixel edge[36] ); +#define x264_predict_8x8_vl_avx x264_template(predict_8x8_vl_avx) void x264_predict_8x8_vl_avx( pixel *src, pixel edge[36] ); +#define x264_predict_8x8_vl_mmx2 x264_template(predict_8x8_vl_mmx2) void x264_predict_8x8_vl_mmx2( uint8_t *src, uint8_t edge[36] ); +#define x264_predict_8x8_vr_mmx2 x264_template(predict_8x8_vr_mmx2) void x264_predict_8x8_vr_mmx2( uint8_t *src, uint8_t edge[36] ); +#define x264_predict_8x8_vr_sse2 x264_template(predict_8x8_vr_sse2) void x264_predict_8x8_vr_sse2( pixel *src, pixel edge[36] ); +#define x264_predict_8x8_vr_ssse3 x264_template(predict_8x8_vr_ssse3) void x264_predict_8x8_vr_ssse3( pixel *src, pixel edge[36] ); +#define x264_predict_8x8_vr_avx x264_template(predict_8x8_vr_avx) void x264_predict_8x8_vr_avx( pixel *src, pixel edge[36] ); +#define x264_predict_8x8_hu_sse2 x264_template(predict_8x8_hu_sse2) void x264_predict_8x8_hu_sse2( pixel *src, pixel edge[36] ); +#define x264_predict_8x8_hu_ssse3 x264_template(predict_8x8_hu_ssse3) void x264_predict_8x8_hu_ssse3( pixel *src, pixel edge[36] ); +#define x264_predict_8x8_hu_avx x264_template(predict_8x8_hu_avx) void x264_predict_8x8_hu_avx( pixel 
*src, pixel edge[36] ); +#define x264_predict_8x8_hd_sse2 x264_template(predict_8x8_hd_sse2) void x264_predict_8x8_hd_sse2( pixel *src, pixel edge[36] ); +#define x264_predict_8x8_hd_ssse3 x264_template(predict_8x8_hd_ssse3) void x264_predict_8x8_hd_ssse3( pixel *src, pixel edge[36] ); +#define x264_predict_8x8_hd_avx x264_template(predict_8x8_hd_avx) void x264_predict_8x8_hd_avx( pixel *src, pixel edge[36] ); +#define x264_predict_8x8_filter_mmx2 x264_template(predict_8x8_filter_mmx2) void x264_predict_8x8_filter_mmx2( uint8_t *src, uint8_t edge[36], int i_neighbor, int i_filters ); +#define x264_predict_8x8_filter_sse2 x264_template(predict_8x8_filter_sse2) void x264_predict_8x8_filter_sse2( uint16_t *src, uint16_t edge[36], int i_neighbor, int i_filters ); +#define x264_predict_8x8_filter_ssse3 x264_template(predict_8x8_filter_ssse3) void x264_predict_8x8_filter_ssse3( pixel *src, pixel edge[36], int i_neighbor, int i_filters ); +#define x264_predict_8x8_filter_avx x264_template(predict_8x8_filter_avx) void x264_predict_8x8_filter_avx( uint16_t *src, uint16_t edge[36], int i_neighbor, int i_filters ); +#define x264_predict_4x4_h_avx2 x264_template(predict_4x4_h_avx2) void x264_predict_4x4_h_avx2( uint16_t *src ); +#define x264_predict_4x4_ddl_mmx2 x264_template(predict_4x4_ddl_mmx2) void x264_predict_4x4_ddl_mmx2( pixel *src ); +#define x264_predict_4x4_ddl_sse2 x264_template(predict_4x4_ddl_sse2) void x264_predict_4x4_ddl_sse2( uint16_t *src ); +#define x264_predict_4x4_ddl_avx x264_template(predict_4x4_ddl_avx) void x264_predict_4x4_ddl_avx( uint16_t *src ); +#define x264_predict_4x4_ddr_mmx2 x264_template(predict_4x4_ddr_mmx2) void x264_predict_4x4_ddr_mmx2( pixel *src ); +#define x264_predict_4x4_vl_mmx2 x264_template(predict_4x4_vl_mmx2) void x264_predict_4x4_vl_mmx2( pixel *src ); +#define x264_predict_4x4_vl_sse2 x264_template(predict_4x4_vl_sse2) void x264_predict_4x4_vl_sse2( uint16_t *src ); +#define x264_predict_4x4_vl_avx 
x264_template(predict_4x4_vl_avx) void x264_predict_4x4_vl_avx( uint16_t *src ); +#define x264_predict_4x4_vr_mmx2 x264_template(predict_4x4_vr_mmx2) void x264_predict_4x4_vr_mmx2( uint8_t *src ); +#define x264_predict_4x4_vr_sse2 x264_template(predict_4x4_vr_sse2) void x264_predict_4x4_vr_sse2( uint16_t *src ); +#define x264_predict_4x4_vr_ssse3 x264_template(predict_4x4_vr_ssse3) void x264_predict_4x4_vr_ssse3( pixel *src ); +#define x264_predict_4x4_vr_cache64_ssse3 x264_template(predict_4x4_vr_cache64_ssse3) void x264_predict_4x4_vr_cache64_ssse3( uint8_t *src ); +#define x264_predict_4x4_vr_avx x264_template(predict_4x4_vr_avx) void x264_predict_4x4_vr_avx( uint16_t *src ); +#define x264_predict_4x4_hd_mmx2 x264_template(predict_4x4_hd_mmx2) void x264_predict_4x4_hd_mmx2( pixel *src ); +#define x264_predict_4x4_hd_sse2 x264_template(predict_4x4_hd_sse2) void x264_predict_4x4_hd_sse2( uint16_t *src ); +#define x264_predict_4x4_hd_ssse3 x264_template(predict_4x4_hd_ssse3) void x264_predict_4x4_hd_ssse3( pixel *src ); +#define x264_predict_4x4_hd_avx x264_template(predict_4x4_hd_avx) void x264_predict_4x4_hd_avx( uint16_t *src ); +#define x264_predict_4x4_dc_mmx2 x264_template(predict_4x4_dc_mmx2) void x264_predict_4x4_dc_mmx2( pixel *src ); +#define x264_predict_4x4_ddr_sse2 x264_template(predict_4x4_ddr_sse2) void x264_predict_4x4_ddr_sse2( uint16_t *src ); +#define x264_predict_4x4_ddr_ssse3 x264_template(predict_4x4_ddr_ssse3) void x264_predict_4x4_ddr_ssse3( pixel *src ); +#define x264_predict_4x4_ddr_avx x264_template(predict_4x4_ddr_avx) void x264_predict_4x4_ddr_avx( uint16_t *src ); +#define x264_predict_4x4_hu_mmx2 x264_template(predict_4x4_hu_mmx2) void x264_predict_4x4_hu_mmx2( pixel *src ); #endif diff -Nru x264-0.152.2854+gite9a5903/common/x86/quant-a.asm x264-0.158.2988+git-20191101.7817004/common/x86/quant-a.asm --- x264-0.152.2854+gite9a5903/common/x86/quant-a.asm 2017-12-31 12:50:51.000000000 +0000 +++ 
x264-0.158.2988+git-20191101.7817004/common/x86/quant-a.asm 2019-11-09 05:16:29.000000000 +0000 @@ -1,7 +1,7 @@ ;***************************************************************************** ;* quant-a.asm: x86 quantization and level-run ;***************************************************************************** -;* Copyright (C) 2005-2017 x264 project +;* Copyright (C) 2005-2019 x264 project ;* ;* Authors: Loren Merritt ;* Fiona Glaser @@ -673,7 +673,7 @@ sub t2d, t0d sub t2d, t1d ; i_mf = i_qp % 6 shl t2d, %2 -%ifdef PIC +%if ARCH_X86_64 lea r1, [dequant%1_scale] add r1, t2 %else @@ -761,7 +761,7 @@ sub t2d, t1d ; i_mf = i_qp % 6 shl t2d, %1 %if %2 -%ifdef PIC +%if ARCH_X86_64 %define dmf r1+t2 lea r1, [dequant8_scale] %else @@ -1421,8 +1421,8 @@ %endif %endmacro -cextern decimate_table4 -cextern decimate_table8 +cextern_common decimate_table4 +cextern_common decimate_table8 %macro DECIMATE4x4 1 @@ -1449,7 +1449,7 @@ shr edx, 1 %endif %endif -%ifdef PIC +%if ARCH_X86_64 lea r4, [decimate_mask_table4] %define mask_table r4 %else @@ -1580,16 +1580,11 @@ add eax, r3d jnz .ret9 %endif -%ifdef PIC - lea r4, [decimate_table8] - %define table r4 -%else - %define table decimate_table8 -%endif + lea r4, [decimate_table8] mov al, -6 .loop: tzcnt rcx, r1 - add al, byte [table + rcx] + add al, byte [r4 + rcx] jge .ret9 shr r1, 1 SHRX r1, rcx @@ -2165,7 +2160,7 @@ %macro COEFF_LEVELRUN_LUT 1 cglobal coeff_level_run%1,2,4+(%1/9) -%ifdef PIC +%if ARCH_X86_64 lea r5, [$$] %define GLOBAL +r5-$$ %else diff -Nru x264-0.152.2854+gite9a5903/common/x86/quant.h x264-0.158.2988+git-20191101.7817004/common/x86/quant.h --- x264-0.152.2854+gite9a5903/common/x86/quant.h 2017-12-31 12:50:51.000000000 +0000 +++ x264-0.158.2988+git-20191101.7817004/common/x86/quant.h 2019-11-09 05:16:29.000000000 +0000 @@ -1,7 +1,7 @@ /***************************************************************************** * quant.h: x86 quantization and level-run 
***************************************************************************** - * Copyright (C) 2005-2017 x264 project + * Copyright (C) 2005-2019 x264 project * * Authors: Loren Merritt * Fiona Glaser @@ -25,132 +25,254 @@ * For more information, contact us at licensing@x264.com. *****************************************************************************/ -#ifndef X264_I386_QUANT_H -#define X264_I386_QUANT_H +#ifndef X264_X86_QUANT_H +#define X264_X86_QUANT_H +#define x264_quant_2x2_dc_mmx2 x264_template(quant_2x2_dc_mmx2) int x264_quant_2x2_dc_mmx2( dctcoef dct[4], int mf, int bias ); +#define x264_quant_4x4_dc_mmx2 x264_template(quant_4x4_dc_mmx2) int x264_quant_4x4_dc_mmx2( dctcoef dct[16], int mf, int bias ); +#define x264_quant_4x4_mmx2 x264_template(quant_4x4_mmx2) int x264_quant_4x4_mmx2( dctcoef dct[16], udctcoef mf[16], udctcoef bias[16] ); +#define x264_quant_8x8_mmx2 x264_template(quant_8x8_mmx2) int x264_quant_8x8_mmx2( dctcoef dct[64], udctcoef mf[64], udctcoef bias[64] ); +#define x264_quant_2x2_dc_sse2 x264_template(quant_2x2_dc_sse2) int x264_quant_2x2_dc_sse2( dctcoef dct[16], int mf, int bias ); +#define x264_quant_4x4_dc_sse2 x264_template(quant_4x4_dc_sse2) int x264_quant_4x4_dc_sse2( dctcoef dct[16], int mf, int bias ); +#define x264_quant_4x4_sse2 x264_template(quant_4x4_sse2) int x264_quant_4x4_sse2( dctcoef dct[16], udctcoef mf[16], udctcoef bias[16] ); +#define x264_quant_4x4x4_sse2 x264_template(quant_4x4x4_sse2) int x264_quant_4x4x4_sse2( dctcoef dct[4][16], udctcoef mf[16], udctcoef bias[16] ); +#define x264_quant_8x8_sse2 x264_template(quant_8x8_sse2) int x264_quant_8x8_sse2( dctcoef dct[64], udctcoef mf[64], udctcoef bias[64] ); +#define x264_quant_2x2_dc_ssse3 x264_template(quant_2x2_dc_ssse3) int x264_quant_2x2_dc_ssse3( dctcoef dct[4], int mf, int bias ); +#define x264_quant_4x4_dc_ssse3 x264_template(quant_4x4_dc_ssse3) int x264_quant_4x4_dc_ssse3( dctcoef dct[16], int mf, int bias ); +#define x264_quant_4x4_ssse3 
x264_template(quant_4x4_ssse3) int x264_quant_4x4_ssse3( dctcoef dct[16], udctcoef mf[16], udctcoef bias[16] ); +#define x264_quant_4x4x4_ssse3 x264_template(quant_4x4x4_ssse3) int x264_quant_4x4x4_ssse3( dctcoef dct[4][16], udctcoef mf[16], udctcoef bias[16] ); +#define x264_quant_8x8_ssse3 x264_template(quant_8x8_ssse3) int x264_quant_8x8_ssse3( dctcoef dct[64], udctcoef mf[64], udctcoef bias[64] ); +#define x264_quant_2x2_dc_sse4 x264_template(quant_2x2_dc_sse4) int x264_quant_2x2_dc_sse4( dctcoef dct[16], int mf, int bias ); +#define x264_quant_4x4_dc_sse4 x264_template(quant_4x4_dc_sse4) int x264_quant_4x4_dc_sse4( dctcoef dct[16], int mf, int bias ); +#define x264_quant_4x4_sse4 x264_template(quant_4x4_sse4) int x264_quant_4x4_sse4( dctcoef dct[16], udctcoef mf[16], udctcoef bias[16] ); +#define x264_quant_4x4x4_sse4 x264_template(quant_4x4x4_sse4) int x264_quant_4x4x4_sse4( dctcoef dct[4][16], udctcoef mf[16], udctcoef bias[16] ); +#define x264_quant_8x8_sse4 x264_template(quant_8x8_sse4) int x264_quant_8x8_sse4( dctcoef dct[64], udctcoef mf[64], udctcoef bias[64] ); +#define x264_quant_4x4_avx2 x264_template(quant_4x4_avx2) int x264_quant_4x4_avx2( dctcoef dct[16], udctcoef mf[16], udctcoef bias[16] ); +#define x264_quant_4x4_dc_avx2 x264_template(quant_4x4_dc_avx2) int x264_quant_4x4_dc_avx2( dctcoef dct[16], int mf, int bias ); +#define x264_quant_8x8_avx2 x264_template(quant_8x8_avx2) int x264_quant_8x8_avx2( dctcoef dct[64], udctcoef mf[64], udctcoef bias[64] ); +#define x264_quant_4x4x4_avx2 x264_template(quant_4x4x4_avx2) int x264_quant_4x4x4_avx2( dctcoef dct[4][16], udctcoef mf[16], udctcoef bias[16] ); +#define x264_dequant_4x4_mmx x264_template(dequant_4x4_mmx) void x264_dequant_4x4_mmx( int16_t dct[16], int dequant_mf[6][16], int i_qp ); +#define x264_dequant_4x4dc_mmx2 x264_template(dequant_4x4dc_mmx2) void x264_dequant_4x4dc_mmx2( int16_t dct[16], int dequant_mf[6][16], int i_qp ); +#define x264_dequant_8x8_mmx x264_template(dequant_8x8_mmx) 
void x264_dequant_8x8_mmx( int16_t dct[64], int dequant_mf[6][64], int i_qp ); +#define x264_dequant_4x4_sse2 x264_template(dequant_4x4_sse2) void x264_dequant_4x4_sse2( dctcoef dct[16], int dequant_mf[6][16], int i_qp ); +#define x264_dequant_4x4dc_sse2 x264_template(dequant_4x4dc_sse2) void x264_dequant_4x4dc_sse2( dctcoef dct[16], int dequant_mf[6][16], int i_qp ); +#define x264_dequant_8x8_sse2 x264_template(dequant_8x8_sse2) void x264_dequant_8x8_sse2( dctcoef dct[64], int dequant_mf[6][64], int i_qp ); +#define x264_dequant_4x4_avx x264_template(dequant_4x4_avx) void x264_dequant_4x4_avx( dctcoef dct[16], int dequant_mf[6][16], int i_qp ); +#define x264_dequant_4x4dc_avx x264_template(dequant_4x4dc_avx) void x264_dequant_4x4dc_avx( dctcoef dct[16], int dequant_mf[6][16], int i_qp ); +#define x264_dequant_8x8_avx x264_template(dequant_8x8_avx) void x264_dequant_8x8_avx( dctcoef dct[64], int dequant_mf[6][64], int i_qp ); +#define x264_dequant_4x4_xop x264_template(dequant_4x4_xop) void x264_dequant_4x4_xop( dctcoef dct[16], int dequant_mf[6][16], int i_qp ); +#define x264_dequant_4x4dc_xop x264_template(dequant_4x4dc_xop) void x264_dequant_4x4dc_xop( dctcoef dct[16], int dequant_mf[6][16], int i_qp ); +#define x264_dequant_8x8_xop x264_template(dequant_8x8_xop) void x264_dequant_8x8_xop( dctcoef dct[64], int dequant_mf[6][64], int i_qp ); +#define x264_dequant_4x4_avx2 x264_template(dequant_4x4_avx2) void x264_dequant_4x4_avx2( dctcoef dct[16], int dequant_mf[6][16], int i_qp ); +#define x264_dequant_4x4dc_avx2 x264_template(dequant_4x4dc_avx2) void x264_dequant_4x4dc_avx2( dctcoef dct[16], int dequant_mf[6][16], int i_qp ); +#define x264_dequant_8x8_avx2 x264_template(dequant_8x8_avx2) void x264_dequant_8x8_avx2( dctcoef dct[64], int dequant_mf[6][64], int i_qp ); +#define x264_dequant_4x4_avx512 x264_template(dequant_4x4_avx512) void x264_dequant_4x4_avx512( dctcoef dct[16], int dequant_mf[6][16], int i_qp ); +#define x264_dequant_8x8_avx512 
x264_template(dequant_8x8_avx512) void x264_dequant_8x8_avx512( dctcoef dct[64], int dequant_mf[6][64], int i_qp ); +#define x264_dequant_4x4_flat16_mmx x264_template(dequant_4x4_flat16_mmx) void x264_dequant_4x4_flat16_mmx( int16_t dct[16], int dequant_mf[6][16], int i_qp ); +#define x264_dequant_8x8_flat16_mmx x264_template(dequant_8x8_flat16_mmx) void x264_dequant_8x8_flat16_mmx( int16_t dct[64], int dequant_mf[6][64], int i_qp ); +#define x264_dequant_4x4_flat16_sse2 x264_template(dequant_4x4_flat16_sse2) void x264_dequant_4x4_flat16_sse2( int16_t dct[16], int dequant_mf[6][16], int i_qp ); +#define x264_dequant_8x8_flat16_sse2 x264_template(dequant_8x8_flat16_sse2) void x264_dequant_8x8_flat16_sse2( int16_t dct[64], int dequant_mf[6][64], int i_qp ); +#define x264_dequant_4x4_flat16_avx2 x264_template(dequant_4x4_flat16_avx2) void x264_dequant_4x4_flat16_avx2( int16_t dct[16], int dequant_mf[6][16], int i_qp ); +#define x264_dequant_8x8_flat16_avx2 x264_template(dequant_8x8_flat16_avx2) void x264_dequant_8x8_flat16_avx2( int16_t dct[64], int dequant_mf[6][64], int i_qp ); +#define x264_dequant_8x8_flat16_avx512 x264_template(dequant_8x8_flat16_avx512) void x264_dequant_8x8_flat16_avx512( int16_t dct[64], int dequant_mf[6][64], int i_qp ); +#define x264_idct_dequant_2x4_dc_sse2 x264_template(idct_dequant_2x4_dc_sse2) void x264_idct_dequant_2x4_dc_sse2( dctcoef dct[8], dctcoef dct4x4[8][16], int dequant_mf[6][16], int i_qp ); +#define x264_idct_dequant_2x4_dc_avx x264_template(idct_dequant_2x4_dc_avx) void x264_idct_dequant_2x4_dc_avx ( dctcoef dct[8], dctcoef dct4x4[8][16], int dequant_mf[6][16], int i_qp ); +#define x264_idct_dequant_2x4_dconly_sse2 x264_template(idct_dequant_2x4_dconly_sse2) void x264_idct_dequant_2x4_dconly_sse2( dctcoef dct[8], int dequant_mf[6][16], int i_qp ); +#define x264_idct_dequant_2x4_dconly_avx x264_template(idct_dequant_2x4_dconly_avx) void x264_idct_dequant_2x4_dconly_avx ( dctcoef dct[8], int dequant_mf[6][16], int i_qp ); 
+#define x264_optimize_chroma_2x2_dc_sse2 x264_template(optimize_chroma_2x2_dc_sse2) int x264_optimize_chroma_2x2_dc_sse2( dctcoef dct[4], int dequant_mf ); +#define x264_optimize_chroma_2x2_dc_ssse3 x264_template(optimize_chroma_2x2_dc_ssse3) int x264_optimize_chroma_2x2_dc_ssse3( dctcoef dct[4], int dequant_mf ); +#define x264_optimize_chroma_2x2_dc_sse4 x264_template(optimize_chroma_2x2_dc_sse4) int x264_optimize_chroma_2x2_dc_sse4( dctcoef dct[4], int dequant_mf ); +#define x264_optimize_chroma_2x2_dc_avx x264_template(optimize_chroma_2x2_dc_avx) int x264_optimize_chroma_2x2_dc_avx( dctcoef dct[4], int dequant_mf ); +#define x264_denoise_dct_mmx x264_template(denoise_dct_mmx) void x264_denoise_dct_mmx ( dctcoef *dct, uint32_t *sum, udctcoef *offset, int size ); +#define x264_denoise_dct_sse2 x264_template(denoise_dct_sse2) void x264_denoise_dct_sse2 ( dctcoef *dct, uint32_t *sum, udctcoef *offset, int size ); +#define x264_denoise_dct_ssse3 x264_template(denoise_dct_ssse3) void x264_denoise_dct_ssse3( dctcoef *dct, uint32_t *sum, udctcoef *offset, int size ); +#define x264_denoise_dct_avx x264_template(denoise_dct_avx) void x264_denoise_dct_avx ( dctcoef *dct, uint32_t *sum, udctcoef *offset, int size ); +#define x264_denoise_dct_avx2 x264_template(denoise_dct_avx2) void x264_denoise_dct_avx2 ( dctcoef *dct, uint32_t *sum, udctcoef *offset, int size ); +#define x264_decimate_score15_sse2 x264_template(decimate_score15_sse2) int x264_decimate_score15_sse2( dctcoef *dct ); +#define x264_decimate_score15_ssse3 x264_template(decimate_score15_ssse3) int x264_decimate_score15_ssse3( dctcoef *dct ); +#define x264_decimate_score15_avx512 x264_template(decimate_score15_avx512) int x264_decimate_score15_avx512( dctcoef *dct ); +#define x264_decimate_score16_sse2 x264_template(decimate_score16_sse2) int x264_decimate_score16_sse2( dctcoef *dct ); +#define x264_decimate_score16_ssse3 x264_template(decimate_score16_ssse3) int x264_decimate_score16_ssse3( dctcoef *dct ); 
+#define x264_decimate_score16_avx512 x264_template(decimate_score16_avx512) int x264_decimate_score16_avx512( dctcoef *dct ); +#define x264_decimate_score64_sse2 x264_template(decimate_score64_sse2) int x264_decimate_score64_sse2( dctcoef *dct ); +#define x264_decimate_score64_ssse3 x264_template(decimate_score64_ssse3) int x264_decimate_score64_ssse3( dctcoef *dct ); +#define x264_decimate_score64_avx2 x264_template(decimate_score64_avx2) int x264_decimate_score64_avx2( int16_t *dct ); +#define x264_decimate_score64_avx512 x264_template(decimate_score64_avx512) int x264_decimate_score64_avx512( dctcoef *dct ); +#define x264_coeff_last4_mmx2 x264_template(coeff_last4_mmx2) int x264_coeff_last4_mmx2( dctcoef *dct ); +#define x264_coeff_last8_mmx2 x264_template(coeff_last8_mmx2) int x264_coeff_last8_mmx2( dctcoef *dct ); +#define x264_coeff_last15_mmx2 x264_template(coeff_last15_mmx2) int x264_coeff_last15_mmx2( dctcoef *dct ); +#define x264_coeff_last16_mmx2 x264_template(coeff_last16_mmx2) int x264_coeff_last16_mmx2( dctcoef *dct ); +#define x264_coeff_last64_mmx2 x264_template(coeff_last64_mmx2) int x264_coeff_last64_mmx2( dctcoef *dct ); +#define x264_coeff_last8_sse2 x264_template(coeff_last8_sse2) int x264_coeff_last8_sse2( dctcoef *dct ); +#define x264_coeff_last15_sse2 x264_template(coeff_last15_sse2) int x264_coeff_last15_sse2( dctcoef *dct ); +#define x264_coeff_last16_sse2 x264_template(coeff_last16_sse2) int x264_coeff_last16_sse2( dctcoef *dct ); +#define x264_coeff_last64_sse2 x264_template(coeff_last64_sse2) int x264_coeff_last64_sse2( dctcoef *dct ); +#define x264_coeff_last4_lzcnt x264_template(coeff_last4_lzcnt) int x264_coeff_last4_lzcnt( dctcoef *dct ); +#define x264_coeff_last8_lzcnt x264_template(coeff_last8_lzcnt) int x264_coeff_last8_lzcnt( dctcoef *dct ); +#define x264_coeff_last15_lzcnt x264_template(coeff_last15_lzcnt) int x264_coeff_last15_lzcnt( dctcoef *dct ); +#define x264_coeff_last16_lzcnt x264_template(coeff_last16_lzcnt) int 
x264_coeff_last16_lzcnt( dctcoef *dct ); +#define x264_coeff_last64_lzcnt x264_template(coeff_last64_lzcnt) int x264_coeff_last64_lzcnt( dctcoef *dct ); +#define x264_coeff_last64_avx2 x264_template(coeff_last64_avx2) int x264_coeff_last64_avx2 ( dctcoef *dct ); +#define x264_coeff_last4_avx512 x264_template(coeff_last4_avx512) int x264_coeff_last4_avx512( int32_t *dct ); +#define x264_coeff_last8_avx512 x264_template(coeff_last8_avx512) int x264_coeff_last8_avx512( dctcoef *dct ); +#define x264_coeff_last15_avx512 x264_template(coeff_last15_avx512) int x264_coeff_last15_avx512( dctcoef *dct ); +#define x264_coeff_last16_avx512 x264_template(coeff_last16_avx512) int x264_coeff_last16_avx512( dctcoef *dct ); +#define x264_coeff_last64_avx512 x264_template(coeff_last64_avx512) int x264_coeff_last64_avx512( dctcoef *dct ); +#define x264_coeff_level_run16_mmx2 x264_template(coeff_level_run16_mmx2) int x264_coeff_level_run16_mmx2( dctcoef *dct, x264_run_level_t *runlevel ); +#define x264_coeff_level_run16_sse2 x264_template(coeff_level_run16_sse2) int x264_coeff_level_run16_sse2( dctcoef *dct, x264_run_level_t *runlevel ); +#define x264_coeff_level_run16_lzcnt x264_template(coeff_level_run16_lzcnt) int x264_coeff_level_run16_lzcnt( dctcoef *dct, x264_run_level_t *runlevel ); +#define x264_coeff_level_run16_ssse3 x264_template(coeff_level_run16_ssse3) int x264_coeff_level_run16_ssse3( dctcoef *dct, x264_run_level_t *runlevel ); +#define x264_coeff_level_run16_ssse3_lzcnt x264_template(coeff_level_run16_ssse3_lzcnt) int x264_coeff_level_run16_ssse3_lzcnt( dctcoef *dct, x264_run_level_t *runlevel ); +#define x264_coeff_level_run16_avx2 x264_template(coeff_level_run16_avx2) int x264_coeff_level_run16_avx2( dctcoef *dct, x264_run_level_t *runlevel ); +#define x264_coeff_level_run15_mmx2 x264_template(coeff_level_run15_mmx2) int x264_coeff_level_run15_mmx2( dctcoef *dct, x264_run_level_t *runlevel ); +#define x264_coeff_level_run15_sse2 x264_template(coeff_level_run15_sse2) 
int x264_coeff_level_run15_sse2( dctcoef *dct, x264_run_level_t *runlevel ); +#define x264_coeff_level_run15_lzcnt x264_template(coeff_level_run15_lzcnt) int x264_coeff_level_run15_lzcnt( dctcoef *dct, x264_run_level_t *runlevel ); +#define x264_coeff_level_run15_ssse3 x264_template(coeff_level_run15_ssse3) int x264_coeff_level_run15_ssse3( dctcoef *dct, x264_run_level_t *runlevel ); +#define x264_coeff_level_run15_ssse3_lzcnt x264_template(coeff_level_run15_ssse3_lzcnt) int x264_coeff_level_run15_ssse3_lzcnt( dctcoef *dct, x264_run_level_t *runlevel ); +#define x264_coeff_level_run15_avx2 x264_template(coeff_level_run15_avx2) int x264_coeff_level_run15_avx2( dctcoef *dct, x264_run_level_t *runlevel ); +#define x264_coeff_level_run4_mmx2 x264_template(coeff_level_run4_mmx2) int x264_coeff_level_run4_mmx2( dctcoef *dct, x264_run_level_t *runlevel ); +#define x264_coeff_level_run4_lzcnt x264_template(coeff_level_run4_lzcnt) int x264_coeff_level_run4_lzcnt( dctcoef *dct, x264_run_level_t *runlevel ); +#define x264_coeff_level_run4_ssse3 x264_template(coeff_level_run4_ssse3) int x264_coeff_level_run4_ssse3( dctcoef *dct, x264_run_level_t *runlevel ); +#define x264_coeff_level_run4_ssse3_lzcnt x264_template(coeff_level_run4_ssse3_lzcnt) int x264_coeff_level_run4_ssse3_lzcnt( dctcoef *dct, x264_run_level_t *runlevel ); +#define x264_coeff_level_run8_mmx2 x264_template(coeff_level_run8_mmx2) int x264_coeff_level_run8_mmx2( dctcoef *dct, x264_run_level_t *runlevel ); +#define x264_coeff_level_run8_lzcnt x264_template(coeff_level_run8_lzcnt) int x264_coeff_level_run8_lzcnt( dctcoef *dct, x264_run_level_t *runlevel ); +#define x264_coeff_level_run8_sse2 x264_template(coeff_level_run8_sse2) int x264_coeff_level_run8_sse2( dctcoef *dct, x264_run_level_t *runlevel ); -int x264_coeff_level_run8_lzcnt( dctcoef *dct, x264_run_level_t *runlevel ); +#define x264_coeff_level_run8_ssse3 x264_template(coeff_level_run8_ssse3) int x264_coeff_level_run8_ssse3( dctcoef *dct, 
x264_run_level_t *runlevel ); +#define x264_coeff_level_run8_ssse3_lzcnt x264_template(coeff_level_run8_ssse3_lzcnt) int x264_coeff_level_run8_ssse3_lzcnt( dctcoef *dct, x264_run_level_t *runlevel ); +#define x264_trellis_cabac_4x4_sse2 x264_template(trellis_cabac_4x4_sse2) int x264_trellis_cabac_4x4_sse2 ( TRELLIS_PARAMS, int b_ac ); +#define x264_trellis_cabac_4x4_ssse3 x264_template(trellis_cabac_4x4_ssse3) int x264_trellis_cabac_4x4_ssse3( TRELLIS_PARAMS, int b_ac ); +#define x264_trellis_cabac_8x8_sse2 x264_template(trellis_cabac_8x8_sse2) int x264_trellis_cabac_8x8_sse2 ( TRELLIS_PARAMS, int b_interlaced ); +#define x264_trellis_cabac_8x8_ssse3 x264_template(trellis_cabac_8x8_ssse3) int x264_trellis_cabac_8x8_ssse3( TRELLIS_PARAMS, int b_interlaced ); +#define x264_trellis_cabac_4x4_psy_sse2 x264_template(trellis_cabac_4x4_psy_sse2) int x264_trellis_cabac_4x4_psy_sse2 ( TRELLIS_PARAMS, int b_ac, dctcoef *fenc_dct, int i_psy_trellis ); +#define x264_trellis_cabac_4x4_psy_ssse3 x264_template(trellis_cabac_4x4_psy_ssse3) int x264_trellis_cabac_4x4_psy_ssse3( TRELLIS_PARAMS, int b_ac, dctcoef *fenc_dct, int i_psy_trellis ); +#define x264_trellis_cabac_8x8_psy_sse2 x264_template(trellis_cabac_8x8_psy_sse2) int x264_trellis_cabac_8x8_psy_sse2 ( TRELLIS_PARAMS, int b_interlaced, dctcoef *fenc_dct, int i_psy_trellis ); +#define x264_trellis_cabac_8x8_psy_ssse3 x264_template(trellis_cabac_8x8_psy_ssse3) int x264_trellis_cabac_8x8_psy_ssse3( TRELLIS_PARAMS, int b_interlaced, dctcoef *fenc_dct, int i_psy_trellis ); +#define x264_trellis_cabac_dc_sse2 x264_template(trellis_cabac_dc_sse2) int x264_trellis_cabac_dc_sse2 ( TRELLIS_PARAMS, int i_coefs ); +#define x264_trellis_cabac_dc_ssse3 x264_template(trellis_cabac_dc_ssse3) int x264_trellis_cabac_dc_ssse3( TRELLIS_PARAMS, int i_coefs ); +#define x264_trellis_cabac_chroma_422_dc_sse2 x264_template(trellis_cabac_chroma_422_dc_sse2) int x264_trellis_cabac_chroma_422_dc_sse2 ( TRELLIS_PARAMS ); +#define 
x264_trellis_cabac_chroma_422_dc_ssse3 x264_template(trellis_cabac_chroma_422_dc_ssse3) int x264_trellis_cabac_chroma_422_dc_ssse3( TRELLIS_PARAMS ); #endif diff -Nru x264-0.152.2854+gite9a5903/common/x86/sad16-a.asm x264-0.158.2988+git-20191101.7817004/common/x86/sad16-a.asm --- x264-0.152.2854+gite9a5903/common/x86/sad16-a.asm 2017-12-31 12:50:51.000000000 +0000 +++ x264-0.158.2988+git-20191101.7817004/common/x86/sad16-a.asm 2019-11-09 05:16:29.000000000 +0000 @@ -1,7 +1,7 @@ ;***************************************************************************** ;* sad16-a.asm: x86 high depth sad functions ;***************************************************************************** -;* Copyright (C) 2010-2017 x264 project +;* Copyright (C) 2010-2019 x264 project ;* ;* Authors: Oskar Arvidsson ;* Henrik Gramner diff -Nru x264-0.152.2854+gite9a5903/common/x86/sad-a.asm x264-0.158.2988+git-20191101.7817004/common/x86/sad-a.asm --- x264-0.152.2854+gite9a5903/common/x86/sad-a.asm 2017-12-31 12:50:51.000000000 +0000 +++ x264-0.158.2988+git-20191101.7817004/common/x86/sad-a.asm 2019-11-09 05:16:29.000000000 +0000 @@ -1,7 +1,7 @@ ;***************************************************************************** ;* sad-a.asm: x86 sad functions ;***************************************************************************** -;* Copyright (C) 2003-2017 x264 project +;* Copyright (C) 2003-2019 x264 project ;* ;* Authors: Loren Merritt ;* Fiona Glaser @@ -1920,7 +1920,7 @@ shl r4d, 4 ; code size = 80 %endif %define sad_w16_addr (sad_w16_align1_%1 + (sad_w16_align1_%1 - sad_w16_align2_%1)) -%ifdef PIC +%if ARCH_X86_64 lea r5, [sad_w16_addr] add r5, r4 %else diff -Nru x264-0.152.2854+gite9a5903/common/x86/trellis-64.asm x264-0.158.2988+git-20191101.7817004/common/x86/trellis-64.asm --- x264-0.152.2854+gite9a5903/common/x86/trellis-64.asm 2017-12-31 12:50:51.000000000 +0000 +++ x264-0.158.2988+git-20191101.7817004/common/x86/trellis-64.asm 2019-11-09 05:16:29.000000000 +0000 @@ -1,7 +1,7 @@ 
;***************************************************************************** ;* trellis-64.asm: x86_64 trellis quantization ;***************************************************************************** -;* Copyright (C) 2012-2017 x264 project +;* Copyright (C) 2012-2019 x264 project ;* ;* Authors: Loren Merritt ;* @@ -61,17 +61,17 @@ cextern pd_8 cextern pd_0123 cextern pd_4567 -cextern cabac_entropy -cextern cabac_transition +cextern_common cabac_entropy +cextern_common cabac_transition cextern cabac_size_unary cextern cabac_transition_unary -cextern dct4_weight_tab -cextern dct8_weight_tab -cextern dct4_weight2_tab -cextern dct8_weight2_tab -cextern last_coeff_flag_offset_8x8 -cextern significant_coeff_flag_offset_8x8 -cextern coeff_flag_offset_chroma_422_dc +cextern_common dct4_weight_tab +cextern_common dct8_weight_tab +cextern_common dct4_weight2_tab +cextern_common dct8_weight2_tab +cextern_common last_coeff_flag_offset_8x8 +cextern_common significant_coeff_flag_offset_8x8 +cextern_common coeff_flag_offset_chroma_422_dc SECTION .text @@ -202,7 +202,6 @@ paddd m6, m6 %define unquant_mf m6 %endif -%ifdef PIC %if dc == 0 mov unquant_mfm, unquant_mfq %endif @@ -212,9 +211,6 @@ ; (Any address in .text would work, this one was just convenient.) 
lea r0, [$$] %define GLOBAL +r0-$$ -%else - %define GLOBAL -%endif TRELLIS_LOOP 0 ; node_ctx 0..3 TRELLIS_LOOP 1 ; node_ctx 1..7 @@ -304,12 +300,8 @@ mov r10, cabac_state_sigm %if num_coefs == 64 mov r6d, b_interlacedm -%ifdef PIC add r6d, iid movzx r6d, byte [significant_coeff_flag_offset_8x8 + r6 GLOBAL] -%else - movzx r6d, byte [significant_coeff_flag_offset_8x8 + r6 + iiq] -%endif movzx r10, byte [r10 + r6] %elif num_coefs == 8 movzx r13, byte [coeff_flag_offset_chroma_422_dc + iiq GLOBAL] @@ -408,12 +400,8 @@ %if dc pmuludq m0, unquant_mf %else -%ifdef PIC mov r10, unquant_mfm LOAD_DUP m3, [r10 + zigzagiq*4] -%else - LOAD_DUP m3, [unquant_mfq + zigzagiq*4] -%endif pmuludq m0, m3 %endif paddd m0, [pq_128] diff -Nru x264-0.152.2854+gite9a5903/common/x86/util.h x264-0.158.2988+git-20191101.7817004/common/x86/util.h --- x264-0.152.2854+gite9a5903/common/x86/util.h 2017-12-31 12:50:51.000000000 +0000 +++ x264-0.158.2988+git-20191101.7817004/common/x86/util.h 2019-11-09 05:16:29.000000000 +0000 @@ -1,7 +1,7 @@ /***************************************************************************** * util.h: x86 inline asm ***************************************************************************** - * Copyright (C) 2008-2017 x264 project + * Copyright (C) 2008-2019 x264 project * * Authors: Fiona Glaser * Loren Merritt @@ -122,7 +122,7 @@ } #define x264_predictor_clip x264_predictor_clip_mmx2 -static int ALWAYS_INLINE x264_predictor_clip_mmx2( int16_t (*dst)[2], int16_t (*mvc)[2], int i_mvc, int16_t mv_limit[2][2], uint32_t pmv ) +static ALWAYS_INLINE int x264_predictor_clip_mmx2( int16_t (*dst)[2], int16_t (*mvc)[2], int i_mvc, int16_t mv_limit[2][2], uint32_t pmv ) { static const uint32_t pd_32 = 0x20; intptr_t tmp = (intptr_t)mv_limit, mvc_max = i_mvc, i = 0; @@ -184,7 +184,7 @@ /* Same as the above, except we do (mv + 2) >> 2 on the input. 
*/ #define x264_predictor_roundclip x264_predictor_roundclip_mmx2 -static int ALWAYS_INLINE x264_predictor_roundclip_mmx2( int16_t (*dst)[2], int16_t (*mvc)[2], int i_mvc, int16_t mv_limit[2][2], uint32_t pmv ) +static ALWAYS_INLINE int x264_predictor_roundclip_mmx2( int16_t (*dst)[2], int16_t (*mvc)[2], int i_mvc, int16_t mv_limit[2][2], uint32_t pmv ) { static const uint64_t pw_2 = 0x0002000200020002ULL; static const uint32_t pd_32 = 0x20; diff -Nru x264-0.152.2854+gite9a5903/common/x86/x86inc.asm x264-0.158.2988+git-20191101.7817004/common/x86/x86inc.asm --- x264-0.152.2854+gite9a5903/common/x86/x86inc.asm 2017-12-31 12:50:51.000000000 +0000 +++ x264-0.158.2988+git-20191101.7817004/common/x86/x86inc.asm 2019-11-09 05:16:29.000000000 +0000 @@ -1,12 +1,12 @@ ;***************************************************************************** ;* x86inc.asm: x264asm abstraction layer ;***************************************************************************** -;* Copyright (C) 2005-2017 x264 project +;* Copyright (C) 2005-2019 x264 project ;* ;* Authors: Loren Merritt +;* Henrik Gramner ;* Anton Mitrofanov ;* Fiona Glaser -;* Henrik Gramner ;* ;* Permission to use, copy, modify, and/or distribute this software for any ;* purpose with or without fee is hereby granted, provided that the above @@ -65,12 +65,19 @@ %endif %define FORMAT_ELF 0 +%define FORMAT_MACHO 0 %ifidn __OUTPUT_FORMAT__,elf %define FORMAT_ELF 1 %elifidn __OUTPUT_FORMAT__,elf32 %define FORMAT_ELF 1 %elifidn __OUTPUT_FORMAT__,elf64 %define FORMAT_ELF 1 +%elifidn __OUTPUT_FORMAT__,macho + %define FORMAT_MACHO 1 +%elifidn __OUTPUT_FORMAT__,macho32 + %define FORMAT_MACHO 1 +%elifidn __OUTPUT_FORMAT__,macho64 + %define FORMAT_MACHO 1 %endif %ifdef PREFIX @@ -80,23 +87,30 @@ %endif %macro SECTION_RODATA 0-1 16 - SECTION .rodata align=%1 + %ifidn __OUTPUT_FORMAT__,win32 + SECTION .rdata align=%1 + %elif WIN64 + SECTION .rdata align=%1 + %else + SECTION .rodata align=%1 + %endif %endmacro -%if WIN64 - %define PIC 
-%elif ARCH_X86_64 == 0 -; x86_32 doesn't require PIC. -; Some distros prefer shared objects to be PIC, but nothing breaks if -; the code contains a few textrels, so we'll skip that complexity. - %undef PIC -%endif -%ifdef PIC +%if ARCH_X86_64 + %define PIC 1 ; always use PIC on x86-64 default rel +%elifidn __OUTPUT_FORMAT__,win32 + %define PIC 0 ; PIC isn't used on 32-bit Windows +%elifndef PIC + %define PIC 0 %endif +%define HAVE_PRIVATE_EXTERN 1 %ifdef __NASM_VER__ %use smartalign + %if __NASM_VERSION_ID__ < 0x020e0000 ; 2.14 + %define HAVE_PRIVATE_EXTERN 0 + %endif %endif ; Macros to eliminate most code duplication between x86_32 and x86_64: @@ -214,6 +228,18 @@ %define gprsize 4 %endif +%macro LEA 2 +%if ARCH_X86_64 + lea %1, [%2] +%elif PIC + call $+5 ; special-cased to not affect the RSB on most CPU:s + pop %1 + add %1, (%2)-$+1 +%else + mov %1, %2 +%endif +%endmacro + %macro PUSH 1 push %1 %ifidn rstk, rsp @@ -275,6 +301,10 @@ %endif %endmacro +%if ARCH_X86_64 == 0 + %define movsxd movifnidn +%endif + %macro movsxdifnidn 2 %ifnidn %1, %2 movsxd %1, %2 @@ -663,7 +693,7 @@ BRANCH_INSTR jz, je, jnz, jne, jl, jle, jnl, jnle, jg, jge, jng, jnge, ja, jae, jna, jnae, jb, jbe, jnb, jnbe, jc, jnc, js, jns, jo, jno, jp, jnp -%macro TAIL_CALL 2 ; callee, is_nonadjacent +%macro TAIL_CALL 1-2 1 ; callee, is_nonadjacent %if has_epilogue call %1 RET @@ -693,22 +723,25 @@ %endmacro %macro cglobal_internal 2-3+ annotate_function_size - %if %1 - %xdefine %%FUNCTION_PREFIX private_prefix - %xdefine %%VISIBILITY hidden - %else - %xdefine %%FUNCTION_PREFIX public_prefix - %xdefine %%VISIBILITY - %endif %ifndef cglobaled_%2 - %xdefine %2 mangle(%%FUNCTION_PREFIX %+ _ %+ %2) + %if %1 + %xdefine %2 mangle(private_prefix %+ _ %+ %2) + %else + %xdefine %2 mangle(public_prefix %+ _ %+ %2) + %endif %xdefine %2.skip_prologue %2 %+ .skip_prologue CAT_XDEFINE cglobaled_, %2, 1 %endif %xdefine current_function %2 %xdefine current_function_section __SECT__ %if FORMAT_ELF - global 
%2:function %%VISIBILITY + %if %1 + global %2:function hidden + %else + global %2:function + %endif + %elif FORMAT_MACHO && HAVE_PRIVATE_EXTERN && %1 + global %2:private_extern %else global %2 %endif @@ -725,6 +758,18 @@ %endif %endmacro +; Create a global symbol from a local label with the correct name mangling and type +%macro cglobal_label 1 + %if FORMAT_ELF + global current_function %+ %1:function hidden + %elif FORMAT_MACHO && HAVE_PRIVATE_EXTERN + global current_function %+ %1:private_extern + %else + global current_function %+ %1 + %endif + %1: +%endmacro + %macro cextern 1 %xdefine %1 mangle(private_prefix %+ _ %+ %1) CAT_XDEFINE cglobaled_, %1, 1 @@ -744,6 +789,8 @@ %xdefine %1 mangle(private_prefix %+ _ %+ %1) %if FORMAT_ELF global %1:data hidden + %elif FORMAT_MACHO && HAVE_PRIVATE_EXTERN + global %1:private_extern %else global %1 %endif @@ -788,19 +835,20 @@ %assign cpuflags_sse4 (1<<10)| cpuflags_ssse3 %assign cpuflags_sse42 (1<<11)| cpuflags_sse4 %assign cpuflags_aesni (1<<12)| cpuflags_sse42 -%assign cpuflags_avx (1<<13)| cpuflags_sse42 -%assign cpuflags_xop (1<<14)| cpuflags_avx -%assign cpuflags_fma4 (1<<15)| cpuflags_avx -%assign cpuflags_fma3 (1<<16)| cpuflags_avx -%assign cpuflags_bmi1 (1<<17)| cpuflags_avx|cpuflags_lzcnt -%assign cpuflags_bmi2 (1<<18)| cpuflags_bmi1 -%assign cpuflags_avx2 (1<<19)| cpuflags_fma3|cpuflags_bmi2 -%assign cpuflags_avx512 (1<<20)| cpuflags_avx2 ; F, CD, BW, DQ, VL - -%assign cpuflags_cache32 (1<<21) -%assign cpuflags_cache64 (1<<22) -%assign cpuflags_aligned (1<<23) ; not a cpu feature, but a function variant -%assign cpuflags_atom (1<<24) +%assign cpuflags_gfni (1<<13)| cpuflags_sse42 +%assign cpuflags_avx (1<<14)| cpuflags_sse42 +%assign cpuflags_xop (1<<15)| cpuflags_avx +%assign cpuflags_fma4 (1<<16)| cpuflags_avx +%assign cpuflags_fma3 (1<<17)| cpuflags_avx +%assign cpuflags_bmi1 (1<<18)| cpuflags_avx|cpuflags_lzcnt +%assign cpuflags_bmi2 (1<<19)| cpuflags_bmi1 +%assign cpuflags_avx2 (1<<20)| 
cpuflags_fma3|cpuflags_bmi2 +%assign cpuflags_avx512 (1<<21)| cpuflags_avx2 ; F, CD, BW, DQ, VL + +%assign cpuflags_cache32 (1<<22) +%assign cpuflags_cache64 (1<<23) +%assign cpuflags_aligned (1<<24) ; not a cpu feature, but a function variant +%assign cpuflags_atom (1<<25) ; Returns a boolean value expressing whether or not the specified cpuflag is enabled. %define cpuflag(x) (((((cpuflags & (cpuflags_ %+ x)) ^ (cpuflags_ %+ x)) - 1) >> 31) & 1) @@ -871,6 +919,36 @@ %undef %1%2 %endmacro +%macro DEFINE_MMREGS 1 ; mmtype + %assign %%prev_mmregs 0 + %ifdef num_mmregs + %assign %%prev_mmregs num_mmregs + %endif + + %assign num_mmregs 8 + %if ARCH_X86_64 && mmsize >= 16 + %assign num_mmregs 16 + %if cpuflag(avx512) || mmsize == 64 + %assign num_mmregs 32 + %endif + %endif + + %assign %%i 0 + %rep num_mmregs + CAT_XDEFINE m, %%i, %1 %+ %%i + CAT_XDEFINE nn%1, %%i, %%i + %assign %%i %%i+1 + %endrep + %if %%prev_mmregs > num_mmregs + %rep %%prev_mmregs - num_mmregs + CAT_UNDEF m, %%i + CAT_UNDEF nn %+ mmtype, %%i + %assign %%i %%i+1 + %endrep + %endif + %xdefine mmtype %1 +%endmacro + ; Prefer registers 16-31 over 0-15 to avoid having to use vzeroupper %macro AVX512_MM_PERMUTATION 0-1 0 ; start_reg %if ARCH_X86_64 && cpuflag(avx512) @@ -887,47 +965,26 @@ %assign avx_enabled 0 %define RESET_MM_PERMUTATION INIT_MMX %1 %define mmsize 8 - %define num_mmregs 8 %define mova movq %define movu movq %define movh movd %define movnta movntq - %assign %%i 0 - %rep 8 - CAT_XDEFINE m, %%i, mm %+ %%i - CAT_XDEFINE nnmm, %%i, %%i - %assign %%i %%i+1 - %endrep - %rep 24 - CAT_UNDEF m, %%i - CAT_UNDEF nnmm, %%i - %assign %%i %%i+1 - %endrep INIT_CPUFLAGS %1 + DEFINE_MMREGS mm %endmacro %macro INIT_XMM 0-1+ %assign avx_enabled 0 %define RESET_MM_PERMUTATION INIT_XMM %1 %define mmsize 16 - %define num_mmregs 8 - %if ARCH_X86_64 - %define num_mmregs 32 - %endif %define mova movdqa %define movu movdqu %define movh movq %define movnta movntdq - %assign %%i 0 - %rep num_mmregs - CAT_XDEFINE m, 
%%i, xmm %+ %%i - CAT_XDEFINE nnxmm, %%i, %%i - %assign %%i %%i+1 - %endrep INIT_CPUFLAGS %1 + DEFINE_MMREGS xmm %if WIN64 - ; Swap callee-saved registers with volatile registers - AVX512_MM_PERMUTATION 6 + AVX512_MM_PERMUTATION 6 ; Swap callee-saved registers with volatile registers %endif %endmacro @@ -935,21 +992,12 @@ %assign avx_enabled 1 %define RESET_MM_PERMUTATION INIT_YMM %1 %define mmsize 32 - %define num_mmregs 8 - %if ARCH_X86_64 - %define num_mmregs 32 - %endif %define mova movdqa %define movu movdqu %undef movh %define movnta movntdq - %assign %%i 0 - %rep num_mmregs - CAT_XDEFINE m, %%i, ymm %+ %%i - CAT_XDEFINE nnymm, %%i, %%i - %assign %%i %%i+1 - %endrep INIT_CPUFLAGS %1 + DEFINE_MMREGS ymm AVX512_MM_PERMUTATION %endmacro @@ -957,21 +1005,12 @@ %assign avx_enabled 1 %define RESET_MM_PERMUTATION INIT_ZMM %1 %define mmsize 64 - %define num_mmregs 8 - %if ARCH_X86_64 - %define num_mmregs 32 - %endif %define mova movdqa %define movu movdqu %undef movh %define movnta movntdq - %assign %%i 0 - %rep num_mmregs - CAT_XDEFINE m, %%i, zmm %+ %%i - CAT_XDEFINE nnzmm, %%i, %%i - %assign %%i %%i+1 - %endrep INIT_CPUFLAGS %1 + DEFINE_MMREGS zmm AVX512_MM_PERMUTATION %endmacro @@ -1070,19 +1109,32 @@ %endif %assign %%i 0 %rep num_mmregs - CAT_XDEFINE %%f, %%i, m %+ %%i + %xdefine %%tmp m %+ %%i + CAT_XDEFINE %%f, %%i, regnumof %+ %%tmp %assign %%i %%i+1 %endrep %endmacro -%macro LOAD_MM_PERMUTATION 1 ; name to load from - %ifdef %1_m0 +%macro LOAD_MM_PERMUTATION 0-1 ; name to load from + %if %0 + %xdefine %%f %1_m + %else + %xdefine %%f current_function %+ _m + %endif + %xdefine %%tmp %%f %+ 0 + %ifnum %%tmp + RESET_MM_PERMUTATION %assign %%i 0 %rep num_mmregs - CAT_XDEFINE m, %%i, %1_m %+ %%i - CAT_XDEFINE nn, m %+ %%i, %%i + %xdefine %%tmp %%f %+ %%i + CAT_XDEFINE %%m, %%i, m %+ %%tmp %assign %%i %%i+1 %endrep + %rep num_mmregs + %assign %%i %%i-1 + CAT_XDEFINE m, %%i, %%m %+ %%i + CAT_XDEFINE nn, m %+ %%i, %%i + %endrep %endif %endmacro @@ -1188,8 +1240,22 @@ 
%ifdef cpuname %if notcpuflag(%2) %error use of ``%1'' %2 instruction in cpuname function: current_function - %elif cpuflags_%2 < cpuflags_sse && notcpuflag(sse2) && __sizeofreg > 8 + %elif %3 == 0 && __sizeofreg == 16 && notcpuflag(sse2) %error use of ``%1'' sse2 instruction in cpuname function: current_function + %elif %3 == 0 && __sizeofreg == 32 && notcpuflag(avx2) + %error use of ``%1'' avx2 instruction in cpuname function: current_function + %elif __sizeofreg == 16 && notcpuflag(sse) + %error use of ``%1'' sse instruction in cpuname function: current_function + %elif __sizeofreg == 32 && notcpuflag(avx) + %error use of ``%1'' avx instruction in cpuname function: current_function + %elif __sizeofreg == 64 && notcpuflag(avx512) + %error use of ``%1'' avx512 instruction in cpuname function: current_function + %elifidn %1, pextrw ; special case because the base instruction is mmx2, + %ifnid %6 ; but sse4 is required for memory operands + %if notcpuflag(sse4) + %error use of ``%1'' sse4 instruction in cpuname function: current_function + %endif + %endif %endif %endif %endif @@ -1233,9 +1299,40 @@ %elif %0 >= 9 __instr %6, %7, %8, %9 %elif %0 == 8 - __instr %6, %7, %8 + %if avx_enabled && %5 + %xdefine __src1 %7 + %xdefine __src2 %8 + %ifnum regnumof%7 + %ifnum regnumof%8 + %if regnumof%7 < 8 && regnumof%8 >= 8 && regnumof%8 < 16 && sizeof%8 <= 32 + ; Most VEX-encoded instructions require an additional byte to encode when + ; src2 is a high register (e.g. m8..15). If the instruction is commutative + ; we can swap src1 and src2 when doing so reduces the instruction length. 
+ %xdefine __src1 %8 + %xdefine __src2 %7 + %endif + %endif + %endif + __instr %6, __src1, __src2 + %else + __instr %6, %7, %8 + %endif %elif %0 == 7 - __instr %6, %7 + %if avx_enabled && %5 + %xdefine __src1 %6 + %xdefine __src2 %7 + %ifnum regnumof%6 + %ifnum regnumof%7 + %if regnumof%6 < 8 && regnumof%7 >= 8 && regnumof%7 < 16 && sizeof%7 <= 32 + %xdefine __src1 %7 + %xdefine __src2 %6 + %endif + %endif + %endif + __instr %6, __src1, __src2 + %else + __instr %6, %7 + %endif %else __instr %6 %endif @@ -1270,12 +1367,12 @@ AVX_INSTR addss, sse, 1, 0, 0 AVX_INSTR addsubpd, sse3, 1, 0, 0 AVX_INSTR addsubps, sse3, 1, 0, 0 -AVX_INSTR aesdec, fnord, 0, 0, 0 -AVX_INSTR aesdeclast, fnord, 0, 0, 0 -AVX_INSTR aesenc, fnord, 0, 0, 0 -AVX_INSTR aesenclast, fnord, 0, 0, 0 -AVX_INSTR aesimc -AVX_INSTR aeskeygenassist +AVX_INSTR aesdec, aesni, 0, 0, 0 +AVX_INSTR aesdeclast, aesni, 0, 0, 0 +AVX_INSTR aesenc, aesni, 0, 0, 0 +AVX_INSTR aesenclast, aesni, 0, 0, 0 +AVX_INSTR aesimc, aesni +AVX_INSTR aeskeygenassist, aesni AVX_INSTR andnpd, sse2, 1, 0, 0 AVX_INSTR andnps, sse, 1, 0, 0 AVX_INSTR andpd, sse2, 1, 0, 1 @@ -1284,42 +1381,77 @@ AVX_INSTR blendps, sse4, 1, 1, 0 AVX_INSTR blendvpd, sse4 ; can't be emulated AVX_INSTR blendvps, sse4 ; can't be emulated +AVX_INSTR cmpeqpd, sse2, 1, 0, 1 +AVX_INSTR cmpeqps, sse, 1, 0, 1 +AVX_INSTR cmpeqsd, sse2, 1, 0, 0 +AVX_INSTR cmpeqss, sse, 1, 0, 0 +AVX_INSTR cmplepd, sse2, 1, 0, 0 +AVX_INSTR cmpleps, sse, 1, 0, 0 +AVX_INSTR cmplesd, sse2, 1, 0, 0 +AVX_INSTR cmpless, sse, 1, 0, 0 +AVX_INSTR cmpltpd, sse2, 1, 0, 0 +AVX_INSTR cmpltps, sse, 1, 0, 0 +AVX_INSTR cmpltsd, sse2, 1, 0, 0 +AVX_INSTR cmpltss, sse, 1, 0, 0 +AVX_INSTR cmpneqpd, sse2, 1, 0, 1 +AVX_INSTR cmpneqps, sse, 1, 0, 1 +AVX_INSTR cmpneqsd, sse2, 1, 0, 0 +AVX_INSTR cmpneqss, sse, 1, 0, 0 +AVX_INSTR cmpnlepd, sse2, 1, 0, 0 +AVX_INSTR cmpnleps, sse, 1, 0, 0 +AVX_INSTR cmpnlesd, sse2, 1, 0, 0 +AVX_INSTR cmpnless, sse, 1, 0, 0 +AVX_INSTR cmpnltpd, sse2, 1, 0, 0 +AVX_INSTR cmpnltps, sse, 
1, 0, 0 +AVX_INSTR cmpnltsd, sse2, 1, 0, 0 +AVX_INSTR cmpnltss, sse, 1, 0, 0 +AVX_INSTR cmpordpd, sse2 1, 0, 1 +AVX_INSTR cmpordps, sse 1, 0, 1 +AVX_INSTR cmpordsd, sse2 1, 0, 0 +AVX_INSTR cmpordss, sse 1, 0, 0 AVX_INSTR cmppd, sse2, 1, 1, 0 AVX_INSTR cmpps, sse, 1, 1, 0 AVX_INSTR cmpsd, sse2, 1, 1, 0 AVX_INSTR cmpss, sse, 1, 1, 0 -AVX_INSTR comisd, sse2 -AVX_INSTR comiss, sse -AVX_INSTR cvtdq2pd, sse2 -AVX_INSTR cvtdq2ps, sse2 -AVX_INSTR cvtpd2dq, sse2 -AVX_INSTR cvtpd2ps, sse2 -AVX_INSTR cvtps2dq, sse2 -AVX_INSTR cvtps2pd, sse2 -AVX_INSTR cvtsd2si, sse2 +AVX_INSTR cmpunordpd, sse2, 1, 0, 1 +AVX_INSTR cmpunordps, sse, 1, 0, 1 +AVX_INSTR cmpunordsd, sse2, 1, 0, 0 +AVX_INSTR cmpunordss, sse, 1, 0, 0 +AVX_INSTR comisd, sse2, 1 +AVX_INSTR comiss, sse, 1 +AVX_INSTR cvtdq2pd, sse2, 1 +AVX_INSTR cvtdq2ps, sse2, 1 +AVX_INSTR cvtpd2dq, sse2, 1 +AVX_INSTR cvtpd2ps, sse2, 1 +AVX_INSTR cvtps2dq, sse2, 1 +AVX_INSTR cvtps2pd, sse2, 1 +AVX_INSTR cvtsd2si, sse2, 1 AVX_INSTR cvtsd2ss, sse2, 1, 0, 0 AVX_INSTR cvtsi2sd, sse2, 1, 0, 0 AVX_INSTR cvtsi2ss, sse, 1, 0, 0 AVX_INSTR cvtss2sd, sse2, 1, 0, 0 -AVX_INSTR cvtss2si, sse -AVX_INSTR cvttpd2dq, sse2 -AVX_INSTR cvttps2dq, sse2 -AVX_INSTR cvttsd2si, sse2 -AVX_INSTR cvttss2si, sse +AVX_INSTR cvtss2si, sse, 1 +AVX_INSTR cvttpd2dq, sse2, 1 +AVX_INSTR cvttps2dq, sse2, 1 +AVX_INSTR cvttsd2si, sse2, 1 +AVX_INSTR cvttss2si, sse, 1 AVX_INSTR divpd, sse2, 1, 0, 0 AVX_INSTR divps, sse, 1, 0, 0 AVX_INSTR divsd, sse2, 1, 0, 0 AVX_INSTR divss, sse, 1, 0, 0 AVX_INSTR dppd, sse4, 1, 1, 0 AVX_INSTR dpps, sse4, 1, 1, 0 -AVX_INSTR extractps, sse4 +AVX_INSTR extractps, sse4, 1 +AVX_INSTR gf2p8affineinvqb, gfni, 0, 1, 0 +AVX_INSTR gf2p8affineqb, gfni, 0, 1, 0 +AVX_INSTR gf2p8mulb, gfni, 0, 0, 0 AVX_INSTR haddpd, sse3, 1, 0, 0 AVX_INSTR haddps, sse3, 1, 0, 0 AVX_INSTR hsubpd, sse3, 1, 0, 0 AVX_INSTR hsubps, sse3, 1, 0, 0 AVX_INSTR insertps, sse4, 1, 1, 0 AVX_INSTR lddqu, sse3 -AVX_INSTR ldmxcsr, sse +AVX_INSTR ldmxcsr, sse, 1 AVX_INSTR maskmovdqu, sse2 
AVX_INSTR maxpd, sse2, 1, 0, 1 AVX_INSTR maxps, sse, 1, 0, 1 @@ -1329,10 +1461,10 @@ AVX_INSTR minps, sse, 1, 0, 1 AVX_INSTR minsd, sse2, 1, 0, 0 AVX_INSTR minss, sse, 1, 0, 0 -AVX_INSTR movapd, sse2 -AVX_INSTR movaps, sse +AVX_INSTR movapd, sse2, 1 +AVX_INSTR movaps, sse, 1 AVX_INSTR movd, mmx -AVX_INSTR movddup, sse3 +AVX_INSTR movddup, sse3, 1 AVX_INSTR movdqa, sse2 AVX_INSTR movdqu, sse2 AVX_INSTR movhlps, sse, 1, 0, 0 @@ -1341,19 +1473,19 @@ AVX_INSTR movlhps, sse, 1, 0, 0 AVX_INSTR movlpd, sse2, 1, 0, 0 AVX_INSTR movlps, sse, 1, 0, 0 -AVX_INSTR movmskpd, sse2 -AVX_INSTR movmskps, sse +AVX_INSTR movmskpd, sse2, 1 +AVX_INSTR movmskps, sse, 1 AVX_INSTR movntdq, sse2 AVX_INSTR movntdqa, sse4 -AVX_INSTR movntpd, sse2 -AVX_INSTR movntps, sse +AVX_INSTR movntpd, sse2, 1 +AVX_INSTR movntps, sse, 1 AVX_INSTR movq, mmx AVX_INSTR movsd, sse2, 1, 0, 0 -AVX_INSTR movshdup, sse3 -AVX_INSTR movsldup, sse3 +AVX_INSTR movshdup, sse3, 1 +AVX_INSTR movsldup, sse3, 1 AVX_INSTR movss, sse, 1, 0, 0 -AVX_INSTR movupd, sse2 -AVX_INSTR movups, sse +AVX_INSTR movupd, sse2, 1 +AVX_INSTR movups, sse, 1 AVX_INSTR mpsadbw, sse4, 0, 1, 0 AVX_INSTR mulpd, sse2, 1, 0, 1 AVX_INSTR mulps, sse, 1, 0, 1 @@ -1486,27 +1618,27 @@ AVX_INSTR punpckldq, mmx, 0, 0, 0 AVX_INSTR punpcklqdq, sse2, 0, 0, 0 AVX_INSTR pxor, mmx, 0, 0, 1 -AVX_INSTR rcpps, sse +AVX_INSTR rcpps, sse, 1 AVX_INSTR rcpss, sse, 1, 0, 0 -AVX_INSTR roundpd, sse4 -AVX_INSTR roundps, sse4 +AVX_INSTR roundpd, sse4, 1 +AVX_INSTR roundps, sse4, 1 AVX_INSTR roundsd, sse4, 1, 1, 0 AVX_INSTR roundss, sse4, 1, 1, 0 -AVX_INSTR rsqrtps, sse +AVX_INSTR rsqrtps, sse, 1 AVX_INSTR rsqrtss, sse, 1, 0, 0 AVX_INSTR shufpd, sse2, 1, 1, 0 AVX_INSTR shufps, sse, 1, 1, 0 -AVX_INSTR sqrtpd, sse2 -AVX_INSTR sqrtps, sse +AVX_INSTR sqrtpd, sse2, 1 +AVX_INSTR sqrtps, sse, 1 AVX_INSTR sqrtsd, sse2, 1, 0, 0 AVX_INSTR sqrtss, sse, 1, 0, 0 -AVX_INSTR stmxcsr, sse +AVX_INSTR stmxcsr, sse, 1 AVX_INSTR subpd, sse2, 1, 0, 0 AVX_INSTR subps, sse, 1, 0, 0 AVX_INSTR 
subsd, sse2, 1, 0, 0 AVX_INSTR subss, sse, 1, 0, 0 -AVX_INSTR ucomisd, sse2 -AVX_INSTR ucomiss, sse +AVX_INSTR ucomisd, sse2, 1 +AVX_INSTR ucomiss, sse, 1 AVX_INSTR unpckhpd, sse2, 1, 0, 0 AVX_INSTR unpckhps, sse, 1, 0, 0 AVX_INSTR unpcklpd, sse2, 1, 0, 0 @@ -1519,6 +1651,38 @@ AVX_INSTR pfsub, 3dnow, 1, 0, 0 AVX_INSTR pfmul, 3dnow, 1, 0, 1 +;%1 == instruction +;%2 == minimal instruction set +%macro GPR_INSTR 2 + %macro %1 2-5 fnord, %1, %2 + %ifdef cpuname + %if notcpuflag(%5) + %error use of ``%4'' %5 instruction in cpuname function: current_function + %endif + %endif + %ifidn %3, fnord + %4 %1, %2 + %else + %4 %1, %2, %3 + %endif + %endmacro +%endmacro + +GPR_INSTR andn, bmi1 +GPR_INSTR bextr, bmi1 +GPR_INSTR blsi, bmi1 +GPR_INSTR blsr, bmi1 +GPR_INSTR blsmsk, bmi1 +GPR_INSTR bzhi, bmi2 +GPR_INSTR mulx, bmi2 +GPR_INSTR pdep, bmi2 +GPR_INSTR pext, bmi2 +GPR_INSTR popcnt, sse42 +GPR_INSTR rorx, bmi2 +GPR_INSTR sarx, bmi2 +GPR_INSTR shlx, bmi2 +GPR_INSTR shrx, bmi2 + ; base-4 constants for shuffles %assign i 0 %rep 256 @@ -1615,6 +1779,11 @@ %assign %%evex_required 1 %endif %endif + %ifnum regnumof%3 + %if regnumof%3 >= 16 || sizeof%3 > 32 + %assign %%evex_required 1 + %endif + %endif %if %%evex_required %6 %%args %else diff -Nru x264-0.152.2854+gite9a5903/common/x86/x86util.asm x264-0.158.2988+git-20191101.7817004/common/x86/x86util.asm --- x264-0.152.2854+gite9a5903/common/x86/x86util.asm 2017-12-31 12:50:51.000000000 +0000 +++ x264-0.158.2988+git-20191101.7817004/common/x86/x86util.asm 2019-11-09 05:16:29.000000000 +0000 @@ -1,7 +1,7 @@ ;***************************************************************************** ;* x86util.asm: x86 utility macros ;***************************************************************************** -;* Copyright (C) 2008-2017 x264 project +;* Copyright (C) 2008-2019 x264 project ;* ;* Authors: Holger Lubitz ;* Loren Merritt @@ -24,6 +24,23 @@ ;* For more information, contact us at licensing@x264.com. 
;***************************************************************************** +; like cextern, but with a plain x264 prefix instead of a bitdepth-specific one +%macro cextern_common 1 + %xdefine %1 mangle(x264 %+ _ %+ %1) + CAT_XDEFINE cglobaled_, %1, 1 + extern %1 +%endmacro + +%ifndef BIT_DEPTH + %assign BIT_DEPTH 0 +%endif + +%if BIT_DEPTH > 8 + %assign HIGH_BIT_DEPTH 1 +%else + %assign HIGH_BIT_DEPTH 0 +%endif + %assign FENC_STRIDE 16 %assign FDEC_STRIDE 32 @@ -54,7 +71,6 @@ %endif %endmacro - %macro SBUTTERFLY 4 %ifidn %1, dqqq vperm2i128 m%4, m%2, m%3, q0301 ; punpckh diff -Nru x264-0.152.2854+gite9a5903/config.guess x264-0.158.2988+git-20191101.7817004/config.guess --- x264-0.152.2854+gite9a5903/config.guess 2017-12-31 12:50:51.000000000 +0000 +++ x264-0.158.2988+git-20191101.7817004/config.guess 2019-11-09 05:16:29.000000000 +0000 @@ -1,12 +1,14 @@ #! /bin/sh # Attempt to guess a canonical system name. -# Copyright 1992-2017 Free Software Foundation, Inc. +# Copyright (C) 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999, +# 2000, 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, +# 2011, 2012 Free Software Foundation, Inc. -timestamp='2017-11-07' +timestamp='2012-09-25' # This file is free software; you can redistribute it and/or modify it # under the terms of the GNU General Public License as published by -# the Free Software Foundation; either version 3 of the License, or +# the Free Software Foundation; either version 2 of the License, or # (at your option) any later version. # # This program is distributed in the hope that it will be useful, but @@ -15,22 +17,24 @@ # General Public License for more details. # # You should have received a copy of the GNU General Public License -# along with this program; if not, see . +# along with this program; if not, see . 
# # As a special exception to the GNU General Public License, if you # distribute this file as part of a program that contains a # configuration script generated by Autoconf, you may include it under -# the same distribution terms that you use for the rest of that -# program. This Exception is an additional permission under section 7 -# of the GNU General Public License, version 3 ("GPLv3"). +# the same distribution terms that you use for the rest of that program. + + +# Originally written by Per Bothner. Please send patches (context +# diff format) to and include a ChangeLog +# entry. # -# Originally written by Per Bothner; maintained since 2000 by Ben Elliston. +# This script attempts to guess a canonical system name similar to +# config.sub. If it succeeds, it prints the system name on stdout, and +# exits with 0. Otherwise, it exits with 1. # # You can get the latest version of this script from: -# https://git.savannah.gnu.org/gitweb/?p=config.git;a=blob_plain;f=config.guess -# -# Please send patches to . - +# http://git.savannah.gnu.org/gitweb/?p=config.git;a=blob_plain;f=config.guess;hb=HEAD me=`echo "$0" | sed -e 's,.*/,,'` @@ -39,7 +43,7 @@ Output the configuration name of the system \`$me' is run on. -Options: +Operation modes: -h, --help print this help, then exit -t, --time-stamp print date of last modification, then exit -v, --version print version number, then exit @@ -50,7 +54,9 @@ GNU config.guess ($timestamp) Originally written by Per Bothner. -Copyright 1992-2017 Free Software Foundation, Inc. +Copyright (C) 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000, +2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011, 2012 +Free Software Foundation, Inc. This is free software; see the source for copying conditions. There is NO warranty; not even for MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE." 
@@ -132,27 +138,6 @@ UNAME_SYSTEM=`(uname -s) 2>/dev/null` || UNAME_SYSTEM=unknown UNAME_VERSION=`(uname -v) 2>/dev/null` || UNAME_VERSION=unknown -case "${UNAME_SYSTEM}" in -Linux|GNU|GNU/*) - # If the system lacks a compiler, then just pick glibc. - # We could probably try harder. - LIBC=gnu - - eval $set_cc_for_build - cat <<-EOF > $dummy.c - #include - #if defined(__UCLIBC__) - LIBC=uclibc - #elif defined(__dietlibc__) - LIBC=dietlibc - #else - LIBC=gnu - #endif - EOF - eval `$CC_FOR_BUILD -E $dummy.c 2>/dev/null | grep '^LIBC' | sed 's, ,,g'` - ;; -esac - # Note: order is significant - the case branches are not exclusive. case "${UNAME_MACHINE}:${UNAME_SYSTEM}:${UNAME_RELEASE}:${UNAME_VERSION}" in @@ -168,29 +153,19 @@ # Note: NetBSD doesn't particularly care about the vendor # portion of the name. We always set it to "unknown". sysctl="sysctl -n hw.machine_arch" - UNAME_MACHINE_ARCH=`(uname -p 2>/dev/null || \ - /sbin/$sysctl 2>/dev/null || \ - /usr/sbin/$sysctl 2>/dev/null || \ - echo unknown)` + UNAME_MACHINE_ARCH=`(/sbin/$sysctl 2>/dev/null || \ + /usr/sbin/$sysctl 2>/dev/null || echo unknown)` case "${UNAME_MACHINE_ARCH}" in armeb) machine=armeb-unknown ;; arm*) machine=arm-unknown ;; sh3el) machine=shl-unknown ;; sh3eb) machine=sh-unknown ;; sh5el) machine=sh5le-unknown ;; - earmv*) - arch=`echo ${UNAME_MACHINE_ARCH} | sed -e 's,^e\(armv[0-9]\).*$,\1,'` - endian=`echo ${UNAME_MACHINE_ARCH} | sed -ne 's,^.*\(eb\)$,\1,p'` - machine=${arch}${endian}-unknown - ;; *) machine=${UNAME_MACHINE_ARCH}-unknown ;; esac # The Operating System including object format, if it has switched - # to ELF recently (or will in the future) and ABI. + # to ELF recently, or will in the future. case "${UNAME_MACHINE_ARCH}" in - earm*) - os=netbsdelf - ;; arm*|i386|m68k|ns32k|sh3*|sparc|vax) eval $set_cc_for_build if echo __ELF__ | $CC_FOR_BUILD -E - 2>/dev/null \ @@ -207,13 +182,6 @@ os=netbsd ;; esac - # Determine ABI tags. 
- case "${UNAME_MACHINE_ARCH}" in - earm*) - expr='s/^earmv[0-9]/-eabi/;s/eb$//' - abi=`echo ${UNAME_MACHINE_ARCH} | sed -e "$expr"` - ;; - esac # The OS release # Debian GNU/NetBSD machines have a different userland, and # thus, need a distinct triplet. However, they do not need @@ -224,13 +192,13 @@ release='-gnu' ;; *) - release=`echo ${UNAME_RELEASE} | sed -e 's/[-_].*//' | cut -d. -f1,2` + release=`echo ${UNAME_RELEASE}|sed -e 's/[-_].*/\./'` ;; esac # Since CPU_TYPE-MANUFACTURER-KERNEL-OPERATING_SYSTEM: # contains redundant information, the shorter form: # CPU_TYPE-MANUFACTURER-OPERATING_SYSTEM is used. - echo "${machine}-${os}${release}${abi}" + echo "${machine}-${os}${release}" exit ;; *:Bitrig:*:*) UNAME_MACHINE_ARCH=`arch | sed 's/Bitrig.//'` @@ -240,13 +208,6 @@ UNAME_MACHINE_ARCH=`arch | sed 's/OpenBSD.//'` echo ${UNAME_MACHINE_ARCH}-unknown-openbsd${UNAME_RELEASE} exit ;; - *:LibertyBSD:*:*) - UNAME_MACHINE_ARCH=`arch | sed 's/^.*BSD\.//'` - echo ${UNAME_MACHINE_ARCH}-unknown-libertybsd${UNAME_RELEASE} - exit ;; - *:MidnightBSD:*:*) - echo ${UNAME_MACHINE}-unknown-midnightbsd${UNAME_RELEASE} - exit ;; *:ekkoBSD:*:*) echo ${UNAME_MACHINE}-unknown-ekkobsd${UNAME_RELEASE} exit ;; @@ -259,12 +220,6 @@ *:MirBSD:*:*) echo ${UNAME_MACHINE}-unknown-mirbsd${UNAME_RELEASE} exit ;; - *:Sortix:*:*) - echo ${UNAME_MACHINE}-unknown-sortix - exit ;; - *:Redox:*:*) - echo ${UNAME_MACHINE}-unknown-redox - exit ;; alpha:OSF1:*:*) case $UNAME_RELEASE in *4.0) @@ -281,46 +236,55 @@ ALPHA_CPU_TYPE=`/usr/sbin/psrinfo -v | sed -n -e 's/^ The alpha \(.*\) processor.*$/\1/p' | head -n 1` case "$ALPHA_CPU_TYPE" in "EV4 (21064)") - UNAME_MACHINE=alpha ;; + UNAME_MACHINE="alpha" ;; "EV4.5 (21064)") - UNAME_MACHINE=alpha ;; + UNAME_MACHINE="alpha" ;; "LCA4 (21066/21068)") - UNAME_MACHINE=alpha ;; + UNAME_MACHINE="alpha" ;; "EV5 (21164)") - UNAME_MACHINE=alphaev5 ;; + UNAME_MACHINE="alphaev5" ;; "EV5.6 (21164A)") - UNAME_MACHINE=alphaev56 ;; + UNAME_MACHINE="alphaev56" ;; "EV5.6 
(21164PC)") - UNAME_MACHINE=alphapca56 ;; + UNAME_MACHINE="alphapca56" ;; "EV5.7 (21164PC)") - UNAME_MACHINE=alphapca57 ;; + UNAME_MACHINE="alphapca57" ;; "EV6 (21264)") - UNAME_MACHINE=alphaev6 ;; + UNAME_MACHINE="alphaev6" ;; "EV6.7 (21264A)") - UNAME_MACHINE=alphaev67 ;; + UNAME_MACHINE="alphaev67" ;; "EV6.8CB (21264C)") - UNAME_MACHINE=alphaev68 ;; + UNAME_MACHINE="alphaev68" ;; "EV6.8AL (21264B)") - UNAME_MACHINE=alphaev68 ;; + UNAME_MACHINE="alphaev68" ;; "EV6.8CX (21264D)") - UNAME_MACHINE=alphaev68 ;; + UNAME_MACHINE="alphaev68" ;; "EV6.9A (21264/EV69A)") - UNAME_MACHINE=alphaev69 ;; + UNAME_MACHINE="alphaev69" ;; "EV7 (21364)") - UNAME_MACHINE=alphaev7 ;; + UNAME_MACHINE="alphaev7" ;; "EV7.9 (21364A)") - UNAME_MACHINE=alphaev79 ;; + UNAME_MACHINE="alphaev79" ;; esac # A Pn.n version is a patched version. # A Vn.n version is a released version. # A Tn.n version is a released field test version. # A Xn.n version is an unreleased experimental baselevel. # 1.2 uses "1.2" for uname -r. - echo ${UNAME_MACHINE}-dec-osf`echo ${UNAME_RELEASE} | sed -e 's/^[PVTX]//' | tr ABCDEFGHIJKLMNOPQRSTUVWXYZ abcdefghijklmnopqrstuvwxyz` + echo ${UNAME_MACHINE}-dec-osf`echo ${UNAME_RELEASE} | sed -e 's/^[PVTX]//' | tr 'ABCDEFGHIJKLMNOPQRSTUVWXYZ' 'abcdefghijklmnopqrstuvwxyz'` # Reset EXIT trap before exiting to avoid spurious non-zero exit code. exitcode=$? trap '' 0 exit $exitcode ;; + Alpha\ *:Windows_NT*:*) + # How do we know it's Interix rather than the generic POSIX subsystem? + # Should we change UNAME_MACHINE based on the output of uname instead + # of the specific Alpha model? + echo alpha-pc-interix + exit ;; + 21064:Windows_NT:50:3) + echo alpha-dec-winnt3.5 + exit ;; Amiga*:UNIX_System_V:4.0:*) echo m68k-unknown-sysv4 exit ;; @@ -380,16 +344,16 @@ exit ;; i86pc:SunOS:5.*:* | i86xen:SunOS:5.*:*) eval $set_cc_for_build - SUN_ARCH=i386 + SUN_ARCH="i386" # If there is a compiler, see if it is configured for 64-bit objects. 
# Note that the Sun cc does not turn __LP64__ into 1 like gcc does. # This test works for both compilers. - if [ "$CC_FOR_BUILD" != no_compiler_found ]; then + if [ "$CC_FOR_BUILD" != 'no_compiler_found' ]; then if (echo '#ifdef __amd64'; echo IS_64BIT_ARCH; echo '#endif') | \ - (CCOPTS="" $CC_FOR_BUILD -E - 2>/dev/null) | \ + (CCOPTS= $CC_FOR_BUILD -E - 2>/dev/null) | \ grep IS_64BIT_ARCH >/dev/null then - SUN_ARCH=x86_64 + SUN_ARCH="x86_64" fi fi echo ${SUN_ARCH}-pc-solaris2`echo ${UNAME_RELEASE}|sed -e 's/[^.]*//'` @@ -414,7 +378,7 @@ exit ;; sun*:*:4.2BSD:*) UNAME_RELEASE=`(sed 1q /etc/motd | awk '{print substr($5,1,3)}') 2>/dev/null` - test "x${UNAME_RELEASE}" = x && UNAME_RELEASE=3 + test "x${UNAME_RELEASE}" = "x" && UNAME_RELEASE=3 case "`/bin/arch`" in sun3) echo m68k-sun-sunos${UNAME_RELEASE} @@ -482,13 +446,13 @@ #endif #if defined (host_mips) && defined (MIPSEB) #if defined (SYSTYPE_SYSV) - printf ("mips-mips-riscos%ssysv\\n", argv[1]); exit (0); + printf ("mips-mips-riscos%ssysv\n", argv[1]); exit (0); #endif #if defined (SYSTYPE_SVR4) - printf ("mips-mips-riscos%ssvr4\\n", argv[1]); exit (0); + printf ("mips-mips-riscos%ssvr4\n", argv[1]); exit (0); #endif #if defined (SYSTYPE_BSD43) || defined(SYSTYPE_BSD) - printf ("mips-mips-riscos%sbsd\\n", argv[1]); exit (0); + printf ("mips-mips-riscos%sbsd\n", argv[1]); exit (0); #endif #endif exit (-1); @@ -600,9 +564,8 @@ else IBM_ARCH=powerpc fi - if [ -x /usr/bin/lslpp ] ; then - IBM_REV=`/usr/bin/lslpp -Lqc bos.rte.libc | - awk -F: '{ print $3 }' | sed s/[0-9]*$/0/` + if [ -x /usr/bin/oslevel ] ; then + IBM_REV=`/usr/bin/oslevel` else IBM_REV=${UNAME_VERSION}.${UNAME_RELEASE} fi @@ -611,7 +574,7 @@ *:AIX:*:*) echo rs6000-ibm-aix exit ;; - ibmrt:4.4BSD:*|romp-ibm:4.4BSD:*) + ibmrt:4.4BSD:*|romp-ibm:BSD:*) echo romp-ibm-bsd4.4 exit ;; ibmrt:*BSD:*|romp-ibm:BSD:*) # covers RT/PC BSD and @@ -632,20 +595,20 @@ 9000/[34678]??:HP-UX:*:*) HPUX_REV=`echo ${UNAME_RELEASE}|sed -e 's/[^.]*.[0B]*//'` case 
"${UNAME_MACHINE}" in - 9000/31?) HP_ARCH=m68000 ;; - 9000/[34]??) HP_ARCH=m68k ;; + 9000/31? ) HP_ARCH=m68000 ;; + 9000/[34]?? ) HP_ARCH=m68k ;; 9000/[678][0-9][0-9]) if [ -x /usr/bin/getconf ]; then sc_cpu_version=`/usr/bin/getconf SC_CPU_VERSION 2>/dev/null` sc_kernel_bits=`/usr/bin/getconf SC_KERNEL_BITS 2>/dev/null` case "${sc_cpu_version}" in - 523) HP_ARCH=hppa1.0 ;; # CPU_PA_RISC1_0 - 528) HP_ARCH=hppa1.1 ;; # CPU_PA_RISC1_1 + 523) HP_ARCH="hppa1.0" ;; # CPU_PA_RISC1_0 + 528) HP_ARCH="hppa1.1" ;; # CPU_PA_RISC1_1 532) # CPU_PA_RISC2_0 case "${sc_kernel_bits}" in - 32) HP_ARCH=hppa2.0n ;; - 64) HP_ARCH=hppa2.0w ;; - '') HP_ARCH=hppa2.0 ;; # HP-UX 10.20 + 32) HP_ARCH="hppa2.0n" ;; + 64) HP_ARCH="hppa2.0w" ;; + '') HP_ARCH="hppa2.0" ;; # HP-UX 10.20 esac ;; esac fi @@ -684,11 +647,11 @@ exit (0); } EOF - (CCOPTS="" $CC_FOR_BUILD -o $dummy $dummy.c 2>/dev/null) && HP_ARCH=`$dummy` + (CCOPTS= $CC_FOR_BUILD -o $dummy $dummy.c 2>/dev/null) && HP_ARCH=`$dummy` test -z "$HP_ARCH" && HP_ARCH=hppa fi ;; esac - if [ ${HP_ARCH} = hppa2.0w ] + if [ ${HP_ARCH} = "hppa2.0w" ] then eval $set_cc_for_build @@ -701,12 +664,12 @@ # $ CC_FOR_BUILD="cc +DA2.0w" ./config.guess # => hppa64-hp-hpux11.23 - if echo __LP64__ | (CCOPTS="" $CC_FOR_BUILD -E - 2>/dev/null) | + if echo __LP64__ | (CCOPTS= $CC_FOR_BUILD -E - 2>/dev/null) | grep -q __LP64__ then - HP_ARCH=hppa2.0w + HP_ARCH="hppa2.0w" else - HP_ARCH=hppa64 + HP_ARCH="hppa64" fi fi echo ${HP_ARCH}-hp-hpux${HPUX_REV} @@ -746,7 +709,7 @@ { echo "$SYSTEM_NAME"; exit; } echo unknown-hitachi-hiuxwe2 exit ;; - 9000/7??:4.3bsd:*:* | 9000/8?[79]:4.3bsd:*:*) + 9000/7??:4.3bsd:*:* | 9000/8?[79]:4.3bsd:*:* ) echo hppa1.1-hp-bsd exit ;; 9000/8??:4.3bsd:*:*) @@ -755,7 +718,7 @@ *9??*:MPE/iX:*:* | *3000*:MPE/iX:*:*) echo hppa1.0-hp-mpeix exit ;; - hp7??:OSF1:*:* | hp8?[79]:OSF1:*:*) + hp7??:OSF1:*:* | hp8?[79]:OSF1:*:* ) echo hppa1.1-hp-osf exit ;; hp8??:OSF1:*:*) @@ -811,14 +774,14 @@ echo craynv-cray-unicosmp${UNAME_RELEASE} | sed -e 
's/\.[^.]*$/.X/' exit ;; F30[01]:UNIX_System_V:*:* | F700:UNIX_System_V:*:*) - FUJITSU_PROC=`uname -m | tr ABCDEFGHIJKLMNOPQRSTUVWXYZ abcdefghijklmnopqrstuvwxyz` - FUJITSU_SYS=`uname -p | tr ABCDEFGHIJKLMNOPQRSTUVWXYZ abcdefghijklmnopqrstuvwxyz | sed -e 's/\///'` + FUJITSU_PROC=`uname -m | tr 'ABCDEFGHIJKLMNOPQRSTUVWXYZ' 'abcdefghijklmnopqrstuvwxyz'` + FUJITSU_SYS=`uname -p | tr 'ABCDEFGHIJKLMNOPQRSTUVWXYZ' 'abcdefghijklmnopqrstuvwxyz' | sed -e 's/\///'` FUJITSU_REL=`echo ${UNAME_RELEASE} | sed -e 's/ /_/'` echo "${FUJITSU_PROC}-fujitsu-${FUJITSU_SYS}${FUJITSU_REL}" exit ;; 5000:UNIX_System_V:4.*:*) - FUJITSU_SYS=`uname -p | tr ABCDEFGHIJKLMNOPQRSTUVWXYZ abcdefghijklmnopqrstuvwxyz | sed -e 's/\///'` - FUJITSU_REL=`echo ${UNAME_RELEASE} | tr ABCDEFGHIJKLMNOPQRSTUVWXYZ abcdefghijklmnopqrstuvwxyz | sed -e 's/ /_/'` + FUJITSU_SYS=`uname -p | tr 'ABCDEFGHIJKLMNOPQRSTUVWXYZ' 'abcdefghijklmnopqrstuvwxyz' | sed -e 's/\///'` + FUJITSU_REL=`echo ${UNAME_RELEASE} | tr 'ABCDEFGHIJKLMNOPQRSTUVWXYZ' 'abcdefghijklmnopqrstuvwxyz' | sed -e 's/ /_/'` echo "sparc-fujitsu-${FUJITSU_SYS}${FUJITSU_REL}" exit ;; i*86:BSD/386:*:* | i*86:BSD/OS:*:* | *:Ascend\ Embedded/OS:*:*) @@ -834,11 +797,10 @@ UNAME_PROCESSOR=`/usr/bin/uname -p` case ${UNAME_PROCESSOR} in amd64) - UNAME_PROCESSOR=x86_64 ;; - i386) - UNAME_PROCESSOR=i586 ;; + echo x86_64-unknown-freebsd`echo ${UNAME_RELEASE}|sed -e 's/[-(].*//'` ;; + *) + echo ${UNAME_PROCESSOR}-unknown-freebsd`echo ${UNAME_RELEASE}|sed -e 's/[-(].*//'` ;; esac - echo ${UNAME_PROCESSOR}-unknown-freebsd`echo ${UNAME_RELEASE}|sed -e 's/[-(].*//'` exit ;; i*:CYGWIN*:*) echo ${UNAME_MACHINE}-pc-cygwin @@ -849,9 +811,13 @@ *:MINGW*:*) echo ${UNAME_MACHINE}-pc-mingw32 exit ;; - *:MSYS*:*) + i*:MSYS*:*) echo ${UNAME_MACHINE}-pc-msys exit ;; + i*:windows32*:*) + # uname -m includes "-pc" on this system. 
+ echo ${UNAME_MACHINE}-mingw32 + exit ;; i*:PW*:*) echo ${UNAME_MACHINE}-pc-pw32 exit ;; @@ -867,32 +833,47 @@ echo ia64-unknown-interix${UNAME_RELEASE} exit ;; esac ;; + [345]86:Windows_95:* | [345]86:Windows_98:* | [345]86:Windows_NT:*) + echo i${UNAME_MACHINE}-pc-mks + exit ;; + 8664:Windows_NT:*) + echo x86_64-pc-mks + exit ;; + i*:Windows_NT*:* | Pentium*:Windows_NT*:*) + # How do we know it's Interix rather than the generic POSIX subsystem? + # It also conflicts with pre-2.0 versions of AT&T UWIN. Should we + # UNAME_MACHINE based on the output of uname instead of i386? + echo i586-pc-interix + exit ;; i*:UWIN*:*) echo ${UNAME_MACHINE}-pc-uwin exit ;; amd64:CYGWIN*:*:* | x86_64:CYGWIN*:*:*) echo x86_64-unknown-cygwin exit ;; + p*:CYGWIN*:*) + echo powerpcle-unknown-cygwin + exit ;; prep*:SunOS:5.*:*) echo powerpcle-unknown-solaris2`echo ${UNAME_RELEASE}|sed -e 's/[^.]*//'` exit ;; *:GNU:*:*) # the GNU system - echo `echo ${UNAME_MACHINE}|sed -e 's,[-/].*$,,'`-unknown-${LIBC}`echo ${UNAME_RELEASE}|sed -e 's,/.*$,,'` + echo `echo ${UNAME_MACHINE}|sed -e 's,[-/].*$,,'`-unknown-gnu`echo ${UNAME_RELEASE}|sed -e 's,/.*$,,'` exit ;; *:GNU/*:*:*) # other systems with GNU libc and userland - echo ${UNAME_MACHINE}-unknown-`echo ${UNAME_SYSTEM} | sed 's,^[^/]*/,,' | tr "[:upper:]" "[:lower:]"``echo ${UNAME_RELEASE}|sed -e 's/[-(].*//'`-${LIBC} + echo ${UNAME_MACHINE}-unknown-`echo ${UNAME_SYSTEM} | sed 's,^[^/]*/,,' | tr '[A-Z]' '[a-z]'``echo ${UNAME_RELEASE}|sed -e 's/[-(].*//'`-gnu exit ;; i*86:Minix:*:*) echo ${UNAME_MACHINE}-pc-minix exit ;; aarch64:Linux:*:*) - echo ${UNAME_MACHINE}-unknown-linux-${LIBC} + echo ${UNAME_MACHINE}-unknown-linux-gnu exit ;; aarch64_be:Linux:*:*) UNAME_MACHINE=aarch64_be - echo ${UNAME_MACHINE}-unknown-linux-${LIBC} + echo ${UNAME_MACHINE}-unknown-linux-gnu exit ;; alpha:Linux:*:*) case `sed -n '/^cpu model/s/^.*: \(.*\)/\1/p' < /proc/cpuinfo` in @@ -905,60 +886,59 @@ EV68*) UNAME_MACHINE=alphaev68 ;; esac objdump --private-headers 
/bin/sh | grep -q ld.so.1 - if test "$?" = 0 ; then LIBC=gnulibc1 ; fi - echo ${UNAME_MACHINE}-unknown-linux-${LIBC} - exit ;; - arc:Linux:*:* | arceb:Linux:*:*) - echo ${UNAME_MACHINE}-unknown-linux-${LIBC} + if test "$?" = 0 ; then LIBC="libc1" ; else LIBC="" ; fi + echo ${UNAME_MACHINE}-unknown-linux-gnu${LIBC} exit ;; arm*:Linux:*:*) eval $set_cc_for_build if echo __ARM_EABI__ | $CC_FOR_BUILD -E - 2>/dev/null \ | grep -q __ARM_EABI__ then - echo ${UNAME_MACHINE}-unknown-linux-${LIBC} + echo ${UNAME_MACHINE}-unknown-linux-gnu else if echo __ARM_PCS_VFP | $CC_FOR_BUILD -E - 2>/dev/null \ | grep -q __ARM_PCS_VFP then - echo ${UNAME_MACHINE}-unknown-linux-${LIBC}eabi + echo ${UNAME_MACHINE}-unknown-linux-gnueabi else - echo ${UNAME_MACHINE}-unknown-linux-${LIBC}eabihf + echo ${UNAME_MACHINE}-unknown-linux-gnueabihf fi fi exit ;; avr32*:Linux:*:*) - echo ${UNAME_MACHINE}-unknown-linux-${LIBC} + echo ${UNAME_MACHINE}-unknown-linux-gnu exit ;; cris:Linux:*:*) - echo ${UNAME_MACHINE}-axis-linux-${LIBC} + echo ${UNAME_MACHINE}-axis-linux-gnu exit ;; crisv32:Linux:*:*) - echo ${UNAME_MACHINE}-axis-linux-${LIBC} - exit ;; - e2k:Linux:*:*) - echo ${UNAME_MACHINE}-unknown-linux-${LIBC} + echo ${UNAME_MACHINE}-axis-linux-gnu exit ;; frv:Linux:*:*) - echo ${UNAME_MACHINE}-unknown-linux-${LIBC} + echo ${UNAME_MACHINE}-unknown-linux-gnu exit ;; hexagon:Linux:*:*) - echo ${UNAME_MACHINE}-unknown-linux-${LIBC} + echo ${UNAME_MACHINE}-unknown-linux-gnu exit ;; i*86:Linux:*:*) - echo ${UNAME_MACHINE}-pc-linux-${LIBC} + LIBC=gnu + eval $set_cc_for_build + sed 's/^ //' << EOF >$dummy.c + #ifdef __dietlibc__ + LIBC=dietlibc + #endif +EOF + eval `$CC_FOR_BUILD -E $dummy.c 2>/dev/null | grep '^LIBC'` + echo "${UNAME_MACHINE}-pc-linux-${LIBC}" exit ;; ia64:Linux:*:*) - echo ${UNAME_MACHINE}-unknown-linux-${LIBC} - exit ;; - k1om:Linux:*:*) - echo ${UNAME_MACHINE}-unknown-linux-${LIBC} + echo ${UNAME_MACHINE}-unknown-linux-gnu exit ;; m32r*:Linux:*:*) - echo 
${UNAME_MACHINE}-unknown-linux-${LIBC} + echo ${UNAME_MACHINE}-unknown-linux-gnu exit ;; m68*:Linux:*:*) - echo ${UNAME_MACHINE}-unknown-linux-${LIBC} + echo ${UNAME_MACHINE}-unknown-linux-gnu exit ;; mips:Linux:*:* | mips64:Linux:*:*) eval $set_cc_for_build @@ -977,69 +957,57 @@ #endif EOF eval `$CC_FOR_BUILD -E $dummy.c 2>/dev/null | grep '^CPU'` - test x"${CPU}" != x && { echo "${CPU}-unknown-linux-${LIBC}"; exit; } + test x"${CPU}" != x && { echo "${CPU}-unknown-linux-gnu"; exit; } ;; - mips64el:Linux:*:*) - echo ${UNAME_MACHINE}-unknown-linux-${LIBC} - exit ;; - openrisc*:Linux:*:*) - echo or1k-unknown-linux-${LIBC} - exit ;; - or32:Linux:*:* | or1k*:Linux:*:*) - echo ${UNAME_MACHINE}-unknown-linux-${LIBC} + or32:Linux:*:*) + echo ${UNAME_MACHINE}-unknown-linux-gnu exit ;; padre:Linux:*:*) - echo sparc-unknown-linux-${LIBC} + echo sparc-unknown-linux-gnu exit ;; parisc64:Linux:*:* | hppa64:Linux:*:*) - echo hppa64-unknown-linux-${LIBC} + echo hppa64-unknown-linux-gnu exit ;; parisc:Linux:*:* | hppa:Linux:*:*) # Look for CPU level case `grep '^cpu[^a-z]*:' /proc/cpuinfo 2>/dev/null | cut -d' ' -f2` in - PA7*) echo hppa1.1-unknown-linux-${LIBC} ;; - PA8*) echo hppa2.0-unknown-linux-${LIBC} ;; - *) echo hppa-unknown-linux-${LIBC} ;; + PA7*) echo hppa1.1-unknown-linux-gnu ;; + PA8*) echo hppa2.0-unknown-linux-gnu ;; + *) echo hppa-unknown-linux-gnu ;; esac exit ;; ppc64:Linux:*:*) - echo powerpc64-unknown-linux-${LIBC} - exit ;; - ppc:Linux:*:*) - echo powerpc-unknown-linux-${LIBC} + echo powerpc64-unknown-linux-gnu exit ;; ppc64le:Linux:*:*) - echo powerpc64le-unknown-linux-${LIBC} + echo powerpc64le-unknown-linux-gnu exit ;; - ppcle:Linux:*:*) - echo powerpcle-unknown-linux-${LIBC} - exit ;; - riscv32:Linux:*:* | riscv64:Linux:*:*) - echo ${UNAME_MACHINE}-unknown-linux-${LIBC} + ppc:Linux:*:*) + echo powerpc-unknown-linux-gnu exit ;; s390:Linux:*:* | s390x:Linux:*:*) - echo ${UNAME_MACHINE}-ibm-linux-${LIBC} + echo ${UNAME_MACHINE}-ibm-linux exit ;; 
sh64*:Linux:*:*) - echo ${UNAME_MACHINE}-unknown-linux-${LIBC} + echo ${UNAME_MACHINE}-unknown-linux-gnu exit ;; sh*:Linux:*:*) - echo ${UNAME_MACHINE}-unknown-linux-${LIBC} + echo ${UNAME_MACHINE}-unknown-linux-gnu exit ;; sparc:Linux:*:* | sparc64:Linux:*:*) - echo ${UNAME_MACHINE}-unknown-linux-${LIBC} + echo ${UNAME_MACHINE}-unknown-linux-gnu exit ;; tile*:Linux:*:*) - echo ${UNAME_MACHINE}-unknown-linux-${LIBC} + echo ${UNAME_MACHINE}-unknown-linux-gnu exit ;; vax:Linux:*:*) - echo ${UNAME_MACHINE}-dec-linux-${LIBC} + echo ${UNAME_MACHINE}-dec-linux-gnu exit ;; x86_64:Linux:*:*) - echo ${UNAME_MACHINE}-pc-linux-${LIBC} + echo ${UNAME_MACHINE}-unknown-linux-gnu exit ;; xtensa*:Linux:*:*) - echo ${UNAME_MACHINE}-unknown-linux-${LIBC} + echo ${UNAME_MACHINE}-unknown-linux-gnu exit ;; i*86:DYNIX/ptx:4*:*) # ptx 4.0 does uname -s correctly, with DYNIX/ptx in there. @@ -1075,7 +1043,7 @@ i*86:*DOS:*:*) echo ${UNAME_MACHINE}-pc-msdosdjgpp exit ;; - i*86:*:4.*:*) + i*86:*:4.*:* | i*86:SYSTEM_V:4.*:*) UNAME_REL=`echo ${UNAME_RELEASE} | sed 's/\/MP$//'` if grep Novell /usr/include/link.h >/dev/null 2>/dev/null; then echo ${UNAME_MACHINE}-univel-sysv${UNAME_REL} @@ -1115,7 +1083,7 @@ # uname -m prints for DJGPP always 'pc', but it prints nothing about # the processor, so we play safe by assuming i586. # Note: whatever this is, it MUST be the same as what config.sub - # prints for the "djgpp" host, or else GDB configure will decide that + # prints for the "djgpp" host, or else GDB configury will decide that # this is a cross-build. 
echo i586-pc-msdosdjgpp exit ;; @@ -1264,9 +1232,6 @@ SX-8R:SUPER-UX:*:*) echo sx8r-nec-superux${UNAME_RELEASE} exit ;; - SX-ACE:SUPER-UX:*:*) - echo sxace-nec-superux${UNAME_RELEASE} - exit ;; Power*:Rhapsody:*:*) echo powerpc-apple-rhapsody${UNAME_RELEASE} exit ;; @@ -1275,43 +1240,24 @@ exit ;; *:Darwin:*:*) UNAME_PROCESSOR=`uname -p` || UNAME_PROCESSOR=unknown - eval $set_cc_for_build - if test "$UNAME_PROCESSOR" = unknown ; then - UNAME_PROCESSOR=powerpc - fi - if test `echo "$UNAME_RELEASE" | sed -e 's/\..*//'` -le 10 ; then - if [ "$CC_FOR_BUILD" != no_compiler_found ]; then - if (echo '#ifdef __LP64__'; echo IS_64BIT_ARCH; echo '#endif') | \ - (CCOPTS="" $CC_FOR_BUILD -E - 2>/dev/null) | \ - grep IS_64BIT_ARCH >/dev/null - then - case $UNAME_PROCESSOR in - i386) UNAME_PROCESSOR=x86_64 ;; - powerpc) UNAME_PROCESSOR=powerpc64 ;; - esac - fi - # On 10.4-10.6 one might compile for PowerPC via gcc -arch ppc - if (echo '#ifdef __POWERPC__'; echo IS_PPC; echo '#endif') | \ - (CCOPTS="" $CC_FOR_BUILD -E - 2>/dev/null) | \ - grep IS_PPC >/dev/null - then - UNAME_PROCESSOR=powerpc - fi - fi - elif test "$UNAME_PROCESSOR" = i386 ; then - # Avoid executing cc on OS X 10.9, as it ships with a stub - # that puts up a graphical alert prompting to install - # developer tools. Any system running Mac OS X 10.7 or - # later (Darwin 11 and later) is required to have a 64-bit - # processor. This is not true of the ARM version of Darwin - # that Apple uses in portable devices. 
- UNAME_PROCESSOR=x86_64 - fi + case $UNAME_PROCESSOR in + i386) + eval $set_cc_for_build + if [ "$CC_FOR_BUILD" != 'no_compiler_found' ]; then + if (echo '#ifdef __LP64__'; echo IS_64BIT_ARCH; echo '#endif') | \ + (CCOPTS= $CC_FOR_BUILD -E - 2>/dev/null) | \ + grep IS_64BIT_ARCH >/dev/null + then + UNAME_PROCESSOR="x86_64" + fi + fi ;; + unknown) UNAME_PROCESSOR=powerpc ;; + esac echo ${UNAME_PROCESSOR}-apple-darwin${UNAME_RELEASE} exit ;; *:procnto*:*:* | *:QNX:[0123456789]*:*) UNAME_PROCESSOR=`uname -p` - if test "$UNAME_PROCESSOR" = x86; then + if test "$UNAME_PROCESSOR" = "x86"; then UNAME_PROCESSOR=i386 UNAME_MACHINE=pc fi @@ -1320,18 +1266,15 @@ *:QNX:*:4*) echo i386-pc-qnx exit ;; - NEO-*:NONSTOP_KERNEL:*:*) + NEO-?:NONSTOP_KERNEL:*:*) echo neo-tandem-nsk${UNAME_RELEASE} exit ;; NSE-*:NONSTOP_KERNEL:*:*) echo nse-tandem-nsk${UNAME_RELEASE} exit ;; - NSR-*:NONSTOP_KERNEL:*:*) + NSR-?:NONSTOP_KERNEL:*:*) echo nsr-tandem-nsk${UNAME_RELEASE} exit ;; - NSX-*:NONSTOP_KERNEL:*:*) - echo nsx-tandem-nsk${UNAME_RELEASE} - exit ;; *:NonStop-UX:*:*) echo mips-compaq-nonstopux exit ;; @@ -1345,7 +1288,7 @@ # "uname -m" is not consistent, so use $cputype instead. 386 # is converted to i386 for consistency with other x86 # operating systems. - if test "$cputype" = 386; then + if test "$cputype" = "386"; then UNAME_MACHINE=i386 else UNAME_MACHINE="$cputype" @@ -1387,7 +1330,7 @@ echo i386-pc-xenix exit ;; i*86:skyos:*:*) - echo ${UNAME_MACHINE}-pc-skyos`echo ${UNAME_RELEASE} | sed -e 's/ .*$//'` + echo ${UNAME_MACHINE}-pc-skyos`echo ${UNAME_RELEASE}` | sed -e 's/ .*$//' exit ;; i*86:rdos:*:*) echo ${UNAME_MACHINE}-pc-rdos @@ -1398,37 +1341,171 @@ x86_64:VMkernel:*:*) echo ${UNAME_MACHINE}-unknown-esx exit ;; - amd64:Isilon\ OneFS:*:*) - echo x86_64-unknown-onefs - exit ;; esac -echo "$0: unable to guess system type" >&2 +eval $set_cc_for_build +cat >$dummy.c < +# include +#endif +main () +{ +#if defined (sony) +#if defined (MIPSEB) + /* BFD wants "bsd" instead of "newsos". 
Perhaps BFD should be changed, + I don't know.... */ + printf ("mips-sony-bsd\n"); exit (0); +#else +#include + printf ("m68k-sony-newsos%s\n", +#ifdef NEWSOS4 + "4" +#else + "" +#endif + ); exit (0); +#endif +#endif + +#if defined (__arm) && defined (__acorn) && defined (__unix) + printf ("arm-acorn-riscix\n"); exit (0); +#endif + +#if defined (hp300) && !defined (hpux) + printf ("m68k-hp-bsd\n"); exit (0); +#endif + +#if defined (NeXT) +#if !defined (__ARCHITECTURE__) +#define __ARCHITECTURE__ "m68k" +#endif + int version; + version=`(hostinfo | sed -n 's/.*NeXT Mach \([0-9]*\).*/\1/p') 2>/dev/null`; + if (version < 4) + printf ("%s-next-nextstep%d\n", __ARCHITECTURE__, version); + else + printf ("%s-next-openstep%d\n", __ARCHITECTURE__, version); + exit (0); +#endif + +#if defined (MULTIMAX) || defined (n16) +#if defined (UMAXV) + printf ("ns32k-encore-sysv\n"); exit (0); +#else +#if defined (CMU) + printf ("ns32k-encore-mach\n"); exit (0); +#else + printf ("ns32k-encore-bsd\n"); exit (0); +#endif +#endif +#endif + +#if defined (__386BSD__) + printf ("i386-pc-bsd\n"); exit (0); +#endif + +#if defined (sequent) +#if defined (i386) + printf ("i386-sequent-dynix\n"); exit (0); +#endif +#if defined (ns32000) + printf ("ns32k-sequent-dynix\n"); exit (0); +#endif +#endif -case "${UNAME_MACHINE}:${UNAME_SYSTEM}" in - mips:Linux | mips64:Linux) - # If we got here on MIPS GNU/Linux, output extra information. 
- cat >&2 < +# if defined (BSD) +# if BSD == 43 + printf ("vax-dec-bsd4.3\n"); exit (0); +# else +# if BSD == 199006 + printf ("vax-dec-bsd4.3reno\n"); exit (0); +# else + printf ("vax-dec-bsd\n"); exit (0); +# endif +# endif +# else + printf ("vax-dec-bsd\n"); exit (0); +# endif +# else + printf ("vax-dec-ultrix\n"); exit (0); +# endif +#endif + +#if defined (alliant) && defined (i860) + printf ("i860-alliant-bsd\n"); exit (0); +#endif + + exit (1); +} EOF - ;; -esac + +$CC_FOR_BUILD -o $dummy $dummy.c 2>/dev/null && SYSTEM_NAME=`$dummy` && + { echo "$SYSTEM_NAME"; exit; } + +# Apollos put the system type in the environment. + +test -d /usr/apollo && { echo ${ISP}-apollo-${SYSTYPE}; exit; } + +# Convex versions that predate uname can use getsysinfo(1) + +if [ -x /usr/convex/getsysinfo ] +then + case `getsysinfo -f cpu_type` in + c1*) + echo c1-convex-bsd + exit ;; + c2*) + if getsysinfo -f scalar_acc + then echo c32-convex-bsd + else echo c2-convex-bsd + fi + exit ;; + c34*) + echo c34-convex-bsd + exit ;; + c38*) + echo c38-convex-bsd + exit ;; + c4*) + echo c4-convex-bsd + exit ;; + esac +fi cat >&2 < in order to provide the needed +information to handle your system. config.guess timestamp = $timestamp @@ -1456,7 +1533,7 @@ exit 1 # Local variables: -# eval: (add-hook 'write-file-functions 'time-stamp) +# eval: (add-hook 'write-file-hooks 'time-stamp) # time-stamp-start: "timestamp='" # time-stamp-format: "%:y-%02m-%02d" # time-stamp-end: "'" diff -Nru x264-0.152.2854+gite9a5903/config.sub x264-0.158.2988+git-20191101.7817004/config.sub --- x264-0.152.2854+gite9a5903/config.sub 2017-12-31 12:50:51.000000000 +0000 +++ x264-0.158.2988+git-20191101.7817004/config.sub 2019-11-09 05:16:29.000000000 +0000 @@ -1,31 +1,36 @@ #! /bin/sh # Configuration validation subroutine script. -# Copyright 1992-2017 Free Software Foundation, Inc. 
- -timestamp='2017-11-23' - -# This file is free software; you can redistribute it and/or modify it -# under the terms of the GNU General Public License as published by -# the Free Software Foundation; either version 3 of the License, or +# Copyright (C) 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999, +# 2000, 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, +# 2011, 2012 Free Software Foundation, Inc. + +timestamp='2012-12-06' + +# This file is (in principle) common to ALL GNU software. +# The presence of a machine in this file suggests that SOME GNU software +# can handle that machine. It does not imply ALL GNU software can. +# +# This file is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or # (at your option) any later version. # -# This program is distributed in the hope that it will be useful, but -# WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU -# General Public License for more details. +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. # # You should have received a copy of the GNU General Public License -# along with this program; if not, see . +# along with this program; if not, see . # # As a special exception to the GNU General Public License, if you # distribute this file as part of a program that contains a # configuration script generated by Autoconf, you may include it under -# the same distribution terms that you use for the rest of that -# program. This Exception is an additional permission under section 7 -# of the GNU General Public License, version 3 ("GPLv3"). 
+# the same distribution terms that you use for the rest of that program. -# Please send patches to . +# Please send patches to . Submit a context +# diff and a properly formatted GNU ChangeLog entry. # # Configuration subroutine to validate and canonicalize a configuration type. # Supply the specified configuration type as an argument. @@ -33,7 +38,7 @@ # Otherwise, we print the canonical config type on stdout and succeed. # You can get the latest version of this script from: -# https://git.savannah.gnu.org/gitweb/?p=config.git;a=blob_plain;f=config.sub +# http://git.savannah.gnu.org/gitweb/?p=config.git;a=blob_plain;f=config.sub;hb=HEAD # This file is supposed to be the same for all GNU packages # and recognize all the CPU types, system types and aliases @@ -53,11 +58,12 @@ me=`echo "$0" | sed -e 's,.*/,,'` usage="\ -Usage: $0 [OPTION] CPU-MFR-OPSYS or ALIAS +Usage: $0 [OPTION] CPU-MFR-OPSYS + $0 [OPTION] ALIAS Canonicalize a configuration name. -Options: +Operation modes: -h, --help print this help, then exit -t, --time-stamp print date of last modification, then exit -v, --version print version number, then exit @@ -67,7 +73,9 @@ version="\ GNU config.sub ($timestamp) -Copyright 1992-2017 Free Software Foundation, Inc. +Copyright (C) 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000, +2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011, 2012 +Free Software Foundation, Inc. This is free software; see the source for copying conditions. There is NO warranty; not even for MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE." 
@@ -116,8 +124,8 @@ case $maybe_os in nto-qnx* | linux-gnu* | linux-android* | linux-dietlibc | linux-newlib* | \ linux-musl* | linux-uclibc* | uclinux-uclibc* | uclinux-gnu* | kfreebsd*-gnu* | \ - knetbsd*-gnu* | netbsd*-gnu* | netbsd*-eabi* | \ - kopensolaris*-gnu* | cloudabi*-eabi* | \ + knetbsd*-gnu* | netbsd*-gnu* | \ + kopensolaris*-gnu* | \ storm-chaos* | os2-emx* | rtmk-nova*) os=-$maybe_os basic_machine=`echo $1 | sed 's/^\(.*\)-\([^-]*-[^-]*\)$/\1/'` @@ -229,6 +237,9 @@ -ptx*) basic_machine=`echo $1 | sed -e 's/86-.*/86-sequent/'` ;; + -windowsnt*) + os=`echo $os | sed -e 's/windowsnt/winnt/'` + ;; -psos*) os=-psos ;; @@ -248,21 +259,19 @@ | alpha | alphaev[4-8] | alphaev56 | alphaev6[78] | alphapca5[67] \ | alpha64 | alpha64ev[4-8] | alpha64ev56 | alpha64ev6[78] | alpha64pca5[67] \ | am33_2.0 \ - | arc | arceb \ + | arc \ | arm | arm[bl]e | arme[lb] | armv[2-8] | armv[3-8][lb] | armv7[arm] \ | avr | avr32 \ - | ba \ | be32 | be64 \ | bfin \ - | c4x | c8051 | clipper \ + | c4x | clipper \ | d10v | d30v | dlx | dsp16xx \ - | e2k | epiphany \ - | fido | fr30 | frv | ft32 \ + | epiphany \ + | fido | fr30 | frv \ | h8300 | h8500 | hppa | hppa1.[01] | hppa2.0 | hppa2.0[nw] | hppa64 \ | hexagon \ - | i370 | i860 | i960 | ia16 | ia64 \ + | i370 | i860 | i960 | ia64 \ | ip2k | iq2000 \ - | k1om \ | le32 | le64 \ | lm32 \ | m32c | m32r | m32rle | m68000 | m68k | m88k \ @@ -280,30 +289,26 @@ | mips64vr5900 | mips64vr5900el \ | mipsisa32 | mipsisa32el \ | mipsisa32r2 | mipsisa32r2el \ - | mipsisa32r6 | mipsisa32r6el \ | mipsisa64 | mipsisa64el \ | mipsisa64r2 | mipsisa64r2el \ - | mipsisa64r6 | mipsisa64r6el \ | mipsisa64sb1 | mipsisa64sb1el \ | mipsisa64sr71k | mipsisa64sr71kel \ - | mipsr5900 | mipsr5900el \ | mipstx39 | mipstx39el \ | mn10200 | mn10300 \ | moxie \ | mt \ | msp430 \ | nds32 | nds32le | nds32be \ - | nios | nios2 | nios2eb | nios2el \ + | nios | nios2 \ | ns16k | ns32k \ - | open8 | or1k | or1knd | or32 \ + | open8 \ + | or32 \ | pdp10 | pdp11 | pj 
| pjl \ | powerpc | powerpc64 | powerpc64le | powerpcle \ - | pru \ | pyramid \ - | riscv32 | riscv64 \ | rl78 | rx \ | score \ - | sh | sh[1234] | sh[24]a | sh[24]aeb | sh[23]e | sh[234]eb | sheb | shbe | shle | sh[1234]le | sh3ele \ + | sh | sh[1234] | sh[24]a | sh[24]aeb | sh[23]e | sh[34]eb | sheb | shbe | shle | sh[1234]le | sh3ele \ | sh64 | sh64le \ | sparc | sparc64 | sparc64b | sparc64v | sparc86x | sparclet | sparclite \ | sparcv8 | sparcv9 | sparcv9b | sparcv9v \ @@ -311,8 +316,7 @@ | tahoe | tic4x | tic54x | tic55x | tic6x | tic80 | tron \ | ubicom32 \ | v850 | v850e | v850e1 | v850e2 | v850es | v850e2v3 \ - | visium \ - | wasm32 \ + | we32k \ | x86 | xc16x | xstormy16 | xtensa \ | z8k | z80) basic_machine=$basic_machine-unknown @@ -326,10 +330,7 @@ c6x) basic_machine=tic6x-unknown ;; - leon|leon[3-9]) - basic_machine=sparc-$basic_machine - ;; - m6811 | m68hc11 | m6812 | m68hc12 | m68hcs12x | nvptx | picochip) + m6811 | m68hc11 | m6812 | m68hc12 | m68hcs12x | picochip) basic_machine=$basic_machine-unknown os=-none ;; @@ -371,23 +372,21 @@ | aarch64-* | aarch64_be-* \ | alpha-* | alphaev[4-8]-* | alphaev56-* | alphaev6[78]-* \ | alpha64-* | alpha64ev[4-8]-* | alpha64ev56-* | alpha64ev6[78]-* \ - | alphapca5[67]-* | alpha64pca5[67]-* | arc-* | arceb-* \ + | alphapca5[67]-* | alpha64pca5[67]-* | arc-* \ | arm-* | armbe-* | armle-* | armeb-* | armv*-* \ | avr-* | avr32-* \ - | ba-* \ | be32-* | be64-* \ | bfin-* | bs2000-* \ | c[123]* | c30-* | [cjt]90-* | c4x-* \ - | c8051-* | clipper-* | craynv-* | cydra-* \ + | clipper-* | craynv-* | cydra-* \ | d10v-* | d30v-* | dlx-* \ - | e2k-* | elxsi-* \ + | elxsi-* \ | f30[01]-* | f700-* | fido-* | fr30-* | frv-* | fx80-* \ | h8300-* | h8500-* \ | hppa-* | hppa1.[01]-* | hppa2.0-* | hppa2.0[nw]-* | hppa64-* \ | hexagon-* \ - | i*86-* | i860-* | i960-* | ia16-* | ia64-* \ + | i*86-* | i860-* | i960-* | ia64-* \ | ip2k-* | iq2000-* \ - | k1om-* \ | le32-* | le64-* \ | lm32-* \ | m32c-* | m32r-* | m32rle-* \ @@ 
-407,34 +406,28 @@ | mips64vr5900-* | mips64vr5900el-* \ | mipsisa32-* | mipsisa32el-* \ | mipsisa32r2-* | mipsisa32r2el-* \ - | mipsisa32r6-* | mipsisa32r6el-* \ | mipsisa64-* | mipsisa64el-* \ | mipsisa64r2-* | mipsisa64r2el-* \ - | mipsisa64r6-* | mipsisa64r6el-* \ | mipsisa64sb1-* | mipsisa64sb1el-* \ | mipsisa64sr71k-* | mipsisa64sr71kel-* \ - | mipsr5900-* | mipsr5900el-* \ | mipstx39-* | mipstx39el-* \ | mmix-* \ | mt-* \ | msp430-* \ | nds32-* | nds32le-* | nds32be-* \ - | nios-* | nios2-* | nios2eb-* | nios2el-* \ + | nios-* | nios2-* \ | none-* | np1-* | ns16k-* | ns32k-* \ | open8-* \ - | or1k*-* \ | orion-* \ | pdp10-* | pdp11-* | pj-* | pjl-* | pn-* | power-* \ | powerpc-* | powerpc64-* | powerpc64le-* | powerpcle-* \ - | pru-* \ | pyramid-* \ - | riscv32-* | riscv64-* \ | rl78-* | romp-* | rs6000-* | rx-* \ | sh-* | sh[1234]-* | sh[24]a-* | sh[24]aeb-* | sh[23]e-* | sh[34]eb-* | sheb-* | shbe-* \ | shle-* | sh[1234]le-* | sh3ele-* | sh64-* | sh64le-* \ | sparc-* | sparc64-* | sparc64b-* | sparc64v-* | sparc86x-* | sparclet-* \ | sparclite-* \ - | sparcv8-* | sparcv9-* | sparcv9b-* | sparcv9v-* | sv1-* | sx*-* \ + | sparcv8-* | sparcv9-* | sparcv9b-* | sparcv9v-* | sv1-* | sx?-* \ | tahoe-* \ | tic30-* | tic4x-* | tic54x-* | tic55x-* | tic6x-* | tic80-* \ | tile*-* \ @@ -442,8 +435,6 @@ | ubicom32-* \ | v850-* | v850e-* | v850e1-* | v850es-* | v850e2-* | v850e2v3-* \ | vax-* \ - | visium-* \ - | wasm32-* \ | we32k-* \ | x86-* | x86_64-* | xc16x-* | xps100-* \ | xstormy16-* | xtensa*-* \ @@ -520,9 +511,6 @@ basic_machine=i386-pc os=-aros ;; - asmjs) - basic_machine=asmjs-unknown - ;; aux) basic_machine=m68k-apple os=-aux @@ -639,18 +627,10 @@ basic_machine=rs6000-bull os=-bosx ;; - dpx2*) + dpx2* | dpx2*-bull) basic_machine=m68k-bull os=-sysv3 ;; - e500v[12]) - basic_machine=powerpc-unknown - os=$os"spe" - ;; - e500v[12]-*) - basic_machine=powerpc-`echo $basic_machine | sed 's/^[^-]*-//'` - os=$os"spe" - ;; ebmon29k) basic_machine=a29k-amd os=-ebmon @@ 
-792,9 +772,6 @@ basic_machine=m68k-isi os=-sysv ;; - leon-*|leon[3-9]-*) - basic_machine=sparc-`echo $basic_machine | sed 's/-.*//'` - ;; m68knommu) basic_machine=m68k-unknown os=-linux @@ -822,7 +799,7 @@ os=-mingw64 ;; mingw32) - basic_machine=i686-pc + basic_machine=i386-pc os=-mingw32 ;; mingw32ce) @@ -850,10 +827,6 @@ basic_machine=powerpc-unknown os=-morphos ;; - moxiebox) - basic_machine=moxie-unknown - os=-moxiebox - ;; msdos) basic_machine=i386-pc os=-msdos @@ -862,7 +835,7 @@ basic_machine=`echo $basic_machine | sed -e 's/ms1-/mt-/'` ;; msys) - basic_machine=i686-pc + basic_machine=i386-pc os=-msys ;; mvs) @@ -901,7 +874,7 @@ basic_machine=v70-nec os=-sysv ;; - next | m*-next) + next | m*-next ) basic_machine=m68k-next case $os in -nextstep* ) @@ -946,9 +919,6 @@ nsr-tandem) basic_machine=nsr-tandem ;; - nsx-tandem) - basic_machine=nsx-tandem - ;; op50n-* | op60c-*) basic_machine=hppa1.1-oki os=-proelf @@ -1033,7 +1003,7 @@ ppc-* | ppcbe-*) basic_machine=powerpc-`echo $basic_machine | sed 's/^[^-]*-//'` ;; - ppcle | powerpclittle) + ppcle | powerpclittle | ppc-le | powerpc-little) basic_machine=powerpcle-unknown ;; ppcle-* | powerpclittle-*) @@ -1043,7 +1013,7 @@ ;; ppc64-*) basic_machine=powerpc64-`echo $basic_machine | sed 's/^[^-]*-//'` ;; - ppc64le | powerpc64little) + ppc64le | powerpc64little | ppc64-le | powerpc64-little) basic_machine=powerpc64le-unknown ;; ppc64le-* | powerpc64little-*) @@ -1244,9 +1214,6 @@ basic_machine=a29k-wrs os=-vxworks ;; - wasm32) - basic_machine=wasm32-unknown - ;; w65*) basic_machine=w65-wdc os=-none @@ -1255,9 +1222,6 @@ basic_machine=hppa1.1-winbond os=-proelf ;; - x64) - basic_machine=x86_64-pc - ;; xbox) basic_machine=i686-pc os=-mingw32 @@ -1365,8 +1329,8 @@ if [ x"$os" != x"" ] then case $os in - # First match some system type aliases that might get confused - # with valid system types. + # First match some system type aliases + # that might get confused with valid system types. 
# -solaris* is a basic system type, with this one exception. -auroraux) os=-auroraux @@ -1386,37 +1350,36 @@ -gnu/linux*) os=`echo $os | sed -e 's|gnu/linux|linux-gnu|'` ;; - # Now accept the basic system types. + # First accept the basic system types. # The portable systems comes first. - # Each alternative MUST end in a * to match a version number. + # Each alternative MUST END IN A *, to match a version number. # -sysv* is not here because it comes later, after sysvr4. -gnu* | -bsd* | -mach* | -minix* | -genix* | -ultrix* | -irix* \ | -*vms* | -sco* | -esix* | -isc* | -aix* | -cnk* | -sunos | -sunos[34]*\ | -hpux* | -unos* | -osf* | -luna* | -dgux* | -auroraux* | -solaris* \ - | -sym* | -kopensolaris* | -plan9* \ + | -sym* | -kopensolaris* \ | -amigaos* | -amigados* | -msdos* | -newsos* | -unicos* | -aof* \ - | -aos* | -aros* | -cloudabi* | -sortix* \ + | -aos* | -aros* \ | -nindy* | -vxsim* | -vxworks* | -ebmon* | -hms* | -mvs* \ | -clix* | -riscos* | -uniplus* | -iris* | -rtu* | -xenix* \ | -hiux* | -386bsd* | -knetbsd* | -mirbsd* | -netbsd* \ - | -bitrig* | -openbsd* | -solidbsd* | -libertybsd* \ + | -bitrig* | -openbsd* | -solidbsd* \ | -ekkobsd* | -kfreebsd* | -freebsd* | -riscix* | -lynxos* \ | -bosx* | -nextstep* | -cxux* | -aout* | -elf* | -oabi* \ | -ptx* | -coff* | -ecoff* | -winnt* | -domain* | -vsta* \ | -udi* | -eabi* | -lites* | -ieee* | -go32* | -aux* \ - | -chorusos* | -chorusrdb* | -cegcc* | -glidix* \ + | -chorusos* | -chorusrdb* | -cegcc* \ | -cygwin* | -msys* | -pe* | -psos* | -moss* | -proelf* | -rtems* \ - | -midipix* | -mingw32* | -mingw64* | -linux-gnu* | -linux-android* \ + | -mingw32* | -mingw64* | -linux-gnu* | -linux-android* \ | -linux-newlib* | -linux-musl* | -linux-uclibc* \ - | -uxpv* | -beos* | -mpeix* | -udk* | -moxiebox* \ + | -uxpv* | -beos* | -mpeix* | -udk* \ | -interix* | -uwin* | -mks* | -rhapsody* | -darwin* | -opened* \ | -openstep* | -oskit* | -conix* | -pw32* | -nonstopux* \ | -storm-chaos* | -tops10* | -tenex* | 
-tops20* | -its* \ | -os2* | -vos* | -palmos* | -uclinux* | -nucleus* \ | -morphos* | -superux* | -rtmk* | -rtmk-nova* | -windiss* \ | -powermax* | -dnix* | -nx6 | -nx7 | -sei* | -dragonfly* \ - | -skyos* | -haiku* | -rdos* | -toppers* | -drops* | -es* \ - | -onefs* | -tirtos* | -phoenix* | -fuchsia* | -redox*) + | -skyos* | -haiku* | -rdos* | -toppers* | -drops* | -es*) # Remember, each alternative MUST END IN *, to match a version number. ;; -qnx*) @@ -1491,7 +1454,7 @@ -nova*) os=-rtmk-nova ;; - -ns2) + -ns2 ) os=-nextstep2 ;; -nsk*) @@ -1540,29 +1503,17 @@ -aros*) os=-aros ;; + -kaos*) + os=-kaos + ;; -zvmoe) os=-zvmoe ;; -dicos*) os=-dicos ;; - -pikeos*) - # Until real need of OS specific support for - # particular features comes up, bare metal - # configurations are quite functional. - case $basic_machine in - arm*) - os=-eabi - ;; - *) - os=-elf - ;; - esac - ;; -nacl*) ;; - -ios) - ;; -none) ;; *) @@ -1603,9 +1554,6 @@ c4x-* | tic4x-*) os=-coff ;; - c8051-*) - os=-elf - ;; hexagon-*) os=-elf ;; @@ -1658,9 +1606,6 @@ sparc-* | *-sun) os=-sunos4.1.1 ;; - pru-*) - os=-elf - ;; *-be) os=-beos ;; @@ -1706,7 +1651,7 @@ m88k-omron*) os=-luna ;; - *-next) + *-next ) os=-nextstep ;; *-sequent) @@ -1841,7 +1786,7 @@ exit # Local variables: -# eval: (add-hook 'write-file-functions 'time-stamp) +# eval: (add-hook 'write-file-hooks 'time-stamp) # time-stamp-start: "timestamp='" # time-stamp-format: "%:y-%02m-%02d" # time-stamp-end: "'" diff -Nru x264-0.152.2854+gite9a5903/configure x264-0.158.2988+git-20191101.7817004/configure --- x264-0.152.2854+gite9a5903/configure 2017-12-31 12:50:51.000000000 +0000 +++ x264-0.158.2988+git-20191101.7817004/configure 2019-11-09 05:16:29.000000000 +0000 @@ -30,8 +30,8 @@ --disable-thread disable multithreaded encoding --disable-win32thread disable win32threads (windows only) --disable-interlaced disable interlaced encoding support - --bit-depth=BIT_DEPTH set output bit depth (8-10) [8] - --chroma-format=FORMAT output chroma format 
(420, 422, 444, all) [all] + --bit-depth=BIT_DEPTH set output bit depth (8, 10, all) [all] + --chroma-format=FORMAT output chroma format (400, 420, 422, 444, all) [all] Advanced options: --disable-asm disable platform-specific assembly optimizations @@ -151,9 +151,9 @@ done echo "int main (void) { $3 return 0; }" >> conftest.c if [ $compiler_style = MS ]; then - cc_cmd="$CC conftest.c $(cc_cflags $CFLAGS $CHECK_CFLAGS $2) -link $(cl_ldflags $2 $LDFLAGSCLI $LDFLAGS)" + cc_cmd="$CC conftest.c $(cc_cflags $CFLAGS $CFLAGSCLI $CHECK_CFLAGS $2) -link $(cl_ldflags $2 $LDFLAGSCLI $LDFLAGS)" else - cc_cmd="$CC conftest.c $CFLAGS $CHECK_CFLAGS $2 $LDFLAGSCLI $LDFLAGS -o conftest" + cc_cmd="$CC conftest.c $CFLAGS $CFLAGSCLI $CHECK_CFLAGS $2 $LDFLAGSCLI $LDFLAGS -o conftest" fi if $cc_cmd >conftest.log 2>&1; then res=$? @@ -253,6 +253,24 @@ return $res } +pkg_check() { + log_check "for packages: $1" + pkg_cmd="$PKGCONFIG --exists $1" + if $pkg_cmd >conftest.log 2>&1; then + res=$? + log_ok + else + res=$? + log_fail + log_msg "Failed commandline was:" + log_msg "--------------------------------------------------" + log_msg "$pkg_cmd" + cat conftest.log >> config.log + log_msg "--------------------------------------------------" + fi + return $res +} + define() { echo "#define $1$([ -n "$2" ] && echo " $2" || echo " 1")" >> config.h } @@ -276,6 +294,7 @@ fi arg="$(grep '#define X264_BIT_DEPTH ' $x264_config_path | sed -e 's/#define X264_BIT_DEPTH *//; s/ *$//')" if [ -n "$arg" ]; then + [ "$arg" = 0 ] && arg="all" if [ "$arg" != "$bit_depth" ]; then echo "Override output bit depth with system libx264 configuration" bit_depth="$arg" @@ -353,7 +372,7 @@ gprof="no" strip="no" pic="no" -bit_depth="8" +bit_depth="all" chroma_format="all" compiler="GNU" compiler_style="GNU" @@ -361,6 +380,8 @@ vsx="auto" CFLAGS="$CFLAGS -Wall -I. -I\$(SRCPATH)" +CFLAGSSO="$CFLAGSSO" +CFLAGSCLI="$CFLAGSCLI" LDFLAGS="$LDFLAGS" LDFLAGSCLI="$LDFLAGSCLI" ASFLAGS="$ASFLAGS -I. 
-I\$(SRCPATH)" @@ -375,9 +396,9 @@ " # list of all preprocessor HAVE values we can define -CONFIG_HAVE="MALLOC_H ALTIVEC ALTIVEC_H MMX ARMV6 ARMV6T2 NEON BEOSTHREAD POSIXTHREAD WIN32THREAD THREAD LOG2F SWSCALE \ +CONFIG_HAVE="MALLOC_H ALTIVEC ALTIVEC_H MMX ARMV6 ARMV6T2 NEON AARCH64 BEOSTHREAD POSIXTHREAD WIN32THREAD THREAD LOG2F SWSCALE \ LAVF FFMS GPAC AVS GPL VECTOREXT INTERLACED CPU_COUNT OPENCL THP LSMASH X86_INLINE_ASM AS_FUNC INTEL_DISPATCHER \ - MSA MMAP WINRT VSX ARM_INLINE_ASM" + MSA MMAP WINRT VSX ARM_INLINE_ASM STRTOK_R CLOCK_GETTIME BITDEPTH8 BITDEPTH10" # parse options @@ -491,16 +512,15 @@ ;; --bit-depth=*) bit_depth="$optarg" - if [ "$bit_depth" -lt "8" -o "$bit_depth" -gt "10" ]; then - echo "Supplied bit depth must be in range [8,10]." + if [ "$bit_depth" != "8" -a "$bit_depth" != "10" -a "$bit_depth" != "all" ]; then + echo "Supplied bit depth must be 8, 10 or all." exit 1 fi - bit_depth=`expr $bit_depth + 0` ;; --chroma-format=*) chroma_format="$optarg" - if [ $chroma_format != "420" -a $chroma_format != "422" -a $chroma_format != "444" -a $chroma_format != "all" ]; then - echo "Supplied chroma format must be 420, 422, 444 or all." + if [ $chroma_format != "400" -a $chroma_format != "420" -a $chroma_format != "422" -a $chroma_format != "444" -a $chroma_format != "all" ]; then + echo "Supplied chroma format must be 400, 420, 422, 444 or all." 
exit 1 fi ;; @@ -530,10 +550,10 @@ fi if [ "x$host" = x ]; then - host=`${SRCPATH}/config.guess` + host="$(${SRCPATH}/config.guess)" fi # normalize a triplet into a quadruplet -host=`${SRCPATH}/config.sub $host` +host="$(${SRCPATH}/config.sub $host)" # split $host host_cpu="${host%%-*}" @@ -544,7 +564,7 @@ trap 'rm -rf conftest*' EXIT # test for use of compilers that require specific handling -cc_base=`basename "$CC"` +cc_base="$(basename "$CC")" QPRE="-" if [[ $host_os = mingw* || $host_os = cygwin* ]]; then if [[ "$cc_base" = icl || "$cc_base" = icl[\ .]* ]]; then @@ -720,7 +740,7 @@ stack_alignment=16 [ $compiler = GNU ] && CFLAGS="-m64 $CFLAGS" && LDFLAGS="-m64 $LDFLAGS" if [ "$SYS" = MACOSX ]; then - ASFLAGS="$ASFLAGS -f macho64 -DPIC -DPREFIX" + ASFLAGS="$ASFLAGS -f macho64 -DPREFIX" if cc_check '' "-arch x86_64"; then CFLAGS="$CFLAGS -arch x86_64" LDFLAGS="$LDFLAGS -arch x86_64" @@ -780,8 +800,10 @@ CFLAGS="$CFLAGS -arch armv7" LDFLAGS="$LDFLAGS -arch armv7" fi - elif [ "$SYS" = WINDOWS ] ; then + elif [ "$SYS" = WINDOWS ] && [ "$compiler" = CL ] ; then AS="${AS-${SRCPATH}/tools/gas-preprocessor.pl -arch arm -as-type armasm -force-thumb -- armasm -nologo -ignore 4509}" + elif [ "$SYS" = WINDOWS ] ; then + AS="${AS-${SRCPATH}/tools/gas-preprocessor.pl -arch arm -as-type clang -force-thumb -- ${CC} -mimplicit-it=always}" else AS="${AS-${CC}}" fi @@ -789,9 +811,13 @@ aarch64) ARCH="AARCH64" stack_alignment=16 - AS="${AS-${CC}}" if [ "$SYS" = MACOSX ] ; then + AS="${AS-${CC}}" ASFLAGS="$ASFLAGS -DPREFIX -DPIC" + elif [ "$SYS" = WINDOWS ] && [ "$compiler" = CL ] ; then + AS="${AS-${SRCPATH}/tools/gas-preprocessor.pl -arch aarch64 -as-type armasm -- armasm64 -nologo}" + else + AS="${AS-${CC}}" fi ;; s390|s390x) @@ -866,12 +892,17 @@ if cc_check '' -mpreferred-stack-boundary=6 ; then CFLAGS="$CFLAGS -mpreferred-stack-boundary=6" stack_alignment=64 - elif cc_check '' -mpreferred-stack-boundary=5 ; then - CFLAGS="$CFLAGS -mpreferred-stack-boundary=5" - 
stack_alignment=32 - elif [ $stack_alignment -lt 16 ] && cc_check '' -mpreferred-stack-boundary=4 ; then - CFLAGS="$CFLAGS -mpreferred-stack-boundary=4" - stack_alignment=16 + elif cc_check '' -mstack-alignment=64 ; then + CFLAGS="$CFLAGS -mstack-alignment=64" + stack_alignment=64 + elif [ $stack_alignment -lt 16 ] ; then + if cc_check '' -mpreferred-stack-boundary=4 ; then + CFLAGS="$CFLAGS -mpreferred-stack-boundary=4" + stack_alignment=16 + elif cc_check '' -mstack-alignment=16 ; then + CFLAGS="$CFLAGS -mstack-alignment=16" + stack_alignment=16 + fi fi elif [ $compiler = ICC -a $ARCH = X86 ]; then # icc on linux has various degrees of mod16 stack support @@ -890,7 +921,7 @@ if [ $asm = auto -a \( $ARCH = X86 -o $ARCH = X86_64 \) ] ; then if ! as_check "vmovdqa32 [eax]{k1}{z}, zmm0" ; then - VER=`($AS --version || echo no assembler) 2>/dev/null | head -n 1` + VER="$( ($AS --version || echo no assembler) 2>/dev/null | head -n 1 )" echo "Found $VER" echo "Minimum version is nasm-2.13" echo "If you really want to compile without asm, configure with --disable-asm." 
@@ -921,7 +952,12 @@ fi if [ $asm = auto -a $ARCH = AARCH64 ] ; then - if cc_check '' '' '__asm__("cmeq v0.8h, v0.8h, #0");' ; then define HAVE_NEON + if [ $compiler = CL ] && cpp_check '' '' 'defined(_M_ARM64)' ; then + define HAVE_AARCH64 + define HAVE_NEON + elif cc_check '' '' '__asm__("cmeq v0.8h, v0.8h, #0");' ; then + define HAVE_AARCH64 + define HAVE_NEON ASFLAGS="$ASFLAGS -c" else echo "no NEON support, try adding -mfpu=neon to CFLAGS" @@ -973,8 +1009,9 @@ if [ "$cli_libx264" = "system" -a "$shared" != "yes" ] ; then [ "$static" = "yes" ] && die "Option --system-libx264 can not be used together with --enable-static" - if $PKGCONFIG --exists x264 2>/dev/null; then + if pkg_check x264 ; then X264_LIBS="$($PKGCONFIG --libs x264)" + X264_CFLAGS="$($PKGCONFIG --cflags x264)" X264_INCLUDE_DIR="${X264_INCLUDE_DIR-$($PKGCONFIG --variable=includedir x264)}" configure_system_override "$X264_INCLUDE_DIR" || die "Detection of system libx264 configuration failed" else @@ -1035,10 +1072,21 @@ fi [ "$thread" != "no" ] && define HAVE_THREAD -if cc_check "math.h" "-Werror" "return log2f(2);" ; then +if cc_check 'math.h' '' 'volatile float x = 2; return log2f(x);' ; then define HAVE_LOG2F fi +if cc_check 'string.h' '' 'strtok_r(0, 0, 0);' ; then + define HAVE_STRTOK_R +fi + +if cc_check 'time.h' '' 'clock_gettime(CLOCK_MONOTONIC, 0);' ; then + define HAVE_CLOCK_GETTIME +elif cc_check 'time.h' '-lrt' 'clock_gettime(CLOCK_MONOTONIC, 0);' ; then + define HAVE_CLOCK_GETTIME + LDFLAGS="$LDFLAGS -lrt" +fi + if [ "$SYS" != "WINDOWS" ] && cpp_check "sys/mman.h unistd.h" "" "defined(MAP_PRIVATE)"; then define HAVE_MMAP fi @@ -1059,48 +1107,52 @@ if [ "$swscale" = "auto" ] ; then swscale="no" - if $PKGCONFIG --exists libswscale 2>/dev/null; then + if pkg_check 'libswscale libavutil' ; then SWSCALE_LIBS="$SWSCALE_LIBS $($PKGCONFIG --libs libswscale libavutil)" SWSCALE_CFLAGS="$SWSCALE_CFLAGS $($PKGCONFIG --cflags libswscale libavutil)" fi [ -z "$SWSCALE_LIBS" ] && 
SWSCALE_LIBS="-lswscale -lavutil" if cc_check "libswscale/swscale.h" "$SWSCALE_CFLAGS $SWSCALE_LIBS" "sws_init_context(0,0,0);" ; then - if cpp_check "libavutil/pixdesc.h" "$SWSCALE_CFLAGS $SWSCALE_LIBS" "defined(AV_PIX_FMT_FLAG_RGB)" ; then + if cc_check "libavutil/pixdesc.h" "$SWSCALE_CFLAGS $SWSCALE_LIBS" "AVComponentDescriptor x; x.depth = 8;" ; then swscale="yes" else - echo "Warning: AV_PIX_FMT_FLAG_RGB is missing from libavutil, update for swscale support" + echo "Warning: libswscale is too old" fi fi fi if [ "$lavf" = "auto" ] ; then lavf="no" - if $PKGCONFIG --exists libavformat libavcodec libswscale 2>/dev/null; then - LAVF_LIBS="$LAVF_LIBS $($PKGCONFIG --libs libavformat libavcodec libavutil libswscale)" - LAVF_CFLAGS="$LAVF_CFLAGS $($PKGCONFIG --cflags libavformat libavcodec libavutil libswscale)" + if pkg_check 'libavformat libavcodec libavutil' ; then + LAVF_LIBS="$LAVF_LIBS $($PKGCONFIG --libs libavformat libavcodec libavutil)" + LAVF_CFLAGS="$LAVF_CFLAGS $($PKGCONFIG --cflags libavformat libavcodec libavutil)" fi - if [ -z "$LAVF_LIBS" -a -z "$LAVF_CFLAGS" ]; then + if [ -z "$LAVF_LIBS" ] && cc_check '' -lavformat ; then LAVF_LIBS="-lavformat" - for lib in -lpostproc -lavcodec -lswscale -lavutil -lm -lz -lbz2 $libpthread -lavifil32 -lws2_32; do + for lib in -lavcodec -lavresample -lswresample -lavutil -lbz2 -lz $libpthread -lole32 -luser32 -lws2_32 -lsecur32 ; do cc_check "" $lib && LAVF_LIBS="$LAVF_LIBS $lib" done fi - LAVF_LIBS="-L. 
$LAVF_LIBS" - if cc_check libavformat/avformat.h "$LAVF_CFLAGS $LAVF_LIBS" "av_frame_free(0);" ; then - if [ "$swscale" = "yes" ]; then + + if cc_check libavformat/avformat.h "$LAVF_CFLAGS $LAVF_LIBS" "av_register_all();" ; then + if cc_check libavcodec/avcodec.h "$LAVF_CFLAGS $LAVF_LIBS" "avcodec_send_packet(0,0);" ; then lavf="yes" else - echo "Warning: libavformat is not supported without swscale support" + echo "Warning: libavformat is too old" fi fi + if [ "$lavf" = "yes" -a "$swscale" = "no" ]; then + echo "Warning: libavformat is not supported without swscale support" + lavf="no" + fi fi if [ "$ffms" = "auto" ] ; then ffms_major="2"; ffms_minor="21"; ffms_micro="0"; ffms_bump="0" ffms="no" - if $PKGCONFIG --exists ffms2 2>/dev/null; then + if pkg_check ffms2 ; then FFMS2_LIBS="$FFMS2_LIBS $($PKGCONFIG --libs ffms2)" FFMS2_CFLAGS="$FFMS2_CFLAGS $($PKGCONFIG --cflags ffms2)" fi @@ -1142,13 +1194,13 @@ if [ "$lsmash" = "auto" ] ; then lsmash="no" - if $PKGCONFIG --exists liblsmash 2>/dev/null; then + if pkg_check liblsmash ; then LSMASH_LIBS="$LSMASH_LIBS $($PKGCONFIG --libs liblsmash)" LSMASH_CFLAGS="$LSMASH_CFLAGS $($PKGCONFIG --cflags liblsmash)" fi [ -z "$LSMASH_LIBS" ] && LSMASH_LIBS="-llsmash" - if cc_check lsmash.h "$LSMASH_CFLAGS $LSMASH_LIBS" ; then + if cc_check lsmash.h "$LSMASH_CFLAGS $LSMASH_LIBS" "lsmash_destroy_root(0);" ; then if cpp_check lsmash.h "$LSMASH_CFLAGS" "LSMASH_VERSION_MAJOR > 1 || (LSMASH_VERSION_MAJOR == 1 && LSMASH_VERSION_MINOR >= 5)" ; then lsmash="yes" else @@ -1165,7 +1217,7 @@ cc_check "" -lws2_32 && GPAC_LIBS="$GPAC_LIBS -lws2_32" cc_check "" -lwinmm && GPAC_LIBS="$GPAC_LIBS -lwinmm" fi - if cc_check gpac/isomedia.h "$GPAC_LIBS" ; then + if cc_check gpac/isomedia.h "$GPAC_LIBS" "gf_isom_close(0);" ; then if cc_check gpac/isomedia.h "$GPAC_LIBS" "gf_isom_set_pixel_aspect_ratio(0,0,0,0,0);" ; then gpac="yes" else @@ -1181,8 +1233,8 @@ define HAVE_LSMASH elif [ "$gpac" = "yes" ] ; then mp4="gpac" - define HAVE_GPAC 
LDFLAGSCLI="$GPAC_LIBS $LDFLAGSCLI" + define HAVE_GPAC fi if [ "$avs" = "auto" ] ; then @@ -1206,7 +1258,7 @@ if [ "$pic" = "yes" ] ; then [ "$SYS" != WINDOWS -a "$SYS" != CYGWIN ] && CFLAGS="$CFLAGS -fPIC" - ASFLAGS="$ASFLAGS -DPIC" + [[ "$ASFLAGS" != *"-DPIC"* ]] && ASFLAGS="$ASFLAGS -DPIC" # resolve textrels in the x86 asm cc_check stdio.h "-shared -Wl,-Bsymbolic" && SOFLAGS="$SOFLAGS -Wl,-Bsymbolic" [ $SYS = SunOS -a "$ARCH" = "X86" ] && SOFLAGS="$SOFLAGS -mimpure-text" @@ -1261,26 +1313,30 @@ CFLAGS="-Wno-maybe-uninitialized $CFLAGS" fi +if [ $compiler = GNU ] && cc_check '' -fvisibility=hidden ; then + CFLAGS="$CFLAGS -fvisibility=hidden" +fi + if [ $compiler = ICC -o $compiler = ICL ] ; then if cc_check 'extras/intel_dispatcher.h' '' 'x264_intel_dispatcher_override();' ; then define HAVE_INTEL_DISPATCHER fi fi -if [ "$bit_depth" -gt "8" ]; then - define HIGH_BIT_DEPTH - ASFLAGS="$ASFLAGS -DHIGH_BIT_DEPTH=1" +if [ "$bit_depth" = "all" ]; then + define HAVE_BITDEPTH8 + define HAVE_BITDEPTH10 +elif [ "$bit_depth" -eq "8" ]; then + define HAVE_BITDEPTH8 +elif [ "$bit_depth" -eq "10" ]; then + define HAVE_BITDEPTH10 opencl="no" -else - ASFLAGS="$ASFLAGS -DHIGH_BIT_DEPTH=0" fi if [ "$chroma_format" != "all" ]; then define CHROMA_FORMAT CHROMA_$chroma_format fi -ASFLAGS="$ASFLAGS -DBIT_DEPTH=$bit_depth" - [ $gpl = yes ] && define HAVE_GPL && x264_gpl=1 || x264_gpl=0 [ $interlaced = yes ] && define HAVE_INTERLACED && x264_interlaced=1 || x264_interlaced=0 @@ -1291,10 +1347,10 @@ # cygwin can use opencl if it can use LoadLibrary if [ $SYS = WINDOWS ] || ([ $SYS = CYGWIN ] && cc_check windows.h "" "LoadLibraryW(0);") ; then opencl="yes" - define HAVE_OPENCL + define HAVE_OPENCL "(BIT_DEPTH==8)" elif [ "$SYS" = "LINUX" -o "$SYS" = "MACOSX" ] ; then opencl="yes" - define HAVE_OPENCL + define HAVE_OPENCL "(BIT_DEPTH==8)" libdl="-ldl" fi LDFLAGS="$LDFLAGS $libdl" @@ -1302,28 +1358,38 @@ #define undefined vars as 0 for var in $CONFIG_HAVE; do - grep -q "HAVE_$var 1" 
config.h || define HAVE_$var 0 + grep -q "HAVE_$var " config.h || define HAVE_$var 0 done # generate exported config file -config_chroma_format="X264_CSP_I$chroma_format" -[ "$config_chroma_format" == "X264_CSP_Iall" ] && config_chroma_format="0" +[ "$bit_depth" = "all" ] && config_bit_depth="0" || config_bit_depth="$bit_depth" +[ "$chroma_format" = "all" ] && config_chroma_format="0" || config_chroma_format="X264_CSP_I$chroma_format" cat > x264_config.h << EOF -#define X264_BIT_DEPTH $bit_depth #define X264_GPL $x264_gpl #define X264_INTERLACED $x264_interlaced +#define X264_BIT_DEPTH $config_bit_depth #define X264_CHROMA_FORMAT $config_chroma_format EOF ${SRCPATH}/version.sh >> x264_config.h +if [ "$shared" = "yes" ]; then + CFLAGSSO="$CFLAGSSO -DX264_API_EXPORTS" +fi + if [ "$cli_libx264" = "system" ] ; then if [ "$shared" = "yes" ]; then - CLI_LIBX264='$(SONAME)' + if [ "$SYS" = "WINDOWS" -o "$SYS" = "CYGWIN" ]; then + CLI_LIBX264='$(IMPLIBNAME)' + else + CLI_LIBX264='$(SONAME)' + fi + CFLAGSCLI="$CFLAGSCLI -DX264_API_IMPORTS" else CLI_LIBX264= LDFLAGSCLI="$X264_LIBS $LDFLAGSCLI" + CFLAGSCLI="$CFLAGSCLI $X264_CFLAGS" cc_check 'stdint.h x264.h' '' 'x264_encoder_open(0);' || die "System libx264 can't be used for compilation of this version" fi else @@ -1362,7 +1428,11 @@ LIBX264=libx264.a [ -n "$RC" ] && RCFLAGS="$RCFLAGS -I. -o " fi -[ $compiler != GNU ] && CFLAGS="$(cc_cflags $CFLAGS)" +if [ $compiler != GNU ]; then + CFLAGS="$(cc_cflags $CFLAGS)" + CFLAGSSO="$(cc_cflags $CFLAGSSO)" + CFLAGSCLI="$(cc_cflags $CFLAGSCLI)" +fi if [ $compiler = ICC -o $compiler = ICL ]; then # icc does not define __SSE__ until SSE2 optimization and icl never defines it or _M_IX86_FP [ \( $ARCH = X86_64 -o $ARCH = X86 \) -a $asm = yes ] && ! 
cpp_check "" "" "defined(__SSE__)" && define __SSE__ @@ -1401,13 +1471,17 @@ SYS=$SYS CC=$CC CFLAGS=$CFLAGS +CFLAGSSO=$CFLAGSSO +CFLAGSCLI=$CFLAGSCLI COMPILER=$compiler COMPILER_STYLE=$compiler_style DEPMM=$DEPMM DEPMT=$DEPMT LD=$LD LDFLAGS=$LDFLAGS +LDFLAGSCLI=$LDFLAGSCLI LIBX264=$LIBX264 +CLI_LIBX264=$CLI_LIBX264 AR=$AR RANLIB=$RANLIB STRIP=$STRIP @@ -1427,8 +1501,9 @@ EOF if [ $compiler_style = MS ]; then - echo '%.o: %.c' >> config.mak - echo ' $(CC) $(CFLAGS) -c -Fo$@ $<' >> config.mak + echo 'CC_O=-Fo$@' >> config.mak +else + echo 'CC_O=-o $@' >> config.mak fi if [ "$cli" = "yes" ]; then @@ -1442,14 +1517,7 @@ echo "SONAME=libx264-$API.dll" >> config.mak if [ $compiler_style = MS ]; then echo 'IMPLIBNAME=libx264.dll.lib' >> config.mak - # GNU ld on windows defaults to exporting all global functions if there are no explicit __declspec(dllexport) declarations - # MSVC link does not act similarly, so it is required to make an export definition out of x264.h and use it at link time - echo "SOFLAGS=-dll -def:x264.def -implib:\$(IMPLIBNAME) $SOFLAGS" >> config.mak - echo "EXPORTS" > x264.def - # export API functions - grep "^\(int\|void\|x264_t\).*x264" ${SRCPATH}/x264.h | sed -e "s/.*\(x264.*\)(.*/\1/;s/open/open_$API/g" >> x264.def - # export API variables/data. 
must be flagged with the DATA keyword - grep "extern.*x264" ${SRCPATH}/x264.h | sed -e "s/.*\(x264\w*\)\W.*/\1 DATA/;" >> x264.def + echo "SOFLAGS=-dll -implib:\$(IMPLIBNAME) $SOFLAGS" >> config.mak else echo 'IMPLIBNAME=libx264.dll.a' >> config.mak echo "SOFLAGS=-shared -Wl,--out-implib,\$(IMPLIBNAME) $SOFLAGS" >> config.mak @@ -1476,9 +1544,6 @@ echo 'install: install-lib-static' >> config.mak fi -echo "LDFLAGSCLI = $LDFLAGSCLI" >> config.mak -echo "CLI_LIBX264 = $CLI_LIBX264" >> config.mak - cat > x264.pc << EOF prefix=$prefix exec_prefix=$exec_prefix @@ -1487,16 +1552,14 @@ Name: x264 Description: H.264 (MPEG4 AVC) encoder library -Version: $(grep POINTVER < x264_config.h | sed -e 's/.* "//; s/".*//') +Version: $(grep POINTVER < x264_config.h | sed -e 's/.* "//; s/".*//; s/ .*//') Libs: -L$libdir -lx264 $([ "$shared" = "yes" ] || echo $libpthread $libm $libdl) Libs.private: $([ "$shared" = "yes" ] && echo $libpthread $libm $libdl) -Cflags: -I$includedir +Cflags: -I$includedir $([ "$shared" = "yes" ] && echo "-DX264_API_IMPORTS") EOF filters="crop select_every" -gpl_filters="" [ $swscale = yes ] && filters="resize $filters" -[ $gpl = yes ] && filters="$filters $gpl_filters" cat > conftest.log < Sat, 9 Nov 2019 15:33:26 +0800 + +x264 (2:0.158.2984+git3759fcb-1) bionic; urgency=medium + + * Update to 0.158.2984+git3759fcb. + + -- Hung-Yi Chen Thu, 15 Aug 2019 17:28:58 +0800 + +x264 (2:0.155.2917+git0a84d98-2) unstable; urgency=medium + + * Team upload. + * Upload to unstable. 
+ + -- Sebastian Ramacher Thu, 27 Sep 2018 22:03:40 +0200 + +x264 (2:0.155.2917+git0a84d98-1) experimental; urgency=medium + + [ Ondřej Nový ] + * d/copyright: Change Format URL to correct one + * d/control: Set Vcs-* to salsa.debian.org + * d/changelog: Remove trailing whitespaces + + [ Felipe Sateler ] + * Change maintainer address to debian-multimedia@lists.debian.org + + [ Ondřej Nový ] + * d/tests: Use AUTOPKGTEST_TMP instead of ADTTMP + + [ Rico Tzschichholz ] + * Update to new stable upstream + * New upstream version 0.155.2917+git0a84d98 + * Drop custom 10bit build, upstream supports this as runtime option now + * Update debian/control for soname bump + * Regenerate manpage + * Update copyright years + + [ Sebastian Ramacher ] + * debian/{rules,confflags}: + - Use dpkg include for architecture variables. + - Fix dh_install --list-missing warning. + - Remove cruft. + - Handle CPPFLAGS. + * debian/patches: Export x264_stack_align. + * debian/: Bump debhelper compat to 11. + * debian/control: + - Drop obsolete dpkg-dev B-D. + - Bump Standards-Version. + + -- Rico Tzschichholz Fri, 24 Aug 2018 17:25:59 +0200 + x264 (2:0.152.2854+gite9a5903-2) unstable; urgency=medium * Team upload. 
@@ -603,11 +658,11 @@ * Drop --enable-pic, let's see what breaks, LP: #524859 [ Reinhard Tartler ] - * New upstream snapshot, no new features, LP: #526396 + * New upstream snapshot, no new features, LP: #526396 * remove quilt infrastructure * don't set CFLAGS in debian/rules, upstream build system overrides this anyways - + -- Reinhard Tartler Sun, 21 Feb 2010 16:57:21 +0100 x264 (2:0.85.1442.1+git781d30-1) lucid; urgency=low diff -Nru x264-0.152.2854+gite9a5903/debian/compat x264-0.158.2988+git-20191101.7817004/debian/compat --- x264-0.152.2854+gite9a5903/debian/compat 2018-01-19 11:35:17.000000000 +0000 +++ x264-0.158.2988+git-20191101.7817004/debian/compat 2019-11-09 05:16:41.000000000 +0000 @@ -1 +1 @@ -9 +11 diff -Nru x264-0.152.2854+gite9a5903/debian/confflags x264-0.158.2988+git-20191101.7817004/debian/confflags --- x264-0.152.2854+gite9a5903/debian/confflags 2018-01-19 11:35:17.000000000 +0000 +++ x264-0.158.2988+git-20191101.7817004/debian/confflags 2019-11-09 05:16:41.000000000 +0000 @@ -2,14 +2,7 @@ # vi:syntax=make # configure flags logic -# Set CFLAGS from DEB_CFLAGS if defined, otherwise add build flags from -# dpkg-buildflags excluding -O2. -CFLAGS = $(or $(DEB_CFLAGS),$(shell dpkg-buildflags --get CFLAGS 2>/dev/null | sed -e 's/-O2//g')) -ifeq (,$(CFLAGS)) - # Handle case for versions of Debian/Ubuntu that have dpkg-dev (<< 1.15.7). - CFLAGS = -fstack-protector --param=ssp-buffer-size=4 -Wformat -Wformat-security -Werror=format-security -endif -shared_extra_cflags = $(CFLAGS) +extra_cflags = $(CFLAGS) $(CPPFLAGS) LDFLAGS := $(filter-out %-Bsymbolic-functions,$(LDFLAGS)) @@ -119,14 +112,6 @@ endif endif -# See Bug#743713, the debian sparc and sh4 ports are currently stuck with gcc 4.6 -# -fno-aggressive-loop-optimizations was introduced only in gcc 4.8 -# this conditional will also help any backporters. 
-HAVEGCC4.8 :=$(shell dpkg --compare-versions `gcc --version | grep ^gcc | sed 's/^.* //g'` ge 4.8 && echo yes || echo no) -ifeq (yes,$(HAVEGCC4.8)) -common_confflags += --extra-cflags=-fno-aggressive-loop-optimizations -endif - # MIPS upstream arch, mips, mipsel and mips64el Debian arches; no upstream flags by # default ifneq (,$(filter mips mipsel mips64el,$(DEB_HOST_GNU_CPU))) @@ -151,7 +136,7 @@ ifeq ($(toolchain_arch),) toolchain_arch := armv6t2 endif -shared_extra_cflags += -march=$(toolchain_arch) +extra_cflags += -march=$(toolchain_arch) else do_opt := yes opt_libdir := /usr/lib/$(DEB_HOST_MULTIARCH)/neon/vfp diff -Nru x264-0.152.2854+gite9a5903/debian/control x264-0.158.2988+git-20191101.7817004/debian/control --- x264-0.152.2854+gite9a5903/debian/control 2018-01-19 11:35:17.000000000 +0000 +++ x264-0.158.2988+git-20191101.7817004/debian/control 2019-11-09 07:33:26.000000000 +0000 @@ -1,21 +1,20 @@ Source: x264 Section: libs Priority: optional -Maintainer: Debian Multimedia Maintainers +Maintainer: Debian Multimedia Maintainers Uploaders: Reinhard Tartler , Fabian Greffrath , Rico Tzschichholz Build-Depends: - debhelper (>= 9.20160115), - dpkg-dev (>= 1.17.14), + debhelper (>= 11), libavformat-dev (>= 6:9) , libffms2-dev , libgpac-dev (>= 0.5.0+svn4288~) , nasm (>= 2.13) [any-i386 any-amd64] -Standards-Version: 4.1.3 -Vcs-Git: https://anonscm.debian.org/git/pkg-multimedia/x264.git -Vcs-Browser: https://anonscm.debian.org/cgit/pkg-multimedia/x264.git +Standards-Version: 4.1.4 +Vcs-Git: https://salsa.debian.org/multimedia-team/x264.git +Vcs-Browser: https://salsa.debian.org/multimedia-team/x264 Homepage: http://www.videolan.org/developers/x264.html Package: x264 @@ -48,7 +47,7 @@ * parallel encoding on multiple CPUs * interlaced streams -Package: libx264-152 +Package: libx264-158 Architecture: any Multi-Arch: same Depends: @@ -65,7 +64,7 @@ Architecture: any Multi-Arch: same Depends: - libx264-152 (= ${binary:Version}), + libx264-158 (= ${binary:Version}), 
${misc:Depends} Description: development files for libx264 libx264 is an advanced encoding library for creating H.264 (MPEG-4 AVC) diff -Nru x264-0.152.2854+gite9a5903/debian/control.in x264-0.158.2988+git-20191101.7817004/debian/control.in --- x264-0.152.2854+gite9a5903/debian/control.in 2018-01-19 11:35:17.000000000 +0000 +++ x264-0.158.2988+git-20191101.7817004/debian/control.in 2019-11-09 05:16:41.000000000 +0000 @@ -1,21 +1,20 @@ Source: x264 Section: libs Priority: optional -Maintainer: Debian Multimedia Maintainers +Maintainer: Debian Multimedia Maintainers Uploaders: Reinhard Tartler , Fabian Greffrath , Rico Tzschichholz Build-Depends: - debhelper (>= 9.20160115), - dpkg-dev (>= 1.17.14), + debhelper (>= 11), libavformat-dev (>= 6:9) , libffms2-dev , libgpac-dev (>= 0.5.0+svn4288~) , nasm (>= 2.13) [any-i386 any-amd64] -Standards-Version: 4.1.3 -Vcs-Git: https://anonscm.debian.org/git/pkg-multimedia/x264.git -Vcs-Browser: https://anonscm.debian.org/cgit/pkg-multimedia/x264.git +Standards-Version: 4.1.4 +Vcs-Git: https://salsa.debian.org/multimedia-team/x264.git +Vcs-Browser: https://salsa.debian.org/multimedia-team/x264 Homepage: http://www.videolan.org/developers/x264.html Package: x264 diff -Nru x264-0.152.2854+gite9a5903/debian/copyright x264-0.158.2988+git-20191101.7817004/debian/copyright --- x264-0.152.2854+gite9a5903/debian/copyright 2018-01-19 11:35:17.000000000 +0000 +++ x264-0.158.2988+git-20191101.7817004/debian/copyright 2019-11-09 05:16:41.000000000 +0000 @@ -1,17 +1,17 @@ -Format: http://anonscm.debian.org/viewvc/dep/web/deps/dep5.mdwn?revision=174&view=markup +Format: https://www.debian.org/doc/packaging-manuals/copyright-format/1.0/ Upstream-Name: x264 Upstream-Contact: x264-devel@videolan.org Source: http://www.videolan.org/developers/x264.html Files: * -Copyright: 2003-2017 x264 project +Copyright: 2003-2018 x264 project License: GPL-2+ Comment: This program is also available under a commercial proprietary license. 
For more information, contact us at licensing@x264.com. Files: common/x86/x86inc.asm -Copyright: 2005-2017 x264 project +Copyright: 2005-2018 x264 project License: ISC Permission to use, copy, modify, and/or distribute this software for any purpose with or without fee is hereby granted, provided that the above diff -Nru x264-0.152.2854+gite9a5903/debian/libx264N.install.in x264-0.158.2988+git-20191101.7817004/debian/libx264N.install.in --- x264-0.152.2854+gite9a5903/debian/libx264N.install.in 2018-01-19 11:35:17.000000000 +0000 +++ x264-0.158.2988+git-20191101.7817004/debian/libx264N.install.in 2019-11-09 05:16:41.000000000 +0000 @@ -1,2 +1 @@ shared/usr/lib/@DEB_HOST_MULTIARCH@/libx264.so.* usr/lib/@DEB_HOST_MULTIARCH@ -shared/usr/lib/@DEB_HOST_MULTIARCH@/x264-10bit/libx264.so.* usr/lib/@DEB_HOST_MULTIARCH@/x264-10bit diff -Nru x264-0.152.2854+gite9a5903/debian/patches/gpac-0.8.patch x264-0.158.2988+git-20191101.7817004/debian/patches/gpac-0.8.patch --- x264-0.152.2854+gite9a5903/debian/patches/gpac-0.8.patch 1970-01-01 00:00:00.000000000 +0000 +++ x264-0.158.2988+git-20191101.7817004/debian/patches/gpac-0.8.patch 2019-11-09 07:30:02.000000000 +0000 @@ -0,0 +1,31 @@ +Description: Patch for GPAC 0.8. + GPAC 0.8 changes interface. 
+Author: Hung-Yi Chen + +Index: x264-0.158.2988+git7817004/configure +=================================================================== +--- x264-0.158.2988+git7817004.orig/configure ++++ x264-0.158.2988+git7817004/configure +@@ -1218,7 +1218,7 @@ + cc_check "" -lwinmm && GPAC_LIBS="$GPAC_LIBS -lwinmm" + fi + if cc_check gpac/isomedia.h "$GPAC_LIBS" "gf_isom_close(0);" ; then +- if cc_check gpac/isomedia.h "$GPAC_LIBS" "gf_isom_set_pixel_aspect_ratio(0,0,0,0,0);" ; then ++ if cc_check gpac/isomedia.h "$GPAC_LIBS" "gf_isom_set_pixel_aspect_ratio(0,0,0,0,0,0);" ; then + gpac="yes" + else + echo "Warning: gpac is too old, update to 2007-06-21 UTC or later" + +Index: x264-0.158.2988+git7817004/output/mp4.c +=================================================================== +--- x264-0.158.2988+git7817004.orig/output/mp4.c ++++ x264-0.158.2988+git7817004/output/mp4.c +@@ -233,7 +233,7 @@ + dw *= sar; + else + dh /= sar; +- gf_isom_set_pixel_aspect_ratio( p_mp4->p_file, p_mp4->i_track, p_mp4->i_descidx, p_param->vui.i_sar_width, p_param->vui.i_sar_height ); ++ gf_isom_set_pixel_aspect_ratio( p_mp4->p_file, p_mp4->i_track, p_mp4->i_descidx, p_param->vui.i_sar_width, p_param->vui.i_sar_height, 0 ); + gf_isom_set_track_layout_info( p_mp4->p_file, p_mp4->i_track, dw, dh, 0, 0, 0 ); + } + diff -Nru x264-0.152.2854+gite9a5903/debian/patches/properly_detect_x32.patch x264-0.158.2988+git-20191101.7817004/debian/patches/properly_detect_x32.patch --- x264-0.152.2854+gite9a5903/debian/patches/properly_detect_x32.patch 2018-01-19 11:35:17.000000000 +0000 +++ x264-0.158.2988+git-20191101.7817004/debian/patches/properly_detect_x32.patch 2019-11-09 05:16:41.000000000 +0000 @@ -3,9 +3,11 @@ This version doesn't use any assembly yet, a proper port is needed. 
Author: Adam Borowski ---- x264-0.142.2431+gita5831aa.orig/common/common.h -+++ x264-0.142.2431+gita5831aa/common/common.h -@@ -1010,7 +1010,7 @@ static int ALWAYS_INLINE x264_predictor_ +Index: x264-0.158.2984+git3759fcb/common/common.h +=================================================================== +--- x264-0.158.2984+git3759fcb.orig/common/common.h ++++ x264-0.158.2984+git3759fcb/common/common.h +@@ -800,7 +800,7 @@ static ALWAYS_INLINE int x264_predictor_ return cnt; } @@ -14,9 +16,11 @@ #include "x86/util.h" #endif ---- x264-0.142.2431+gita5831aa.orig/configure -+++ x264-0.142.2431+gita5831aa/configure -@@ -556,6 +556,10 @@ esac +Index: x264-0.158.2984+git3759fcb/configure +=================================================================== +--- x264-0.158.2984+git3759fcb.orig/configure ++++ x264-0.158.2984+git3759fcb/configure +@@ -704,6 +704,10 @@ esac LDFLAGS="$LDFLAGS $libm" @@ -27,7 +31,7 @@ stack_alignment=4 case $host_cpu in i*86) -@@ -617,6 +621,10 @@ case $host_cpu in +@@ -760,6 +764,10 @@ case $host_cpu in ASFLAGS="$ASFLAGS -f elf64" fi ;; diff -Nru x264-0.152.2854+gite9a5903/debian/patches/series x264-0.158.2988+git-20191101.7817004/debian/patches/series --- x264-0.152.2854+gite9a5903/debian/patches/series 2018-01-19 11:35:17.000000000 +0000 +++ x264-0.158.2988+git-20191101.7817004/debian/patches/series 2019-11-09 07:31:50.000000000 +0000 @@ -1,2 +1,4 @@ link_gpac_dynamically.patch properly_detect_x32.patch +version.patch +gpac-0.8.patch diff -Nru x264-0.152.2854+gite9a5903/debian/patches/version.patch x264-0.158.2988+git-20191101.7817004/debian/patches/version.patch --- x264-0.152.2854+gite9a5903/debian/patches/version.patch 1970-01-01 00:00:00.000000000 +0000 +++ x264-0.158.2988+git-20191101.7817004/debian/patches/version.patch 2019-11-09 05:17:41.000000000 +0000 @@ -0,0 +1,43 @@ +Description: Update version. + Script modified from upstream source for Debian packaging. 
+Author: Hung-Yi Chen + +Index: x264-0.158.2988+git7817004/version.sh +=================================================================== +--- x264-0.158.2988+git7817004.orig/version.sh ++++ x264-0.158.2988+git7817004/version.sh +@@ -1,28 +1,7 @@ + #!/bin/sh +- +-cd "$(dirname "$0")" >/dev/null && [ -f x264.h ] || exit 1 +- +-api="$(grep '#define X264_BUILD' < x264.h | sed 's/^.* \([1-9][0-9]*\).*$/\1/')" +-ver="x" +-version="" +- +-if [ -d .git ] && command -v git >/dev/null 2>&1 ; then +- localver="$(($(git rev-list HEAD | wc -l)))" +- if [ "$localver" -gt 1 ] ; then +- ver_diff="$(($(git rev-list origin/master..HEAD | wc -l)))" +- ver="$((localver-ver_diff))" +- echo "#define X264_REV $ver" +- echo "#define X264_REV_DIFF $ver_diff" +- if [ "$ver_diff" -ne 0 ] ; then +- ver="$ver+$ver_diff" +- fi +- if git status | grep -q "modified:" ; then +- ver="${ver}M" +- fi +- ver="$ver $(git rev-list -n 1 HEAD | cut -c 1-7)" +- version=" r$ver" +- fi +-fi +- +-echo "#define X264_VERSION \"$version\"" +-echo "#define X264_POINTVER \"0.$api.$ver\"" ++# Script modified from upstream source for Debian packaging since packaging ++# won't include .git repository. 
++echo '#define X264_REV 2988' ++echo '#define X264_REV_DIFF 0' ++echo '#define X264_VERSION " r2984 7817004"' ++echo '#define X264_POINTVER "0.158.2988 7817004"' diff -Nru x264-0.152.2854+gite9a5903/debian/rules x264-0.158.2988+git-20191101.7817004/debian/rules --- x264-0.152.2854+gite9a5903/debian/rules 2018-01-19 11:35:17.000000000 +0000 +++ x264-0.158.2988+git-20191101.7817004/debian/rules 2019-11-09 05:16:41.000000000 +0000 @@ -2,18 +2,14 @@ libx264N := libx264-$(shell awk '/\#define X264_BUILD/{print $$3}' x264.h) -DEB_HOST_MULTIARCH ?= $(shell dpkg-architecture -qDEB_HOST_MULTIARCH) -DEB_BUILD_GNU_TYPE ?= $(shell dpkg-architecture -qDEB_BUILD_GNU_TYPE) -DEB_HOST_GNU_TYPE ?= $(shell dpkg-architecture -qDEB_HOST_GNU_TYPE) -DEB_HOST_GNU_CPU ?= $(shell dpkg-architecture -qDEB_HOST_GNU_CPU) - +include /usr/share/dpkg/architecture.mk include debian/confflags DH_INSTALL_FILES = debian/$(libx264N).install \ debian/libx264-dev.install %: - dh $@ --parallel + dh $@ .PHONY: debian/control debian/control: @@ -24,13 +20,13 @@ override_dh_auto_build: # Build static lib - LDFLAGS="$(LDFLAGS)" ./configure $(static_confflags) \ + CFLAGS="$(extra_cflags)" LDFLAGS="$(LDFLAGS)" ./configure $(static_confflags) \ || ( tail -v -n +0 config.log config.log ; exit 1 ) $(MAKE) $(MAKE) install DESTDIR=$(CURDIR)/debian/install/static $(MAKE) distclean # Build shared lib - CFLAGS="$(shared_extra_cflags)" LDFLAGS="$(LDFLAGS)" ./configure $(shared_confflags) \ + CFLAGS="$(extra_cflags)" LDFLAGS="$(LDFLAGS)" ./configure $(shared_confflags) \ || ( tail -v -n +0 config.log config.log ; exit 1 ) $(MAKE) $(MAKE) install DESTDIR=$(CURDIR)/debian/install/shared @@ -42,25 +38,6 @@ $(MAKE) $(MAKE) install DESTDIR=$(CURDIR)/debian/install/opt endif - # now do the 10 bit builds - $(MAKE) distclean - CFLAGS="$(shared_extra_cflags)" LDFLAGS="$(LDFLAGS)" ./configure $(shared_confflags) --bit-depth=10 \ - || ( tail -v -n +0 config.log config.log ; exit 1 ) - $(MAKE) - install -d -m755 
$(CURDIR)/debian/install/shared/usr/lib/$(DEB_HOST_MULTIARCH)/x264-10bit - install -m755 libx264.so.* $(CURDIR)/debian/install/shared/usr/lib/$(DEB_HOST_MULTIARCH)/x264-10bit - sed -e 's,@DEB_HOST_MULTIARCH@,$(DEB_HOST_MULTIARCH),' \ - debian/x264-10bit.in > $(CURDIR)/debian/install/shared/usr/bin/x264-10bit - chmod 755 $(CURDIR)/debian/install/shared/usr/bin/x264-10bit -ifeq ($(do_opt),yes) - $(MAKE) distclean - # Build opt lib - LDFLAGS="$(LDFLAGS)" ./configure $(opt_confflags) --bit-depth=10 \ - || ( tail -v -n +0 config.log config.log ; exit 1 ) - $(MAKE) - install -d -m755 $(CURDIR)/debian/install/opt/usr/lib/$(DEB_HOST_MULTIARCH)/x264-10bit - install -m755 libx264.so.* $(CURDIR)/debian/install/opt/usr/lib/$(DEB_HOST_MULTIARCH)/x264-10bit -endif override_dh_auto_configure: # dh_auto_configure phase handled via dh_auto_build. @@ -74,12 +51,15 @@ dh_clean config.mak2 $(DH_INSTALL_FILES) override_dh_install: $(DH_INSTALL_FILES) - dh_install --list-missing --sourcedir=debian/install + dh_install --sourcedir=debian/install ifeq ($(do_opt),yes) mkdir -p debian/$(libx264N)$(opt_libdir) cp -a debian/install/opt$(opt_libdir)/*.so.* debian/$(libx264N)$(opt_libdir) endif +override_dh_missing: + dh_missing --list-missing --sourcedir=debian/install + debian/x264.1: build env LD_LIBRARY_PATH="$(LD_LIBRARY_PATH):$(CURDIR)/debian/install/shared/usr/lib/$(DEB_HOST_MULTIARCH)" \ help2man -n "fast h264 encoder" -N -s1 -S "Videolan project" -h '--fullhelp' \ diff -Nru x264-0.152.2854+gite9a5903/debian/tests/encode-testimage x264-0.158.2988+git-20191101.7817004/debian/tests/encode-testimage --- x264-0.152.2854+gite9a5903/debian/tests/encode-testimage 2018-01-19 11:35:17.000000000 +0000 +++ x264-0.158.2988+git-20191101.7817004/debian/tests/encode-testimage 2019-11-09 05:16:41.000000000 +0000 @@ -4,7 +4,7 @@ set -e -cd "$ADTTMP" +cd "$AUTOPKGTEST_TMP" ffmpeg -y -filter_complex testsrc -t 10 in.avi x264 --crf 24 -o out.mkv in.avi diff -Nru x264-0.152.2854+gite9a5903/debian/x264.1 
x264-0.158.2988+git-20191101.7817004/debian/x264.1 --- x264-0.152.2854+gite9a5903/debian/x264.1 2018-01-19 11:35:17.000000000 +0000 +++ x264-0.158.2988+git-20191101.7817004/debian/x264.1 2019-11-09 05:16:41.000000000 +0000 @@ -1,9 +1,9 @@ -.\" DO NOT MODIFY THIS FILE! It was generated by help2man 1.47.5. -.TH X264 "1" "December 2017" "Videolan project" "User Commands" +.\" DO NOT MODIFY THIS FILE! It was generated by help2man 1.47.6. +.TH X264 "1" "August 2018" "Videolan project" "User Commands" .SH NAME x264 \- fast h264 encoder .SH DESCRIPTION -x264 core:152 r2854 e9a5903 +x264 core:155 r2917 0a84d98 Syntax: x264 [options] \fB\-o\fR outfile infile .PP Infile can be raw (in which case resolution is required), @@ -18,8 +18,9 @@ \&.flv \-> Flash Video \&.mp4 \-> MP4 if compiled with GPAC or L\-SMASH support (gpac) .PP -Output bit depth: 10 (configured at compile time) -.SH OPTIONS +Output bit depth: 8/10 +\&. +Options: .TP \fB\-h\fR, \fB\-\-help\fR List basic options @@ -283,13 +284,13 @@ Ratecontrol: .TP \fB\-q\fR, \fB\-\-qp\fR -Force constant QP (0\-69, 0=lossless) +Force constant QP (0\-81, 0=lossless) .TP \fB\-B\fR, \fB\-\-bitrate\fR Set bitrate (kbit/s) .TP \fB\-\-crf\fR -Quality\-based VBR (0\-51) [23.0] +Quality\-based VBR (\fB\-12\-51\fR) [23.0] .HP \fB\-\-rc\-lookahead\fR Number of frames for frametype lookahead [40] .HP @@ -517,18 +518,25 @@ .TP smpte240m, linear, log100, log316, iec61966\-2\-4, bt1361e, iec61966\-2\-1, -bt2020\-10, bt2020\-12, smpte2084, smpte428 +bt2020\-10, bt2020\-12, smpte2084, smpte428, +arib\-std\-b67 .TP \fB\-\-colormatrix\fR Specify color matrix setting ["???"] \- undef, bt709, fcc, bt470bg, smpte170m, .TP smpte240m, GBR, YCgCo, bt2020nc, bt2020c, -smpte2085 +smpte2085, chroma\-derived\-nc, +chroma\-derived\-c, ICtCp .TP \fB\-\-chromaloc\fR Specify chroma sample location (0 to 5) [0] .TP +\fB\-\-alternative\-transfer\fR Specify an alternative transfer +characteristics ["undef"] +.IP +\- same values as \fB\-\-transfer\fR +.TP 
\fB\-\-nal\-hrd\fR Signal HRD information (requires vbv\-bufsize) \- none, vbr, cbr (cbr not allowed in .mp4) @@ -570,25 +578,22 @@ \- valid csps for `lavf' demuxer: yuv420p, yuyv422, rgb24, bgr24, yuv422p, yuv444p, yuv410p, yuv411p, gray, monow, monob, -pal8, yuvj420p, yuvj422p, yuvj444p, xvmcmc, -xvmcidct, uyvy422, uyyvyy411, bgr8, bgr4, -bgr4_byte, rgb8, rgb4, rgb4_byte, nv12, nv21, -argb, rgba, abgr, bgra, gray16be, gray16le, -yuv440p, yuvj440p, yuva420p, vdpau_h264, -vdpau_mpeg1, vdpau_mpeg2, vdpau_wmv3, -vdpau_vc1, rgb48be, rgb48le, rgb565be, -rgb565le, rgb555be, rgb555le, bgr565be, -bgr565le, bgr555be, bgr555le, vaapi_moco, -vaapi_idct, vaapi_vld, yuv420p16le, -yuv420p16be, yuv422p16le, yuv422p16be, -yuv444p16le, yuv444p16be, vdpau_mpeg4, -dxva2_vld, rgb444le, rgb444be, bgr444le, -bgr444be, ya8, bgr48be, bgr48le, yuv420p9be, -yuv420p9le, yuv420p10be, yuv420p10le, -yuv422p10be, yuv422p10le, yuv444p9be, -yuv444p9le, yuv444p10be, yuv444p10le, -yuv422p9be, yuv422p9le, vda_vld, gbrp, gbrp9be, -gbrp9le, gbrp10be, gbrp10le, gbrp16be, +pal8, yuvj420p, yuvj422p, yuvj444p, uyvy422, +uyyvyy411, bgr8, bgr4, bgr4_byte, rgb8, rgb4, +rgb4_byte, nv12, nv21, argb, rgba, abgr, bgra, +gray16be, gray16le, yuv440p, yuvj440p, +yuva420p, rgb48be, rgb48le, rgb565be, rgb565le, +rgb555be, rgb555le, bgr565be, bgr565le, +bgr555be, bgr555le, vaapi_moco, vaapi_idct, +vaapi_vld, yuv420p16le, yuv420p16be, +yuv422p16le, yuv422p16be, yuv444p16le, +yuv444p16be, dxva2_vld, rgb444le, rgb444be, +bgr444le, bgr444be, ya8, bgr48be, bgr48le, +yuv420p9be, yuv420p9le, yuv420p10be, +yuv420p10le, yuv422p10be, yuv422p10le, +yuv444p9be, yuv444p9le, yuv444p10be, +yuv444p10le, yuv422p9be, yuv422p9le, gbrp, +gbrp9be, gbrp9le, gbrp10be, gbrp10le, gbrp16be, gbrp16le, yuva422p, yuva444p, yuva420p9be, yuva420p9le, yuva422p9be, yuva422p9le, yuva444p9be, yuva444p9le, yuva420p10be, @@ -597,10 +602,10 @@ yuva420p16le, yuva422p16be, yuva422p16le, yuva444p16be, yuva444p16le, vdpau, xyz12le, xyz12be, nv16, nv20le, 
nv20be, rgba64be, -rgba64le, bgra64be, bgra64le, yvyu422, vda, -ya16be, ya16le, gbrap, gbrap16be, gbrap16le, -qsv, mmal, d3d11va_vld, cuda, 0rgb, rgb0, 0bgr, -bgr0, yuv420p12be, yuv420p12le, yuv420p14be, +rgba64le, bgra64be, bgra64le, yvyu422, ya16be, +ya16le, gbrap, gbrap16be, gbrap16le, qsv, mmal, +d3d11va_vld, cuda, 0rgb, rgb0, 0bgr, bgr0, +yuv420p12be, yuv420p12le, yuv420p14be, yuv420p14le, yuv422p12be, yuv422p12le, yuv422p14be, yuv422p14le, yuv444p12be, yuv444p12le, yuv444p14be, yuv444p14le, @@ -609,19 +614,22 @@ bayer_gbrg8, bayer_grbg8, bayer_bggr16le, bayer_bggr16be, bayer_rggb16le, bayer_rggb16be, bayer_gbrg16le, bayer_gbrg16be, bayer_grbg16le, -bayer_grbg16be, yuv440p10le, yuv440p10be, +bayer_grbg16be, xvmc, yuv440p10le, yuv440p10be, yuv440p12le, yuv440p12be, ayuv64le, ayuv64be, videotoolbox_vld, p010le, p010be, gbrap12be, gbrap12le, gbrap10be, gbrap10le, mediacodec, gray12be, gray12le, gray10be, gray10le, p016le, p016be, d3d11, gray9be, gray9le, gbrpf32be, -gbrpf32le, gbrapf32be, gbrapf32le, drm_prime +gbrpf32le, gbrapf32be, gbrapf32le, drm_prime, +opencl .TP \fB\-\-output\-csp\fR Specify output colorspace ["i420"] \- i420, i422, i444, rgb .HP \fB\-\-input\-depth\fR Specify input bit depth for raw input +.HP +\fB\-\-output\-depth\fR Specify output bit depth .TP \fB\-\-input\-range\fR Specify input color range ["auto"] @@ -778,11 +786,11 @@ offsets: the offset into the step to select a frame see: http://avisynth.nl/index.php/Select#SelectEvery .PP -(libswscale 4.8.100) -(libavformat 57.83.100) +(libswscale 5.1.100) +(libavformat 58.12.100) (ffmpegsource 2.23.0.0) -built on Dec 31 2017, gcc: 7.2.0 -x264 configuration: \fB\-\-bit\-depth\fR=\fI\,8\/\fR \fB\-\-chroma\-format\fR=\fI\,all\/\fR -libx264 configuration: \fB\-\-bit\-depth\fR=\fI\,10\/\fR \fB\-\-chroma\-format\fR=\fI\,all\/\fR +built on Aug 24 2018, gcc: 8.2.0 +x264 configuration: \fB\-\-chroma\-format\fR=\fI\,all\/\fR +libx264 configuration: \fB\-\-chroma\-format\fR=\fI\,all\/\fR x264 license: GPL 
version 2 or later libswscale/libavformat/ffmpegsource license: GPL version 2 or later diff -Nru x264-0.152.2854+gite9a5903/debian/x264-10bit.1 x264-0.158.2988+git-20191101.7817004/debian/x264-10bit.1 --- x264-0.152.2854+gite9a5903/debian/x264-10bit.1 2018-01-19 11:35:17.000000000 +0000 +++ x264-0.158.2988+git-20191101.7817004/debian/x264-10bit.1 1970-01-01 00:00:00.000000000 +0000 @@ -1,13 +0,0 @@ -.TH x264-10bit 1 -.SH NAME -x264-10bit \- wrapper script for the 10-bit variant of the libx264 shared library -.SH SYNOPSIS -x264-10bit -.SH DESCRIPTION -This is a wrapper script that sets the LD_LIBRARY_PATH variable so that the 10-bit variant of the libx264 shared library is preferred over the regular 8-bit variant. It then calls the program with the arguments . -.SH OPTIONS -This wrapper script has no options. All arguments are passed over to the called program. -.SH SEE ALSO -x264(1) -.SH AUTHOR -Reinhard Tartler diff -Nru x264-0.152.2854+gite9a5903/debian/x264-10bit.in x264-0.158.2988+git-20191101.7817004/debian/x264-10bit.in --- x264-0.152.2854+gite9a5903/debian/x264-10bit.in 2018-01-19 11:35:17.000000000 +0000 +++ x264-0.158.2988+git-20191101.7817004/debian/x264-10bit.in 1970-01-01 00:00:00.000000000 +0000 @@ -1,27 +0,0 @@ -#!/bin/bash -# -# Copyright (C) 2014 Reinhard Tartler -# -# This program is free software; you can redistribute it and/or modify -# it under the terms of the GNU General Public License as published by -# the Free Software Foundation; either version 2 of the License, or -# (at your option) any later version. -# . -# This program is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -# GNU General Public License for more details. -# . -# You should have received a copy of the GNU General Public License -# along with this program. If not, see . 
- -export LD_LIBRARY_PATH=/usr/lib/@DEB_HOST_MULTIARCH@/x264-10bit${LD_LIBRARY_PATH:+:}${LD_LIBRARY_PATH:-} - -if [ $# -gt 0 ]; then - exec "$@" - echo "Failed to execute '$@'" >&2 - exit 1 -fi - -echo "Usage `basename $0` " >&2 -exit 2 diff -Nru x264-0.152.2854+gite9a5903/debian/x264-get-orig-source x264-0.158.2988+git-20191101.7817004/debian/x264-get-orig-source --- x264-0.152.2854+gite9a5903/debian/x264-get-orig-source 2018-01-19 11:35:17.000000000 +0000 +++ x264-0.158.2988+git-20191101.7817004/debian/x264-get-orig-source 2019-11-09 05:16:41.000000000 +0000 @@ -3,8 +3,8 @@ # Script used to generate the orig source tarball for x264. X264_GIT_URL="git://git.videolan.org/x264.git" -X264_GIT_COMMIT="e9a5903edf8ca59ef20e6f4894c196f135af735e" -DATE_RETRIEVED="20171224" +X264_GIT_COMMIT="0a84d986e7020f8344f00752e3600b9769cc1e85" +DATE_RETRIEVED="20180806" COMMIT_SHORT_FORM="$(echo $X264_GIT_COMMIT | \ sed -e 's/^\([[:xdigit:]]\{,7\}\).*/\1/')" diff -Nru x264-0.152.2854+gite9a5903/debian/x264.install x264-0.158.2988+git-20191101.7817004/debian/x264.install --- x264-0.152.2854+gite9a5903/debian/x264.install 2018-01-19 11:35:17.000000000 +0000 +++ x264-0.158.2988+git-20191101.7817004/debian/x264.install 2019-11-09 05:16:41.000000000 +0000 @@ -1,2 +1 @@ shared/usr/bin/x264 usr/bin -shared/usr/bin/x264-10bit usr/bin diff -Nru x264-0.152.2854+gite9a5903/debian/x264.manpages x264-0.158.2988+git-20191101.7817004/debian/x264.manpages --- x264-0.152.2854+gite9a5903/debian/x264.manpages 2018-01-19 11:35:17.000000000 +0000 +++ x264-0.158.2988+git-20191101.7817004/debian/x264.manpages 2019-11-09 05:16:41.000000000 +0000 @@ -1,2 +1 @@ debian/x264.1 -debian/x264-10bit.1 diff -Nru x264-0.152.2854+gite9a5903/encoder/analyse.c x264-0.158.2988+git-20191101.7817004/encoder/analyse.c --- x264-0.152.2854+gite9a5903/encoder/analyse.c 2017-12-31 12:50:51.000000000 +0000 +++ x264-0.158.2988+git-20191101.7817004/encoder/analyse.c 2019-11-09 05:16:29.000000000 +0000 @@ -1,7 +1,7 @@ 
/***************************************************************************** * analyse.c: macroblock analysis ***************************************************************************** - * Copyright (C) 2003-2017 x264 project + * Copyright (C) 2003-2019 x264 project * * Authors: Laurent Aimar * Loren Merritt @@ -120,124 +120,6 @@ } x264_mb_analysis_t; -/* lambda = pow(2,qp/6-2) */ -const uint16_t x264_lambda_tab[QP_MAX_MAX+1] = -{ - 1, 1, 1, 1, 1, 1, 1, 1, /* 0- 7 */ - 1, 1, 1, 1, 1, 1, 1, 1, /* 8-15 */ - 2, 2, 2, 2, 3, 3, 3, 4, /* 16-23 */ - 4, 4, 5, 6, 6, 7, 8, 9, /* 24-31 */ - 10, 11, 13, 14, 16, 18, 20, 23, /* 32-39 */ - 25, 29, 32, 36, 40, 45, 51, 57, /* 40-47 */ - 64, 72, 81, 91, 102, 114, 128, 144, /* 48-55 */ - 161, 181, 203, 228, 256, 287, 323, 362, /* 56-63 */ - 406, 456, 512, 575, 645, 724, 813, 912, /* 64-71 */ -1024,1149,1290,1448,1625,1825,2048,2299, /* 72-79 */ -2580,2896, /* 80-81 */ -}; - -/* lambda2 = pow(lambda,2) * .9 * 256 */ -/* Capped to avoid overflow */ -const int x264_lambda2_tab[QP_MAX_MAX+1] = -{ - 14, 18, 22, 28, 36, 45, 57, 72, /* 0- 7 */ - 91, 115, 145, 182, 230, 290, 365, 460, /* 8-15 */ - 580, 731, 921, 1161, 1462, 1843, 2322, 2925, /* 16-23 */ - 3686, 4644, 5851, 7372, 9289, 11703, 14745, 18578, /* 24-31 */ - 23407, 29491, 37156, 46814, 58982, 74313, 93628, 117964, /* 32-39 */ - 148626, 187257, 235929, 297252, 374514, 471859, 594505, 749029, /* 40-47 */ - 943718, 1189010, 1498059, 1887436, 2378021, 2996119, 3774873, 4756042, /* 48-55 */ - 5992238, 7549747, 9512085, 11984476, 15099494, 19024170,23968953,30198988, /* 56-63 */ - 38048341, 47937906, 60397977, 76096683, 95875813,120795955, /* 64-69 */ -134217727,134217727,134217727,134217727,134217727,134217727, /* 70-75 */ -134217727,134217727,134217727,134217727,134217727,134217727, /* 76-81 */ -}; - -const uint8_t x264_exp2_lut[64] = -{ - 0, 3, 6, 8, 11, 14, 17, 20, 23, 26, 29, 32, 36, 39, 42, 45, - 48, 52, 55, 58, 62, 65, 69, 72, 76, 80, 83, 87, 91, 94, 98, 102, - 106, 110, 
114, 118, 122, 126, 130, 135, 139, 143, 147, 152, 156, 161, 165, 170, - 175, 179, 184, 189, 194, 198, 203, 208, 214, 219, 224, 229, 234, 240, 245, 250 -}; - -const float x264_log2_lut[128] = -{ - 0.00000, 0.01123, 0.02237, 0.03342, 0.04439, 0.05528, 0.06609, 0.07682, - 0.08746, 0.09803, 0.10852, 0.11894, 0.12928, 0.13955, 0.14975, 0.15987, - 0.16993, 0.17991, 0.18982, 0.19967, 0.20945, 0.21917, 0.22882, 0.23840, - 0.24793, 0.25739, 0.26679, 0.27612, 0.28540, 0.29462, 0.30378, 0.31288, - 0.32193, 0.33092, 0.33985, 0.34873, 0.35755, 0.36632, 0.37504, 0.38370, - 0.39232, 0.40088, 0.40939, 0.41785, 0.42626, 0.43463, 0.44294, 0.45121, - 0.45943, 0.46761, 0.47573, 0.48382, 0.49185, 0.49985, 0.50779, 0.51570, - 0.52356, 0.53138, 0.53916, 0.54689, 0.55459, 0.56224, 0.56986, 0.57743, - 0.58496, 0.59246, 0.59991, 0.60733, 0.61471, 0.62205, 0.62936, 0.63662, - 0.64386, 0.65105, 0.65821, 0.66534, 0.67243, 0.67948, 0.68650, 0.69349, - 0.70044, 0.70736, 0.71425, 0.72110, 0.72792, 0.73471, 0.74147, 0.74819, - 0.75489, 0.76155, 0.76818, 0.77479, 0.78136, 0.78790, 0.79442, 0.80090, - 0.80735, 0.81378, 0.82018, 0.82655, 0.83289, 0.83920, 0.84549, 0.85175, - 0.85798, 0.86419, 0.87036, 0.87652, 0.88264, 0.88874, 0.89482, 0.90087, - 0.90689, 0.91289, 0.91886, 0.92481, 0.93074, 0.93664, 0.94251, 0.94837, - 0.95420, 0.96000, 0.96578, 0.97154, 0.97728, 0.98299, 0.98868, 0.99435, -}; - -/* Avoid an int/float conversion. */ -const float x264_log2_lz_lut[32] = -{ - 31,30,29,28,27,26,25,24,23,22,21,20,19,18,17,16,15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0 -}; - -// should the intra and inter lambdas be different? -// I'm just matching the behaviour of deadzone quant. -static const int x264_trellis_lambda2_tab[2][QP_MAX_MAX+1] = -{ - // inter lambda = .85 * .85 * 2**(qp/3. 
+ 10 - LAMBDA_BITS) - { - 46, 58, 73, 92, 117, 147, - 185, 233, 294, 370, 466, 587, - 740, 932, 1174, 1480, 1864, 2349, - 2959, 3728, 4697, 5918, 7457, 9395, - 11837, 14914, 18790, 23674, 29828, 37581, - 47349, 59656, 75163, 94699, 119313, 150326, - 189399, 238627, 300652, 378798, 477255, 601304, - 757596, 954511, 1202608, 1515192, 1909022, 2405217, - 3030384, 3818045, 4810435, 6060769, 7636091, 9620872, - 12121539, 15272182, 19241743, 24243077, 30544363, 38483486, - 48486154, 61088726, 76966972, 96972308, - 122177453,134217727,134217727,134217727,134217727,134217727, - 134217727,134217727,134217727,134217727,134217727,134217727, - }, - // intra lambda = .65 * .65 * 2**(qp/3. + 10 - LAMBDA_BITS) - { - 27, 34, 43, 54, 68, 86, - 108, 136, 172, 216, 273, 343, - 433, 545, 687, 865, 1090, 1374, - 1731, 2180, 2747, 3461, 4361, 5494, - 6922, 8721, 10988, 13844, 17442, 21976, - 27688, 34885, 43953, 55377, 69771, 87906, - 110755, 139543, 175813, 221511, 279087, 351627, - 443023, 558174, 703255, 886046, 1116348, 1406511, - 1772093, 2232697, 2813022, 3544186, 4465396, 5626046, - 7088374, 8930791, 11252092, 14176748, 17861583, 22504184, - 28353495, 35723165, 45008368, 56706990, - 71446330, 90016736,113413980,134217727,134217727,134217727, - 134217727,134217727,134217727,134217727,134217727,134217727, - 134217727,134217727,134217727,134217727,134217727,134217727, - } -}; - -#define MAX_CHROMA_LAMBDA_OFFSET 36 -static const uint16_t x264_chroma_lambda2_offset_tab[MAX_CHROMA_LAMBDA_OFFSET+1] = -{ - 16, 20, 25, 32, 40, 50, - 64, 80, 101, 128, 161, 203, - 256, 322, 406, 512, 645, 812, - 1024, 1290, 1625, 2048, 2580, 3250, - 4096, 5160, 6501, 8192, 10321, 13003, - 16384, 20642, 26007, 32768, 41285, 52015, - 65535 -}; - /* TODO: calculate CABAC costs */ static const uint8_t i_mb_b_cost_table[X264_MBTYPE_MAX] = { @@ -256,18 +138,14 @@ 5, 3, 3, 1 }; -static void x264_analyse_update_cache( x264_t *h, x264_mb_analysis_t *a ); - -static uint16_t x264_cost_ref[QP_MAX+1][3][33]; -static 
UNUSED x264_pthread_mutex_t cost_ref_mutex = X264_PTHREAD_MUTEX_INITIALIZER; -static uint16_t x264_cost_i4x4_mode[(QP_MAX+2)*32]; +static void analyse_update_cache( x264_t *h, x264_mb_analysis_t *a ); static int init_costs( x264_t *h, float *logs, int qp ) { if( h->cost_mv[qp] ) return 0; - int mv_range = h->param.analyse.i_mv_range; + int mv_range = h->param.analyse.i_mv_range << PARAM_INTERLACED; int lambda = x264_lambda_tab[qp]; /* factor of 4 from qpel, 2 from sign, and 2 because mv can be opposite from mvp */ CHECKED_MALLOC( h->cost_mv[qp], (4*4*mv_range + 1) * sizeof(uint16_t) ); @@ -277,11 +155,9 @@ h->cost_mv[qp][-i] = h->cost_mv[qp][i] = X264_MIN( (int)(lambda * logs[i] + .5f), UINT16_MAX ); } - x264_pthread_mutex_lock( &cost_ref_mutex ); for( int i = 0; i < 3; i++ ) for( int j = 0; j < 33; j++ ) - x264_cost_ref[qp][i][j] = i ? X264_MIN( lambda * bs_size_te( i, j ), UINT16_MAX ) : 0; - x264_pthread_mutex_unlock( &cost_ref_mutex ); + h->cost_table->ref[qp][i][j] = i ? X264_MIN( lambda * bs_size_te( i, j ), UINT16_MAX ) : 0; if( h->param.analyse.i_me_method >= X264_ME_ESA && !h->cost_mv_fpel[qp][0] ) { for( int j = 0; j < 4; j++ ) @@ -292,7 +168,7 @@ h->cost_mv_fpel[qp][j][i] = h->cost_mv[qp][i*4+j]; } } - uint16_t *cost_i4x4_mode = (uint16_t*)ALIGN((intptr_t)x264_cost_i4x4_mode,64) + qp*32; + uint16_t *cost_i4x4_mode = h->cost_table->i4x4_mode[qp]; for( int i = 0; i < 17; i++ ) cost_i4x4_mode[i] = 3*lambda*(i!=8); return 0; @@ -302,7 +178,7 @@ int x264_analyse_init_costs( x264_t *h ) { - int mv_range = h->param.analyse.i_mv_range; + int mv_range = h->param.analyse.i_mv_range << PARAM_INTERLACED; float *logs = x264_malloc( (2*4*mv_range+1) * sizeof(float) ); if( !logs ) return -1; @@ -327,14 +203,16 @@ void x264_analyse_free_costs( x264_t *h ) { - int mv_range = h->param.analyse.i_mv_range; + int mv_range = h->param.analyse.i_mv_range << PARAM_INTERLACED; for( int i = 0; i < QP_MAX+1; i++ ) { if( h->cost_mv[i] ) x264_free( h->cost_mv[i] - 2*4*mv_range ); - 
if( h->cost_mv_fpel[i][0] ) - for( int j = 0; j < 4; j++ ) + for( int j = 0; j < 4; j++ ) + { + if( h->cost_mv_fpel[i][j] ) x264_free( h->cost_mv_fpel[i][j] - 2*mv_range ); + } } } @@ -367,14 +245,14 @@ } /* initialize an array of lambda*nbits for all possible mvs */ -static void x264_mb_analyse_load_costs( x264_t *h, x264_mb_analysis_t *a ) +static void mb_analyse_load_costs( x264_t *h, x264_mb_analysis_t *a ) { a->p_cost_mv = h->cost_mv[a->i_qp]; - a->p_cost_ref[0] = x264_cost_ref[a->i_qp][x264_clip3(h->sh.i_num_ref_idx_l0_active-1,0,2)]; - a->p_cost_ref[1] = x264_cost_ref[a->i_qp][x264_clip3(h->sh.i_num_ref_idx_l1_active-1,0,2)]; + a->p_cost_ref[0] = h->cost_table->ref[a->i_qp][x264_clip3(h->sh.i_num_ref_idx_l0_active-1,0,2)]; + a->p_cost_ref[1] = h->cost_table->ref[a->i_qp][x264_clip3(h->sh.i_num_ref_idx_l1_active-1,0,2)]; } -static void x264_mb_analyse_init_qp( x264_t *h, x264_mb_analysis_t *a, int qp ) +static void mb_analyse_init_qp( x264_t *h, x264_mb_analysis_t *a, int qp ) { int effective_chroma_qp = h->chroma_qp_table[SPEC_QP(qp)] + X264_MAX( qp - QP_MAX_SPEC, 0 ); a->i_lambda = x264_lambda_tab[qp]; @@ -413,7 +291,7 @@ h->mb.i_chroma_qp = h->chroma_qp_table[qp]; } -static void x264_mb_analyse_init( x264_t *h, x264_mb_analysis_t *a, int qp ) +static void mb_analyse_init( x264_t *h, x264_mb_analysis_t *a, int qp ) { int subme = h->param.analyse.i_subpel_refine - (h->sh.i_type == SLICE_TYPE_B); @@ -424,15 +302,15 @@ h->mb.b_deblock_rdo = h->param.analyse.i_subpel_refine >= 9 && h->sh.i_disable_deblocking_filter_idc != 1; a->b_early_terminate = h->param.analyse.i_subpel_refine < 11; - x264_mb_analyse_init_qp( h, a, qp ); + mb_analyse_init_qp( h, a, qp ); h->mb.b_transform_8x8 = 0; /* I: Intra part */ a->i_satd_i16x16 = a->i_satd_i8x8 = - a->i_satd_i4x4 = - a->i_satd_chroma = COST_MAX; + a->i_satd_i4x4 = COST_MAX; + a->i_satd_chroma = CHROMA_FORMAT ? COST_MAX : 0; /* non-RD PCM decision is inaccurate (as is psy-rd), so don't do it. 
* PCM cost can overflow with high lambda2, so cap it at COST_MAX. */ @@ -567,14 +445,12 @@ /* Fast intra decision */ if( a->b_early_terminate && h->mb.i_mb_xy - h->sh.i_first_mb > 4 ) { - /* Always run in fast-intra mode for subme < 3 */ - if( h->mb.i_subpel_refine > 2 && - ( IS_INTRA( h->mb.i_mb_type_left[0] ) || + if( IS_INTRA( h->mb.i_mb_type_left[0] ) || IS_INTRA( h->mb.i_mb_type_top ) || IS_INTRA( h->mb.i_mb_type_topleft ) || IS_INTRA( h->mb.i_mb_type_topright ) || (h->sh.i_type == SLICE_TYPE_P && IS_INTRA( h->fref[0][0]->mb_type[h->mb.i_mb_xy] )) || - (h->mb.i_mb_xy - h->sh.i_first_mb < 3*(h->stat.frame.i_mb_count[I_4x4] + h->stat.frame.i_mb_count[I_8x8] + h->stat.frame.i_mb_count[I_16x16])) ) ) + (h->mb.i_mb_xy - h->sh.i_first_mb < 3*(h->stat.frame.i_mb_count[I_4x4] + h->stat.frame.i_mb_count[I_8x8] + h->stat.frame.i_mb_count[I_16x16])) ) { /* intra is likely */ } else { @@ -682,21 +558,19 @@ } /* For trellis=2, we need to do this for both sizes of DCT, for trellis=1 we only need to use it on the chosen mode. 
*/ -static void inline x264_psy_trellis_init( x264_t *h, int do_both_dct ) +static inline void psy_trellis_init( x264_t *h, int do_both_dct ) { - ALIGNED_16( static pixel zero[16*FDEC_STRIDE] ) = {0}; - if( do_both_dct || h->mb.b_transform_8x8 ) - h->dctf.sub16x16_dct8( h->mb.pic.fenc_dct8, h->mb.pic.p_fenc[0], zero ); + h->dctf.sub16x16_dct8( h->mb.pic.fenc_dct8, h->mb.pic.p_fenc[0], (pixel*)x264_zero ); if( do_both_dct || !h->mb.b_transform_8x8 ) - h->dctf.sub16x16_dct( h->mb.pic.fenc_dct4, h->mb.pic.p_fenc[0], zero ); + h->dctf.sub16x16_dct( h->mb.pic.fenc_dct4, h->mb.pic.p_fenc[0], (pixel*)x264_zero ); } /* Reset fenc satd scores cache for psy RD */ -static inline void x264_mb_init_fenc_cache( x264_t *h, int b_satd ) +static inline void mb_init_fenc_cache( x264_t *h, int b_satd ) { if( h->param.analyse.i_trellis == 2 && h->mb.i_psy_trellis ) - x264_psy_trellis_init( h, h->param.analyse.b_transform_8x8 ); + psy_trellis_init( h, h->param.analyse.b_transform_8x8 ); if( !h->mb.i_psy_rd ) return; @@ -709,7 +583,7 @@ h->mc.memzero_aligned( h->mb.pic.fenc_satd_cache, sizeof(h->mb.pic.fenc_satd_cache) ); } -static void x264_mb_analyse_intra_chroma( x264_t *h, x264_mb_analysis_t *a ) +static void mb_analyse_intra_chroma( x264_t *h, x264_mb_analysis_t *a ) { if( a->i_satd_chroma < COST_MAX ) return; @@ -791,7 +665,7 @@ } /* FIXME: should we do any sort of merged chroma analysis with 4:4:4? */ -static void x264_mb_analyse_intra( x264_t *h, x264_mb_analysis_t *a, int i_satd_inter ) +static void mb_analyse_intra( x264_t *h, x264_mb_analysis_t *a, int i_satd_inter ) { const unsigned int flags = h->sh.i_type == SLICE_TYPE_I ? 
h->param.analyse.intra : h->param.analyse.inter; pixel *p_src = h->mb.pic.p_fenc[0]; @@ -867,7 +741,7 @@ return; } - uint16_t *cost_i4x4_mode = (uint16_t*)ALIGN((intptr_t)x264_cost_i4x4_mode,64) + a->i_qp*32 + 8; + uint16_t *cost_i4x4_mode = h->cost_table->i4x4_mode[a->i_qp] + 8; /* 8x8 prediction selection */ if( flags & X264_ANALYSE_I8x8 ) { @@ -909,10 +783,11 @@ { if( !h->mb.b_lossless && predict_mode[5] >= 0 ) { - ALIGNED_ARRAY_16( int32_t, satd,[9] ); + ALIGNED_ARRAY_16( int32_t, satd,[4] ); h->pixf.intra_mbcmp_x3_8x8( p_src_by, edge, satd ); int favor_vertical = satd[I_PRED_4x4_H] > satd[I_PRED_4x4_V]; - satd[i_pred_mode] -= 3 * lambda; + if( i_pred_mode < 3 ) + satd[i_pred_mode] -= 3 * lambda; for( int i = 2; i >= 0; i-- ) { int cost = satd[i]; @@ -1027,10 +902,11 @@ { if( !h->mb.b_lossless && predict_mode[5] >= 0 ) { - ALIGNED_ARRAY_16( int32_t, satd,[9] ); + ALIGNED_ARRAY_16( int32_t, satd,[4] ); h->pixf.intra_mbcmp_x3_4x4( p_src_by, p_dst_by, satd ); int favor_vertical = satd[I_PRED_4x4_H] > satd[I_PRED_4x4_V]; - satd[i_pred_mode] -= 3 * lambda; + if( i_pred_mode < 3 ) + satd[i_pred_mode] -= 3 * lambda; i_best = satd[I_PRED_4x4_DC]; a->i_predict4x4[idx] = I_PRED_4x4_DC; COPY2_IF_LT( i_best, satd[I_PRED_4x4_H], a->i_predict4x4[idx], I_PRED_4x4_H ); COPY2_IF_LT( i_best, satd[I_PRED_4x4_V], a->i_predict4x4[idx], I_PRED_4x4_V ); @@ -1103,7 +979,7 @@ } } -static void x264_intra_rd( x264_t *h, x264_mb_analysis_t *a, int i_satd_thresh ) +static void intra_rd( x264_t *h, x264_mb_analysis_t *a, int i_satd_thresh ) { if( !a->b_early_terminate ) i_satd_thresh = COST_MAX; @@ -1111,8 +987,8 @@ if( a->i_satd_i16x16 < i_satd_thresh ) { h->mb.i_type = I_16x16; - x264_analyse_update_cache( h, a ); - a->i_satd_i16x16 = x264_rd_cost_mb( h, a->i_lambda2 ); + analyse_update_cache( h, a ); + a->i_satd_i16x16 = rd_cost_mb( h, a->i_lambda2 ); } else a->i_satd_i16x16 = COST_MAX; @@ -1120,8 +996,8 @@ if( a->i_satd_i4x4 < i_satd_thresh ) { h->mb.i_type = I_4x4; - 
x264_analyse_update_cache( h, a ); - a->i_satd_i4x4 = x264_rd_cost_mb( h, a->i_lambda2 ); + analyse_update_cache( h, a ); + a->i_satd_i4x4 = rd_cost_mb( h, a->i_lambda2 ); } else a->i_satd_i4x4 = COST_MAX; @@ -1129,15 +1005,15 @@ if( a->i_satd_i8x8 < i_satd_thresh ) { h->mb.i_type = I_8x8; - x264_analyse_update_cache( h, a ); - a->i_satd_i8x8 = x264_rd_cost_mb( h, a->i_lambda2 ); + analyse_update_cache( h, a ); + a->i_satd_i8x8 = rd_cost_mb( h, a->i_lambda2 ); a->i_cbp_i8x8_luma = h->mb.i_cbp_luma; } else a->i_satd_i8x8 = COST_MAX; } -static void x264_intra_rd_refine( x264_t *h, x264_mb_analysis_t *a ) +static void intra_rd_refine( x264_t *h, x264_mb_analysis_t *a ) { uint64_t i_satd, i_best; int plane_count = CHROMA444 ? 3 : 1; @@ -1155,13 +1031,13 @@ if( i_mode == old_pred_mode || a->i_satd_i16x16_dir[i_mode] > i_thresh ) continue; h->mb.i_intra16x16_pred_mode = i_mode; - i_satd = x264_rd_cost_mb( h, a->i_lambda2 ); + i_satd = rd_cost_mb( h, a->i_lambda2 ); COPY2_IF_LT( i_best, i_satd, a->i_predict16x16, i_mode ); } } /* RD selection for chroma prediction */ - if( !CHROMA444 ) + if( CHROMA_FORMAT == CHROMA_420 || CHROMA_FORMAT == CHROMA_422 ) { const int8_t *predict_mode = predict_chroma_mode_available( h->mb.i_neighbour_intra ); if( predict_mode[1] >= 0 ) @@ -1181,10 +1057,10 @@ { int i_cbp_chroma_best = h->mb.i_cbp_chroma; int i_chroma_lambda = x264_lambda2_tab[h->mb.i_chroma_qp]; - /* the previous thing encoded was x264_intra_rd(), so the pixels and + /* the previous thing encoded was intra_rd(), so the pixels and * coefs for the current chroma mode are still around, so we only * have to recount the bits. 
*/ - i_best = x264_rd_cost_chroma( h, i_chroma_lambda, a->i_predict8x8chroma, 0 ); + i_best = rd_cost_chroma( h, i_chroma_lambda, a->i_predict8x8chroma, 0 ); for( int i = 0; i < i_max; i++ ) { int i_mode = predict_mode_sorted[i]; @@ -1198,7 +1074,7 @@ /* if we've already found a mode that needs no residual, then * probably any mode with a residual will be worse. * so avoid dct on the remaining modes to improve speed. */ - i_satd = x264_rd_cost_chroma( h, i_chroma_lambda, i_mode, h->mb.i_cbp_chroma != 0x00 ); + i_satd = rd_cost_chroma( h, i_chroma_lambda, i_mode, h->mb.i_cbp_chroma != 0x00 ); COPY3_IF_LT( i_best, i_satd, a->i_predict8x8chroma, i_mode, i_cbp_chroma_best, h->mb.i_cbp_chroma ); } h->mb.i_chroma_pred_mode = a->i_predict8x8chroma; @@ -1228,7 +1104,7 @@ for( ; *predict_mode >= 0; predict_mode++ ) { int i_mode = *predict_mode; - i_satd = x264_rd_cost_i4x4( h, a->i_lambda2, idx, i_mode ); + i_satd = rd_cost_i4x4( h, a->i_lambda2, idx, i_mode ); if( i_best > i_satd ) { @@ -1287,7 +1163,7 @@ continue; h->mb.i_cbp_luma = a->i_cbp_i8x8_luma; - i_satd = x264_rd_cost_i8x8( h, a->i_lambda2, idx, i_mode, edge ); + i_satd = rd_cost_i8x8( h, a->i_lambda2, idx, i_mode, edge ); if( i_best > i_satd ) { @@ -1366,7 +1242,7 @@ #define REF_COST(list, ref) \ (a->p_cost_ref[list][ref]) -static void x264_mb_analyse_inter_p16x16( x264_t *h, x264_mb_analysis_t *a ) +static void mb_analyse_inter_p16x16( x264_t *h, x264_mb_analysis_t *a ) { x264_me_t m; int i_mvc; @@ -1415,7 +1291,7 @@ && x264_macroblock_probe_pskip( h ) ) { h->mb.i_type = P_SKIP; - x264_analyse_update_cache( h, a ); + analyse_update_cache( h, a ); assert( h->mb.cache.pskip_mv[1] <= h->mb.mv_max_spel[1] || h->i_thread_frames == 1 ); return; } @@ -1433,19 +1309,19 @@ h->mb.i_type = P_L0; if( a->i_mbrd ) { - x264_mb_init_fenc_cache( h, a->i_mbrd >= 2 || h->param.analyse.inter & X264_ANALYSE_PSUB8x8 ); + mb_init_fenc_cache( h, a->i_mbrd >= 2 || h->param.analyse.inter & X264_ANALYSE_PSUB8x8 ); if( a->l0.me16x16.i_ref 
== 0 && M32( a->l0.me16x16.mv ) == M32( h->mb.cache.pskip_mv ) && !a->b_force_intra ) { h->mb.i_partition = D_16x16; x264_macroblock_cache_mv_ptr( h, 0, 0, 4, 4, 0, a->l0.me16x16.mv ); - a->l0.i_rd16x16 = x264_rd_cost_mb( h, a->i_lambda2 ); + a->l0.i_rd16x16 = rd_cost_mb( h, a->i_lambda2 ); if( !(h->mb.i_cbp_luma|h->mb.i_cbp_chroma) ) h->mb.i_type = P_SKIP; } } } -static void x264_mb_analyse_inter_p8x8_mixed_ref( x264_t *h, x264_mb_analysis_t *a ) +static void mb_analyse_inter_p8x8_mixed_ref( x264_t *h, x264_mb_analysis_t *a ) { x264_me_t m; pixel **p_fenc = h->mb.pic.p_fenc; @@ -1536,7 +1412,7 @@ M32( h->mb.i_sub_partition ) = D_L0_8x8 * 0x01010101; } -static void x264_mb_analyse_inter_p8x8( x264_t *h, x264_mb_analysis_t *a ) +static void mb_analyse_inter_p8x8( x264_t *h, x264_mb_analysis_t *a ) { /* Duplicate refs are rarely useful in p8x8 due to the high cost of the * reference frame flags. Thus, if we're not doing mixedrefs, just @@ -1591,7 +1467,7 @@ M32( h->mb.i_sub_partition ) = D_L0_8x8 * 0x01010101; } -static void x264_mb_analyse_inter_p16x8( x264_t *h, x264_mb_analysis_t *a, int i_best_satd ) +static void mb_analyse_inter_p16x8( x264_t *h, x264_mb_analysis_t *a, int i_best_satd ) { x264_me_t m; pixel **p_fenc = h->mb.pic.p_fenc; @@ -1657,7 +1533,7 @@ a->l0.i_cost16x8 = a->l0.me16x8[0].cost + a->l0.me16x8[1].cost; } -static void x264_mb_analyse_inter_p8x16( x264_t *h, x264_mb_analysis_t *a, int i_best_satd ) +static void mb_analyse_inter_p8x16( x264_t *h, x264_mb_analysis_t *a, int i_best_satd ) { x264_me_t m; pixel **p_fenc = h->mb.pic.p_fenc; @@ -1722,8 +1598,8 @@ a->l0.i_cost8x16 = a->l0.me8x16[0].cost + a->l0.me8x16[1].cost; } -static ALWAYS_INLINE int x264_mb_analyse_inter_p4x4_chroma_internal( x264_t *h, x264_mb_analysis_t *a, - pixel **p_fref, int i8x8, int size, int chroma ) +static ALWAYS_INLINE int mb_analyse_inter_p4x4_chroma_internal( x264_t *h, x264_mb_analysis_t *a, + pixel **p_fref, int i8x8, int size, int chroma ) { ALIGNED_ARRAY_32( pixel, 
pix1,[16*16] ); pixel *pix2 = pix1+8; @@ -1786,17 +1662,17 @@ + h->pixf.mbcmp[chromapix]( &h->mb.pic.p_fenc[2][oe], FENC_STRIDE, pix2, 16 ); } -static int x264_mb_analyse_inter_p4x4_chroma( x264_t *h, x264_mb_analysis_t *a, pixel **p_fref, int i8x8, int size ) +static int mb_analyse_inter_p4x4_chroma( x264_t *h, x264_mb_analysis_t *a, pixel **p_fref, int i8x8, int size ) { if( CHROMA_FORMAT == CHROMA_444 ) - return x264_mb_analyse_inter_p4x4_chroma_internal( h, a, p_fref, i8x8, size, CHROMA_444 ); + return mb_analyse_inter_p4x4_chroma_internal( h, a, p_fref, i8x8, size, CHROMA_444 ); else if( CHROMA_FORMAT == CHROMA_422 ) - return x264_mb_analyse_inter_p4x4_chroma_internal( h, a, p_fref, i8x8, size, CHROMA_422 ); + return mb_analyse_inter_p4x4_chroma_internal( h, a, p_fref, i8x8, size, CHROMA_422 ); else - return x264_mb_analyse_inter_p4x4_chroma_internal( h, a, p_fref, i8x8, size, CHROMA_420 ); + return mb_analyse_inter_p4x4_chroma_internal( h, a, p_fref, i8x8, size, CHROMA_420 ); } -static void x264_mb_analyse_inter_p4x4( x264_t *h, x264_mb_analysis_t *a, int i8x8 ) +static void mb_analyse_inter_p4x4( x264_t *h, x264_mb_analysis_t *a, int i8x8 ) { pixel **p_fref = h->mb.pic.p_fref[0][a->l0.me8x8[i8x8].i_ref]; pixel **p_fenc = h->mb.pic.p_fenc; @@ -1832,10 +1708,10 @@ REF_COST( 0, i_ref ) + a->i_lambda * i_sub_mb_p_cost_table[D_L0_4x4]; if( h->mb.b_chroma_me && !CHROMA444 ) - a->l0.i_cost4x4[i8x8] += x264_mb_analyse_inter_p4x4_chroma( h, a, p_fref, i8x8, PIXEL_4x4 ); + a->l0.i_cost4x4[i8x8] += mb_analyse_inter_p4x4_chroma( h, a, p_fref, i8x8, PIXEL_4x4 ); } -static void x264_mb_analyse_inter_p8x4( x264_t *h, x264_mb_analysis_t *a, int i8x8 ) +static void mb_analyse_inter_p8x4( x264_t *h, x264_mb_analysis_t *a, int i8x8 ) { pixel **p_fref = h->mb.pic.p_fref[0][a->l0.me8x8[i8x8].i_ref]; pixel **p_fenc = h->mb.pic.p_fenc; @@ -1868,10 +1744,10 @@ REF_COST( 0, i_ref ) + a->i_lambda * i_sub_mb_p_cost_table[D_L0_8x4]; if( h->mb.b_chroma_me && !CHROMA444 ) - 
a->l0.i_cost8x4[i8x8] += x264_mb_analyse_inter_p4x4_chroma( h, a, p_fref, i8x8, PIXEL_8x4 ); + a->l0.i_cost8x4[i8x8] += mb_analyse_inter_p4x4_chroma( h, a, p_fref, i8x8, PIXEL_8x4 ); } -static void x264_mb_analyse_inter_p4x8( x264_t *h, x264_mb_analysis_t *a, int i8x8 ) +static void mb_analyse_inter_p4x8( x264_t *h, x264_mb_analysis_t *a, int i8x8 ) { pixel **p_fref = h->mb.pic.p_fref[0][a->l0.me8x8[i8x8].i_ref]; pixel **p_fenc = h->mb.pic.p_fenc; @@ -1904,10 +1780,10 @@ REF_COST( 0, i_ref ) + a->i_lambda * i_sub_mb_p_cost_table[D_L0_4x8]; if( h->mb.b_chroma_me && !CHROMA444 ) - a->l0.i_cost4x8[i8x8] += x264_mb_analyse_inter_p4x4_chroma( h, a, p_fref, i8x8, PIXEL_4x8 ); + a->l0.i_cost4x8[i8x8] += mb_analyse_inter_p4x4_chroma( h, a, p_fref, i8x8, PIXEL_4x8 ); } -static ALWAYS_INLINE int x264_analyse_bi_chroma( x264_t *h, x264_mb_analysis_t *a, int idx, int i_pixel ) +static ALWAYS_INLINE int analyse_bi_chroma( x264_t *h, x264_mb_analysis_t *a, int idx, int i_pixel ) { ALIGNED_ARRAY_32( pixel, pix, [4],[16*16] ); ALIGNED_ARRAY_32( pixel, bi, [2],[16*16] ); @@ -1955,7 +1831,7 @@ return i_chroma_cost; } -static void x264_mb_analyse_inter_direct( x264_t *h, x264_mb_analysis_t *a ) +static void mb_analyse_inter_direct( x264_t *h, x264_mb_analysis_t *a ) { /* Assumes that fdec still contains the results of * x264_mb_predict_mv_direct16x16 and x264_mb_mc */ @@ -2001,7 +1877,7 @@ } } -static void x264_mb_analyse_inter_b16x16( x264_t *h, x264_mb_analysis_t *a ) +static void mb_analyse_inter_b16x16( x264_t *h, x264_mb_analysis_t *a ) { ALIGNED_ARRAY_32( pixel, pix0,[16*16] ); ALIGNED_ARRAY_32( pixel, pix1,[16*16] ); @@ -2073,7 +1949,7 @@ { /* We already tested skip */ h->mb.i_type = B_SKIP; - x264_analyse_update_cache( h, a ); + analyse_update_cache( h, a ); return; } } @@ -2105,7 +1981,7 @@ + a->l1.bi16x16.cost_mv; if( h->mb.b_chroma_me ) - a->i_cost16x16bi += x264_analyse_bi_chroma( h, a, 0, PIXEL_16x16 ); + a->i_cost16x16bi += analyse_bi_chroma( h, a, 0, PIXEL_16x16 ); /* 
Always try the 0,0,0,0 vector; helps avoid errant motion vectors in fades */ if( M32( a->l0.bi16x16.mv ) | M32( a->l1.bi16x16.mv ) ) @@ -2137,7 +2013,7 @@ } else { - ALIGNED_ARRAY_32( pixel, pixuv, [2],[16*FENC_STRIDE] ); + ALIGNED_ARRAY_64( pixel, pixuv, [2],[16*FENC_STRIDE] ); int chromapix = h->luma2chroma_pixel[PIXEL_16x16]; int v_shift = CHROMA_V_SHIFT; @@ -2187,7 +2063,7 @@ a->l1.me16x16.cost += a->i_lambda * i_mb_b_cost_table[B_L1_L1]; } -static inline void x264_mb_cache_mv_p8x8( x264_t *h, x264_mb_analysis_t *a, int i ) +static inline void mb_cache_mv_p8x8( x264_t *h, x264_mb_analysis_t *a, int i ) { int x = 2*(i&1); int y = i&2; @@ -2217,7 +2093,7 @@ } } -static void x264_mb_load_mv_direct8x8( x264_t *h, int idx ) +static void mb_load_mv_direct8x8( x264_t *h, int idx ) { int x = 2*(idx&1); int y = idx&2; @@ -2253,13 +2129,13 @@ x264_macroblock_cache_mvd( h, x,y,dx,dy, 1, 0 ); \ } -static inline void x264_mb_cache_mv_b8x8( x264_t *h, x264_mb_analysis_t *a, int i, int b_mvd ) +static inline void mb_cache_mv_b8x8( x264_t *h, x264_mb_analysis_t *a, int i, int b_mvd ) { int x = 2*(i&1); int y = i&2; if( h->mb.i_sub_partition[i] == D_DIRECT_8x8 ) { - x264_mb_load_mv_direct8x8( h, i ); + mb_load_mv_direct8x8( h, i ); if( b_mvd ) { x264_macroblock_cache_mvd( h, x, y, 2, 2, 0, 0 ); @@ -2272,17 +2148,17 @@ CACHE_MV_BI( x, y, 2, 2, a->l0.me8x8[i], a->l1.me8x8[i], h->mb.i_sub_partition[i] ); } } -static inline void x264_mb_cache_mv_b16x8( x264_t *h, x264_mb_analysis_t *a, int i, int b_mvd ) +static inline void mb_cache_mv_b16x8( x264_t *h, x264_mb_analysis_t *a, int i, int b_mvd ) { CACHE_MV_BI( 0, 2*i, 4, 2, a->l0.me16x8[i], a->l1.me16x8[i], a->i_mb_partition16x8[i] ); } -static inline void x264_mb_cache_mv_b8x16( x264_t *h, x264_mb_analysis_t *a, int i, int b_mvd ) +static inline void mb_cache_mv_b8x16( x264_t *h, x264_mb_analysis_t *a, int i, int b_mvd ) { CACHE_MV_BI( 2*i, 0, 2, 4, a->l0.me8x16[i], a->l1.me8x16[i], a->i_mb_partition8x16[i] ); } #undef CACHE_MV_BI 
-static void x264_mb_analyse_inter_b8x8_mixed_ref( x264_t *h, x264_mb_analysis_t *a ) +static void mb_analyse_inter_b8x8_mixed_ref( x264_t *h, x264_mb_analysis_t *a ) { ALIGNED_ARRAY_16( pixel, pix,[2],[8*8] ); int i_maxref[2] = {h->mb.pic.i_fref[0]-1, h->mb.pic.i_fref[1]-1}; @@ -2371,7 +2247,7 @@ if( h->mb.b_chroma_me ) { - int i_chroma_cost = x264_analyse_bi_chroma( h, a, i, PIXEL_8x8 ); + int i_chroma_cost = analyse_bi_chroma( h, a, i, PIXEL_8x8 ); i_part_cost_bi += i_chroma_cost; a->i_satd8x8[2][i] += i_chroma_cost; } @@ -2387,14 +2263,14 @@ a->i_cost8x8bi += i_part_cost; /* XXX Needed for x264_mb_predict_mv */ - x264_mb_cache_mv_b8x8( h, a, i, 0 ); + mb_cache_mv_b8x8( h, a, i, 0 ); } /* mb type cost */ a->i_cost8x8bi += a->i_lambda * i_mb_b_cost_table[B_8x8]; } -static void x264_mb_analyse_inter_b8x8( x264_t *h, x264_mb_analysis_t *a ) +static void mb_analyse_inter_b8x8( x264_t *h, x264_mb_analysis_t *a ) { pixel **p_fref[2] = { h->mb.pic.p_fref[0][a->l0.me16x16.i_ref], @@ -2451,7 +2327,7 @@ if( h->mb.b_chroma_me ) { - int i_chroma_cost = x264_analyse_bi_chroma( h, a, i, PIXEL_8x8 ); + int i_chroma_cost = analyse_bi_chroma( h, a, i, PIXEL_8x8 ); i_part_cost_bi += i_chroma_cost; a->i_satd8x8[2][i] += i_chroma_cost; } @@ -2464,14 +2340,14 @@ a->i_cost8x8bi += i_part_cost; /* XXX Needed for x264_mb_predict_mv */ - x264_mb_cache_mv_b8x8( h, a, i, 0 ); + mb_cache_mv_b8x8( h, a, i, 0 ); } /* mb type cost */ a->i_cost8x8bi += a->i_lambda * i_mb_b_cost_table[B_8x8]; } -static void x264_mb_analyse_inter_b16x8( x264_t *h, x264_mb_analysis_t *a, int i_best_satd ) +static void mb_analyse_inter_b16x8( x264_t *h, x264_mb_analysis_t *a, int i_best_satd ) { ALIGNED_ARRAY_32( pixel, pix,[2],[16*8] ); ALIGNED_4( int16_t mvc[3][2] ); @@ -2529,7 +2405,7 @@ + a->l1.me16x8[i].i_ref_cost; if( h->mb.b_chroma_me ) - i_part_cost_bi += x264_analyse_bi_chroma( h, a, i, PIXEL_16x8 ); + i_part_cost_bi += analyse_bi_chroma( h, a, i, PIXEL_16x8 ); i_part_cost = a->l0.me16x8[i].cost; 
a->i_mb_partition16x8[i] = D_L0_8x8; /* not actually 8x8, only the L0 matters */ @@ -2555,7 +2431,7 @@ return; } - x264_mb_cache_mv_b16x8( h, a, i, 0 ); + mb_cache_mv_b16x8( h, a, i, 0 ); } /* mb type cost */ @@ -2565,7 +2441,7 @@ a->i_cost16x8bi += a->i_lambda * i_mb_b16x8_cost_table[a->i_mb_type16x8]; } -static void x264_mb_analyse_inter_b8x16( x264_t *h, x264_mb_analysis_t *a, int i_best_satd ) +static void mb_analyse_inter_b8x16( x264_t *h, x264_mb_analysis_t *a, int i_best_satd ) { ALIGNED_ARRAY_16( pixel, pix,[2],[8*16] ); ALIGNED_4( int16_t mvc[3][2] ); @@ -2622,7 +2498,7 @@ + a->l1.me8x16[i].i_ref_cost; if( h->mb.b_chroma_me ) - i_part_cost_bi += x264_analyse_bi_chroma( h, a, i, PIXEL_8x16 ); + i_part_cost_bi += analyse_bi_chroma( h, a, i, PIXEL_8x16 ); i_part_cost = a->l0.me8x16[i].cost; a->i_mb_partition8x16[i] = D_L0_8x8; @@ -2648,7 +2524,7 @@ return; } - x264_mb_cache_mv_b8x16( h, a, i, 0 ); + mb_cache_mv_b8x16( h, a, i, 0 ); } /* mb type cost */ @@ -2658,7 +2534,7 @@ a->i_cost8x16bi += a->i_lambda * i_mb_b16x8_cost_table[a->i_mb_type8x16]; } -static void x264_mb_analyse_p_rd( x264_t *h, x264_mb_analysis_t *a, int i_satd ) +static void mb_analyse_p_rd( x264_t *h, x264_mb_analysis_t *a, int i_satd ) { int thresh = a->b_early_terminate ? 
i_satd * 5/4 + 1 : COST_MAX; @@ -2666,15 +2542,15 @@ if( a->l0.i_rd16x16 == COST_MAX && (!a->b_early_terminate || a->l0.me16x16.cost <= i_satd * 3/2) ) { h->mb.i_partition = D_16x16; - x264_analyse_update_cache( h, a ); - a->l0.i_rd16x16 = x264_rd_cost_mb( h, a->i_lambda2 ); + analyse_update_cache( h, a ); + a->l0.i_rd16x16 = rd_cost_mb( h, a->i_lambda2 ); } if( a->l0.i_cost16x8 < thresh ) { h->mb.i_partition = D_16x8; - x264_analyse_update_cache( h, a ); - a->l0.i_cost16x8 = x264_rd_cost_mb( h, a->i_lambda2 ); + analyse_update_cache( h, a ); + a->l0.i_cost16x8 = rd_cost_mb( h, a->i_lambda2 ); } else a->l0.i_cost16x8 = COST_MAX; @@ -2682,8 +2558,8 @@ if( a->l0.i_cost8x16 < thresh ) { h->mb.i_partition = D_8x16; - x264_analyse_update_cache( h, a ); - a->l0.i_cost8x16 = x264_rd_cost_mb( h, a->i_lambda2 ); + analyse_update_cache( h, a ); + a->l0.i_cost8x16 = rd_cost_mb( h, a->i_lambda2 ); } else a->l0.i_cost8x16 = COST_MAX; @@ -2712,7 +2588,7 @@ if( costs[subtype] > sub8x8_thresh ) continue; h->mb.i_sub_partition[i] = subtype; - x264_mb_cache_mv_p8x8( h, a, i ); + mb_cache_mv_p8x8( h, a, i ); if( subtype == btype ) continue; cost = x264_rd_cost_part( h, a->i_lambda2, i<<2, PIXEL_8x8 ); @@ -2721,19 +2597,19 @@ if( h->mb.i_sub_partition[i] != btype ) { h->mb.i_sub_partition[i] = btype; - x264_mb_cache_mv_p8x8( h, a, i ); + mb_cache_mv_p8x8( h, a, i ); } } } else - x264_analyse_update_cache( h, a ); - a->l0.i_cost8x8 = x264_rd_cost_mb( h, a->i_lambda2 ); + analyse_update_cache( h, a ); + a->l0.i_cost8x8 = rd_cost_mb( h, a->i_lambda2 ); } else a->l0.i_cost8x8 = COST_MAX; } -static void x264_mb_analyse_b_rd( x264_t *h, x264_mb_analysis_t *a, int i_satd_inter ) +static void mb_analyse_b_rd( x264_t *h, x264_mb_analysis_t *a, int i_satd_inter ) { int thresh = a->b_early_terminate ? 
i_satd_inter * (17 + (!!h->mb.i_psy_rd))/16 + 1 : COST_MAX; @@ -2743,8 +2619,8 @@ /* Assumes direct/skip MC is still in fdec */ /* Requires b-rdo to be done before intra analysis */ h->mb.b_skip_mc = 1; - x264_analyse_update_cache( h, a ); - a->i_rd16x16direct = x264_rd_cost_mb( h, a->i_lambda2 ); + analyse_update_cache( h, a ); + a->i_rd16x16direct = rd_cost_mb( h, a->i_lambda2 ); h->mb.b_skip_mc = 0; } @@ -2754,24 +2630,24 @@ if( a->l0.me16x16.cost < thresh && a->l0.i_rd16x16 == COST_MAX ) { h->mb.i_type = B_L0_L0; - x264_analyse_update_cache( h, a ); - a->l0.i_rd16x16 = x264_rd_cost_mb( h, a->i_lambda2 ); + analyse_update_cache( h, a ); + a->l0.i_rd16x16 = rd_cost_mb( h, a->i_lambda2 ); } /* L1 */ if( a->l1.me16x16.cost < thresh && a->l1.i_rd16x16 == COST_MAX ) { h->mb.i_type = B_L1_L1; - x264_analyse_update_cache( h, a ); - a->l1.i_rd16x16 = x264_rd_cost_mb( h, a->i_lambda2 ); + analyse_update_cache( h, a ); + a->l1.i_rd16x16 = rd_cost_mb( h, a->i_lambda2 ); } /* BI */ if( a->i_cost16x16bi < thresh && a->i_rd16x16bi == COST_MAX ) { h->mb.i_type = B_BI_BI; - x264_analyse_update_cache( h, a ); - a->i_rd16x16bi = x264_rd_cost_mb( h, a->i_lambda2 ); + analyse_update_cache( h, a ); + a->i_rd16x16bi = rd_cost_mb( h, a->i_lambda2 ); } /* 8x8 */ @@ -2779,8 +2655,8 @@ { h->mb.i_type = B_8x8; h->mb.i_partition = D_8x8; - x264_analyse_update_cache( h, a ); - a->i_rd8x8bi = x264_rd_cost_mb( h, a->i_lambda2 ); + analyse_update_cache( h, a ); + a->i_rd8x8bi = rd_cost_mb( h, a->i_lambda2 ); x264_macroblock_cache_skip( h, 0, 0, 4, 4, 0 ); } @@ -2789,8 +2665,8 @@ { h->mb.i_type = a->i_mb_type16x8; h->mb.i_partition = D_16x8; - x264_analyse_update_cache( h, a ); - a->i_rd16x8bi = x264_rd_cost_mb( h, a->i_lambda2 ); + analyse_update_cache( h, a ); + a->i_rd16x8bi = rd_cost_mb( h, a->i_lambda2 ); } /* 8x16 */ @@ -2798,12 +2674,12 @@ { h->mb.i_type = a->i_mb_type8x16; h->mb.i_partition = D_8x16; - x264_analyse_update_cache( h, a ); - a->i_rd8x16bi = x264_rd_cost_mb( h, a->i_lambda2 
); + analyse_update_cache( h, a ); + a->i_rd8x16bi = rd_cost_mb( h, a->i_lambda2 ); } } -static void x264_refine_bidir( x264_t *h, x264_mb_analysis_t *a ) +static void refine_bidir( x264_t *h, x264_mb_analysis_t *a ) { int i_biweight; @@ -2846,7 +2722,7 @@ } } -static inline void x264_mb_analyse_transform( x264_t *h ) +static inline void mb_analyse_transform( x264_t *h ) { if( x264_mb_transform_8x8_allowed( h ) && h->param.analyse.b_transform_8x8 && !h->mb.b_lossless ) { @@ -2884,7 +2760,7 @@ } } -static inline void x264_mb_analyse_transform_rd( x264_t *h, x264_mb_analysis_t *a, int *i_satd, int *i_rd ) +static inline void mb_analyse_transform_rd( x264_t *h, x264_mb_analysis_t *a, int *i_satd, int *i_rd ) { if( h->param.analyse.b_transform_8x8 && h->pps->b_transform_8x8_mode ) { @@ -2895,10 +2771,10 @@ else if( !x264_transform_allowed[h->mb.i_type] ) return; - x264_analyse_update_cache( h, a ); + analyse_update_cache( h, a ); h->mb.b_transform_8x8 ^= 1; /* FIXME only luma is needed for 4:2:0, but the score for comparison already includes chroma */ - int i_rd8 = x264_rd_cost_mb( h, a->i_lambda2 ); + int i_rd8 = rd_cost_mb( h, a->i_lambda2 ); if( *i_rd >= i_rd8 ) { @@ -2921,12 +2797,12 @@ * There must be a more efficient way to get that portion of the benefit * without doing full QP-RD, but RD-decimation doesn't seem to do the * trick. */ -static inline void x264_mb_analyse_qp_rd( x264_t *h, x264_mb_analysis_t *a ) +static inline void mb_analyse_qp_rd( x264_t *h, x264_mb_analysis_t *a ) { int bcost, cost, failures, prevcost, origcost; int orig_qp = h->mb.i_qp, bqp = h->mb.i_qp; int last_qp_tried = 0; - origcost = bcost = x264_rd_cost_mb( h, a->i_lambda2 ); + origcost = bcost = rd_cost_mb( h, a->i_lambda2 ); int origcbp = h->mb.cbp[h->mb.i_mb_xy]; /* If CBP is already zero, don't raise the quantizer any higher. 
*/ @@ -2959,7 +2835,7 @@ { h->mb.i_qp = X264_MAX( h->mb.i_qp - threshold - 1, SPEC_QP( h->param.rc.i_qp_min ) ); h->mb.i_chroma_qp = h->chroma_qp_table[h->mb.i_qp]; - already_checked_cost = x264_rd_cost_mb( h, a->i_lambda2 ); + already_checked_cost = rd_cost_mb( h, a->i_lambda2 ); if( !h->mb.cbp[h->mb.i_mb_xy] ) { /* If our empty-CBP block is lower QP than the last QP, @@ -2983,7 +2859,7 @@ else { h->mb.i_chroma_qp = h->chroma_qp_table[h->mb.i_qp]; - cost = x264_rd_cost_mb( h, a->i_lambda2 ); + cost = rd_cost_mb( h, a->i_lambda2 ); COPY2_IF_LT( bcost, cost, bqp, h->mb.i_qp ); } @@ -3008,7 +2884,7 @@ { h->mb.i_qp = h->mb.i_last_qp; h->mb.i_chroma_qp = h->chroma_qp_table[h->mb.i_qp]; - cost = x264_rd_cost_mb( h, a->i_lambda2 ); + cost = rd_cost_mb( h, a->i_lambda2 ); COPY2_IF_LT( bcost, cost, bqp, h->mb.i_qp ); } @@ -3020,7 +2896,7 @@ x264_mb_transform_8x8_allowed( h ) ) { h->mb.b_transform_8x8 ^= 1; - cost = x264_rd_cost_mb( h, a->i_lambda2 ); + cost = rd_cost_mb( h, a->i_lambda2 ); if( cost > bcost ) h->mb.b_transform_8x8 ^= 1; } @@ -3042,17 +2918,17 @@ if( h->param.analyse.b_mb_info ) h->fdec->effective_qp[h->mb.i_mb_xy] = h->mb.i_qp; /* Store the real analysis QP. 
*/ - x264_mb_analyse_init( h, &analysis, h->mb.i_qp ); + mb_analyse_init( h, &analysis, h->mb.i_qp ); /*--------------------------- Do the analysis ---------------------------*/ if( h->sh.i_type == SLICE_TYPE_I ) { intra_analysis: if( analysis.i_mbrd ) - x264_mb_init_fenc_cache( h, analysis.i_mbrd >= 2 ); - x264_mb_analyse_intra( h, &analysis, COST_MAX ); + mb_init_fenc_cache( h, analysis.i_mbrd >= 2 ); + mb_analyse_intra( h, &analysis, COST_MAX ); if( analysis.i_mbrd ) - x264_intra_rd( h, &analysis, COST_MAX ); + intra_rd( h, &analysis, COST_MAX ); i_cost = analysis.i_satd_i16x16; h->mb.i_type = I_16x16; @@ -3062,7 +2938,7 @@ h->mb.i_type = I_PCM; else if( analysis.i_mbrd >= 2 ) - x264_intra_rd_refine( h, &analysis ); + intra_rd_refine( h, &analysis ); } else if( h->sh.i_type == SLICE_TYPE_P ) { @@ -3075,7 +2951,7 @@ { if( !h->param.analyse.b_psy ) { - x264_mb_analyse_init_qp( h, &analysis, X264_MAX( h->mb.i_qp - h->mb.ip_offset, h->param.rc.i_qp_min ) ); + mb_analyse_init_qp( h, &analysis, X264_MAX( h->mb.i_qp - h->mb.ip_offset, h->param.rc.i_qp_min ) ); goto intra_analysis; } } @@ -3147,9 +3023,9 @@ int i_partition; int i_satd_inter, i_satd_intra; - x264_mb_analyse_load_costs( h, &analysis ); + mb_analyse_load_costs( h, &analysis ); - x264_mb_analyse_inter_p16x16( h, &analysis ); + mb_analyse_inter_p16x16( h, &analysis ); if( h->mb.i_type == P_SKIP ) { @@ -3161,9 +3037,9 @@ if( flags & X264_ANALYSE_PSUB16x16 ) { if( h->param.analyse.b_mixed_references ) - x264_mb_analyse_inter_p8x8_mixed_ref( h, &analysis ); + mb_analyse_inter_p8x8_mixed_ref( h, &analysis ); else - x264_mb_analyse_inter_p8x8( h, &analysis ); + mb_analyse_inter_p8x8( h, &analysis ); } /* Select best inter mode */ @@ -3183,24 +3059,24 @@ { for( int i = 0; i < 4; i++ ) { - x264_mb_analyse_inter_p4x4( h, &analysis, i ); + mb_analyse_inter_p4x4( h, &analysis, i ); int i_thresh8x4 = analysis.l0.me4x4[i][1].cost_mv + analysis.l0.me4x4[i][2].cost_mv; if( !analysis.b_early_terminate || 
analysis.l0.i_cost4x4[i] < analysis.l0.me8x8[i].cost + i_thresh8x4 ) { int i_cost8x8 = analysis.l0.i_cost4x4[i]; h->mb.i_sub_partition[i] = D_L0_4x4; - x264_mb_analyse_inter_p8x4( h, &analysis, i ); + mb_analyse_inter_p8x4( h, &analysis, i ); COPY2_IF_LT( i_cost8x8, analysis.l0.i_cost8x4[i], h->mb.i_sub_partition[i], D_L0_8x4 ); - x264_mb_analyse_inter_p4x8( h, &analysis, i ); + mb_analyse_inter_p4x8( h, &analysis, i ); COPY2_IF_LT( i_cost8x8, analysis.l0.i_cost4x8[i], h->mb.i_sub_partition[i], D_L0_4x8 ); i_cost += i_cost8x8 - analysis.l0.me8x8[i].cost; } - x264_mb_cache_mv_p8x8( h, &analysis, i ); + mb_cache_mv_p8x8( h, &analysis, i ); } analysis.l0.i_cost8x8 = i_cost; } @@ -3215,14 +3091,14 @@ + analysis.l0.me8x8[3].cost_mv + analysis.l0.me8x8[3].i_ref_cost + 1) >> 1; analysis.i_cost_est16x8[1] = analysis.i_satd8x8[0][2] + analysis.i_satd8x8[0][3] + i_avg_mv_ref_cost; - x264_mb_analyse_inter_p16x8( h, &analysis, i_cost ); + mb_analyse_inter_p16x8( h, &analysis, i_cost ); COPY3_IF_LT( i_cost, analysis.l0.i_cost16x8, i_type, P_L0, i_partition, D_16x8 ); i_avg_mv_ref_cost = (analysis.l0.me8x8[1].cost_mv + analysis.l0.me8x8[1].i_ref_cost + analysis.l0.me8x8[3].cost_mv + analysis.l0.me8x8[3].i_ref_cost + 1) >> 1; analysis.i_cost_est8x16[1] = analysis.i_satd8x8[0][1] + analysis.i_satd8x8[0][3] + i_avg_mv_ref_cost; - x264_mb_analyse_inter_p8x16( h, &analysis, i_cost ); + mb_analyse_inter_p8x16( h, &analysis, i_cost ); COPY3_IF_LT( i_cost, analysis.l0.i_cost8x16, i_type, P_L0, i_partition, D_8x16 ); } @@ -3296,20 +3172,20 @@ { if( CHROMA444 ) { - x264_mb_analyse_intra( h, &analysis, i_cost ); - x264_mb_analyse_intra_chroma( h, &analysis ); + mb_analyse_intra( h, &analysis, i_cost ); + mb_analyse_intra_chroma( h, &analysis ); } else { - x264_mb_analyse_intra_chroma( h, &analysis ); - x264_mb_analyse_intra( h, &analysis, i_cost - analysis.i_satd_chroma ); + mb_analyse_intra_chroma( h, &analysis ); + mb_analyse_intra( h, &analysis, i_cost - analysis.i_satd_chroma ); } 
analysis.i_satd_i16x16 += analysis.i_satd_chroma; analysis.i_satd_i8x8 += analysis.i_satd_chroma; analysis.i_satd_i4x4 += analysis.i_satd_chroma; } else - x264_mb_analyse_intra( h, &analysis, i_cost ); + mb_analyse_intra( h, &analysis, i_cost ); i_satd_inter = i_cost; i_satd_intra = X264_MIN3( analysis.i_satd_i16x16, @@ -3318,7 +3194,7 @@ if( analysis.i_mbrd ) { - x264_mb_analyse_p_rd( h, &analysis, X264_MIN(i_satd_inter, i_satd_intra) ); + mb_analyse_p_rd( h, &analysis, X264_MIN(i_satd_inter, i_satd_intra) ); i_type = P_L0; i_partition = D_16x16; i_cost = analysis.l0.i_rd16x16; @@ -3328,8 +3204,8 @@ h->mb.i_type = i_type; h->mb.i_partition = i_partition; if( i_cost < COST_MAX ) - x264_mb_analyse_transform_rd( h, &analysis, &i_satd_inter, &i_cost ); - x264_intra_rd( h, &analysis, i_satd_inter * 5/4 + 1 ); + mb_analyse_transform_rd( h, &analysis, &i_satd_inter, &i_cost ); + intra_rd( h, &analysis, i_satd_inter * 5/4 + 1 ); } COPY2_IF_LT( i_cost, analysis.i_satd_i16x16, i_type, I_16x16 ); @@ -3343,7 +3219,7 @@ { /* Intra masking: copy fdec to fenc and re-encode the block as intra in order to make it appear as if * it was an inter block. */ - x264_analyse_update_cache( h, &analysis ); + analyse_update_cache( h, &analysis ); x264_macroblock_encode( h ); for( int p = 0; p < (CHROMA444 ? 
3 : 1); p++ ) h->mc.copy[PIXEL_16x16]( h->mb.pic.p_fenc[p], FENC_STRIDE, h->mb.pic.p_fdec[p], FDEC_STRIDE, 16 ); @@ -3353,7 +3229,7 @@ h->mc.copy[PIXEL_8x8] ( h->mb.pic.p_fenc[1], FENC_STRIDE, h->mb.pic.p_fdec[1], FDEC_STRIDE, height ); h->mc.copy[PIXEL_8x8] ( h->mb.pic.p_fenc[2], FENC_STRIDE, h->mb.pic.p_fdec[2], FDEC_STRIDE, height ); } - x264_mb_analyse_init_qp( h, &analysis, X264_MAX( h->mb.i_qp - h->mb.ip_offset, h->param.rc.i_qp_min ) ); + mb_analyse_init_qp( h, &analysis, X264_MAX( h->mb.i_qp - h->mb.ip_offset, h->param.rc.i_qp_min ) ); goto intra_analysis; } @@ -3361,7 +3237,7 @@ { if( IS_INTRA( h->mb.i_type ) ) { - x264_intra_rd_refine( h, &analysis ); + intra_rd_refine( h, &analysis ); } else if( i_partition == D_16x16 ) { @@ -3387,7 +3263,7 @@ } else if( i_partition == D_8x8 ) { - x264_analyse_update_cache( h, &analysis ); + analyse_update_cache( h, &analysis ); for( int i8x8 = 0; i8x8 < 4; i8x8++ ) { if( h->mb.i_sub_partition[i8x8] == D_L0_8x8 ) @@ -3422,7 +3298,7 @@ int b_skip = 0; if( analysis.i_mbrd ) - x264_mb_init_fenc_cache( h, analysis.i_mbrd >= 2 ); + mb_init_fenc_cache( h, analysis.i_mbrd >= 2 ); h->mb.i_type = B_SKIP; if( h->mb.b_direct_auto_write ) @@ -3490,14 +3366,14 @@ h->mb.b_skip_mc = 0; h->mb.i_type = B_DIRECT; - x264_mb_analyse_load_costs( h, &analysis ); + mb_analyse_load_costs( h, &analysis ); /* select best inter mode */ /* direct must be first */ if( analysis.b_direct_available ) - x264_mb_analyse_inter_direct( h, &analysis ); + mb_analyse_inter_direct( h, &analysis ); - x264_mb_analyse_inter_b16x16( h, &analysis ); + mb_analyse_inter_b16x16( h, &analysis ); if( h->mb.i_type == B_SKIP ) { @@ -3517,14 +3393,14 @@ if( analysis.i_mbrd && analysis.b_early_terminate && analysis.i_cost16x16direct <= i_cost * 33/32 ) { - x264_mb_analyse_b_rd( h, &analysis, i_cost ); + mb_analyse_b_rd( h, &analysis, i_cost ); if( i_bskip_cost < analysis.i_rd16x16direct && i_bskip_cost < analysis.i_rd16x16bi && i_bskip_cost < analysis.l0.i_rd16x16 && 
i_bskip_cost < analysis.l1.i_rd16x16 ) { h->mb.i_type = B_SKIP; - x264_analyse_update_cache( h, &analysis ); + analyse_update_cache( h, &analysis ); return; } } @@ -3532,9 +3408,9 @@ if( flags & X264_ANALYSE_BSUB16x16 ) { if( h->param.analyse.b_mixed_references ) - x264_mb_analyse_inter_b8x8_mixed_ref( h, &analysis ); + mb_analyse_inter_b8x8_mixed_ref( h, &analysis ); else - x264_mb_analyse_inter_b8x8( h, &analysis ); + mb_analyse_inter_b8x8( h, &analysis ); COPY3_IF_LT( i_cost, analysis.i_cost8x8bi, i_type, B_8x8, i_partition, D_8x8 ); @@ -3584,17 +3460,17 @@ int try_16x8_first = i_cost_est16x8bi_total < i_cost_est8x16bi_total; if( try_16x8_first && (!analysis.b_early_terminate || i_cost_est16x8bi_total < i_cost) ) { - x264_mb_analyse_inter_b16x8( h, &analysis, i_cost ); + mb_analyse_inter_b16x8( h, &analysis, i_cost ); COPY3_IF_LT( i_cost, analysis.i_cost16x8bi, i_type, analysis.i_mb_type16x8, i_partition, D_16x8 ); } if( !analysis.b_early_terminate || i_cost_est8x16bi_total < i_cost ) { - x264_mb_analyse_inter_b8x16( h, &analysis, i_cost ); + mb_analyse_inter_b8x16( h, &analysis, i_cost ); COPY3_IF_LT( i_cost, analysis.i_cost8x16bi, i_type, analysis.i_mb_type8x16, i_partition, D_8x16 ); } if( !try_16x8_first && (!analysis.b_early_terminate || i_cost_est16x8bi_total < i_cost) ) { - x264_mb_analyse_inter_b16x8( h, &analysis, i_cost ); + mb_analyse_inter_b16x8( h, &analysis, i_cost ); COPY3_IF_LT( i_cost, analysis.i_cost16x8bi, i_type, analysis.i_mb_type16x8, i_partition, D_16x8 ); } } @@ -3686,7 +3562,7 @@ if( analysis.i_mbrd ) { - x264_mb_analyse_b_rd( h, &analysis, i_satd_inter ); + mb_analyse_b_rd( h, &analysis, i_satd_inter ); i_type = B_SKIP; i_cost = i_bskip_cost; i_partition = D_16x16; @@ -3706,25 +3582,25 @@ { if( CHROMA444 ) { - x264_mb_analyse_intra( h, &analysis, i_satd_inter ); - x264_mb_analyse_intra_chroma( h, &analysis ); + mb_analyse_intra( h, &analysis, i_satd_inter ); + mb_analyse_intra_chroma( h, &analysis ); } else { - 
x264_mb_analyse_intra_chroma( h, &analysis ); - x264_mb_analyse_intra( h, &analysis, i_satd_inter - analysis.i_satd_chroma ); + mb_analyse_intra_chroma( h, &analysis ); + mb_analyse_intra( h, &analysis, i_satd_inter - analysis.i_satd_chroma ); } analysis.i_satd_i16x16 += analysis.i_satd_chroma; analysis.i_satd_i8x8 += analysis.i_satd_chroma; analysis.i_satd_i4x4 += analysis.i_satd_chroma; } else - x264_mb_analyse_intra( h, &analysis, i_satd_inter ); + mb_analyse_intra( h, &analysis, i_satd_inter ); if( analysis.i_mbrd ) { - x264_mb_analyse_transform_rd( h, &analysis, &i_satd_inter, &i_cost ); - x264_intra_rd( h, &analysis, i_satd_inter * 17/16 + 1 ); + mb_analyse_transform_rd( h, &analysis, &i_satd_inter, &i_cost ); + intra_rd( h, &analysis, i_satd_inter * 17/16 + 1 ); } COPY2_IF_LT( i_cost, analysis.i_satd_i16x16, i_type, I_16x16 ); @@ -3736,14 +3612,14 @@ h->mb.i_partition = i_partition; if( analysis.i_mbrd >= 2 && IS_INTRA( i_type ) && i_type != I_PCM ) - x264_intra_rd_refine( h, &analysis ); + intra_rd_refine( h, &analysis ); if( h->mb.i_subpel_refine >= 5 ) - x264_refine_bidir( h, &analysis ); + refine_bidir( h, &analysis ); if( analysis.i_mbrd >= 2 && i_type > B_DIRECT && i_type < B_SKIP ) { int i_biweight; - x264_analyse_update_cache( h, &analysis ); + analyse_update_cache( h, &analysis ); if( i_partition == D_16x16 ) { @@ -3814,7 +3690,7 @@ } } - x264_analyse_update_cache( h, &analysis ); + analyse_update_cache( h, &analysis ); /* In rare cases we can end up qpel-RDing our way back to a larger partition size * without realizing it. Check for this and account for it if necessary. 
*/ @@ -3830,22 +3706,22 @@ } if( !analysis.i_mbrd ) - x264_mb_analyse_transform( h ); + mb_analyse_transform( h ); if( analysis.i_mbrd == 3 && !IS_SKIP(h->mb.i_type) ) - x264_mb_analyse_qp_rd( h, &analysis ); + mb_analyse_qp_rd( h, &analysis ); h->mb.b_trellis = h->param.analyse.i_trellis; h->mb.b_noise_reduction = h->mb.b_noise_reduction || (!!h->param.analyse.i_noise_reduction && !IS_INTRA( h->mb.i_type )); if( !IS_SKIP(h->mb.i_type) && h->mb.i_psy_trellis && h->param.analyse.i_trellis == 1 ) - x264_psy_trellis_init( h, 0 ); + psy_trellis_init( h, 0 ); if( h->mb.b_trellis == 1 || h->mb.b_noise_reduction ) h->mb.i_skip_intra = 0; } /*-------------------- Update MB from the analysis ----------------------*/ -static void x264_analyse_update_cache( x264_t *h, x264_mb_analysis_t *a ) +static void analyse_update_cache( x264_t *h, x264_mb_analysis_t *a ) { switch( h->mb.i_type ) { @@ -3853,17 +3729,17 @@ for( int i = 0; i < 16; i++ ) h->mb.cache.intra4x4_pred_mode[x264_scan8[i]] = a->i_predict4x4[i]; - x264_mb_analyse_intra_chroma( h, a ); + mb_analyse_intra_chroma( h, a ); break; case I_8x8: for( int i = 0; i < 4; i++ ) x264_macroblock_cache_intra8x8_pred( h, 2*(i&1), 2*(i>>1), a->i_predict8x8[i] ); - x264_mb_analyse_intra_chroma( h, a ); + mb_analyse_intra_chroma( h, a ); break; case I_16x16: h->mb.i_intra16x16_pred_mode = a->i_predict16x16; - x264_mb_analyse_intra_chroma( h, a ); + mb_analyse_intra_chroma( h, a ); break; case I_PCM: @@ -3903,7 +3779,7 @@ x264_macroblock_cache_ref( h, 0, 2, 2, 2, 0, a->l0.me8x8[2].i_ref ); x264_macroblock_cache_ref( h, 2, 2, 2, 2, 0, a->l0.me8x8[3].i_ref ); for( int i = 0; i < 4; i++ ) - x264_mb_cache_mv_p8x8( h, a, i ); + mb_cache_mv_p8x8( h, a, i ); break; case P_SKIP: @@ -3917,16 +3793,16 @@ case B_SKIP: case B_DIRECT: h->mb.i_partition = h->mb.cache.direct_partition; - x264_mb_load_mv_direct8x8( h, 0 ); - x264_mb_load_mv_direct8x8( h, 1 ); - x264_mb_load_mv_direct8x8( h, 2 ); - x264_mb_load_mv_direct8x8( h, 3 ); + 
mb_load_mv_direct8x8( h, 0 ); + mb_load_mv_direct8x8( h, 1 ); + mb_load_mv_direct8x8( h, 2 ); + mb_load_mv_direct8x8( h, 3 ); break; case B_8x8: /* optimize: cache might not need to be rewritten */ for( int i = 0; i < 4; i++ ) - x264_mb_cache_mv_b8x8( h, a, i, 1 ); + mb_cache_mv_b8x8( h, a, i, 1 ); break; default: /* the rest of the B types */ @@ -3961,12 +3837,12 @@ } break; case D_16x8: - x264_mb_cache_mv_b16x8( h, a, 0, 1 ); - x264_mb_cache_mv_b16x8( h, a, 1, 1 ); + mb_cache_mv_b16x8( h, a, 0, 1 ); + mb_cache_mv_b16x8( h, a, 1, 1 ); break; case D_8x16: - x264_mb_cache_mv_b8x16( h, a, 0, 1 ); - x264_mb_cache_mv_b8x16( h, a, 1, 1 ); + mb_cache_mv_b8x16( h, a, 0, 1 ); + mb_cache_mv_b8x16( h, a, 1, 1 ); break; default: x264_log( h, X264_LOG_ERROR, "internal error (invalid MB type)\n" ); @@ -3995,10 +3871,10 @@ x264_log( h, X264_LOG_DEBUG, "mb_xy: %d,%d \n", h->mb.i_mb_x, h->mb.i_mb_y); x264_log( h, X264_LOG_DEBUG, "completed: %d \n", completed ); x264_log( h, X264_LOG_WARNING, "recovering by using intra mode\n"); - x264_mb_analyse_intra( h, a, COST_MAX ); + mb_analyse_intra( h, a, COST_MAX ); h->mb.i_type = I_16x16; h->mb.i_intra16x16_pred_mode = a->i_predict16x16; - x264_mb_analyse_intra_chroma( h, a ); + mb_analyse_intra_chroma( h, a ); } } } diff -Nru x264-0.152.2854+gite9a5903/encoder/analyse.h x264-0.158.2988+git-20191101.7817004/encoder/analyse.h --- x264-0.152.2854+gite9a5903/encoder/analyse.h 2017-12-31 12:50:51.000000000 +0000 +++ x264-0.158.2988+git-20191101.7817004/encoder/analyse.h 2019-11-09 05:16:29.000000000 +0000 @@ -1,7 +1,7 @@ /***************************************************************************** * analyse.h: macroblock analysis ***************************************************************************** - * Copyright (C) 2003-2017 x264 project + * Copyright (C) 2003-2019 x264 project * * Authors: Laurent Aimar * Loren Merritt @@ -24,21 +24,32 @@ * For more information, contact us at licensing@x264.com. 
*****************************************************************************/ -#ifndef X264_ANALYSE_H -#define X264_ANALYSE_H +#ifndef X264_ENCODER_ANALYSE_H +#define X264_ENCODER_ANALYSE_H +#define x264_analyse_init_costs x264_template(analyse_init_costs) int x264_analyse_init_costs( x264_t *h ); +#define x264_analyse_free_costs x264_template(analyse_free_costs) void x264_analyse_free_costs( x264_t *h ); +#define x264_analyse_weight_frame x264_template(analyse_weight_frame) void x264_analyse_weight_frame( x264_t *h, int end ); +#define x264_macroblock_analyse x264_template(macroblock_analyse) void x264_macroblock_analyse( x264_t *h ); +#define x264_slicetype_decide x264_template(slicetype_decide) void x264_slicetype_decide( x264_t *h ); +#define x264_slicetype_analyse x264_template(slicetype_analyse) void x264_slicetype_analyse( x264_t *h, int intra_minigop ); +#define x264_lookahead_init x264_template(lookahead_init) int x264_lookahead_init( x264_t *h, int i_slicetype_length ); +#define x264_lookahead_is_empty x264_template(lookahead_is_empty) int x264_lookahead_is_empty( x264_t *h ); +#define x264_lookahead_put_frame x264_template(lookahead_put_frame) void x264_lookahead_put_frame( x264_t *h, x264_frame_t *frame ); +#define x264_lookahead_get_frames x264_template(lookahead_get_frames) void x264_lookahead_get_frames( x264_t *h ); +#define x264_lookahead_delete x264_template(lookahead_delete) void x264_lookahead_delete( x264_t *h ); #endif diff -Nru x264-0.152.2854+gite9a5903/encoder/api.c x264-0.158.2988+git-20191101.7817004/encoder/api.c --- x264-0.152.2854+gite9a5903/encoder/api.c 1970-01-01 00:00:00.000000000 +0000 +++ x264-0.158.2988+git-20191101.7817004/encoder/api.c 2019-11-09 05:16:29.000000000 +0000 @@ -0,0 +1,194 @@ +/***************************************************************************** + * api.c: bit depth independent interface + ***************************************************************************** + * Copyright (C) 2003-2019 x264 
project + * + * Authors: Vittorio Giovara + * Luca Barbato + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02111, USA. + * + * This program is also available under a commercial proprietary license. + * For more information, contact us at licensing@x264.com. + *****************************************************************************/ + +#include "common/base.h" + +/**************************************************************************** + * global symbols + ****************************************************************************/ +const int x264_chroma_format = X264_CHROMA_FORMAT; + +x264_t *x264_8_encoder_open( x264_param_t * ); +void x264_8_nal_encode( x264_t *h, uint8_t *dst, x264_nal_t *nal ); +int x264_8_encoder_reconfig( x264_t *, x264_param_t * ); +void x264_8_encoder_parameters( x264_t *, x264_param_t * ); +int x264_8_encoder_headers( x264_t *, x264_nal_t **pp_nal, int *pi_nal ); +int x264_8_encoder_encode( x264_t *, x264_nal_t **pp_nal, int *pi_nal, x264_picture_t *pic_in, x264_picture_t *pic_out ); +void x264_8_encoder_close( x264_t * ); +int x264_8_encoder_delayed_frames( x264_t * ); +int x264_8_encoder_maximum_delayed_frames( x264_t * ); +void x264_8_encoder_intra_refresh( x264_t * ); +int x264_8_encoder_invalidate_reference( x264_t *, int64_t pts ); + +x264_t *x264_10_encoder_open( 
x264_param_t * ); +void x264_10_nal_encode( x264_t *h, uint8_t *dst, x264_nal_t *nal ); +int x264_10_encoder_reconfig( x264_t *, x264_param_t * ); +void x264_10_encoder_parameters( x264_t *, x264_param_t * ); +int x264_10_encoder_headers( x264_t *, x264_nal_t **pp_nal, int *pi_nal ); +int x264_10_encoder_encode( x264_t *, x264_nal_t **pp_nal, int *pi_nal, x264_picture_t *pic_in, x264_picture_t *pic_out ); +void x264_10_encoder_close( x264_t * ); +int x264_10_encoder_delayed_frames( x264_t * ); +int x264_10_encoder_maximum_delayed_frames( x264_t * ); +void x264_10_encoder_intra_refresh( x264_t * ); +int x264_10_encoder_invalidate_reference( x264_t *, int64_t pts ); + +typedef struct x264_api_t +{ + /* Internal reference to x264_t data */ + x264_t *x264; + + /* API entry points */ + void (*nal_encode)( x264_t *h, uint8_t *dst, x264_nal_t *nal ); + int (*encoder_reconfig)( x264_t *, x264_param_t * ); + void (*encoder_parameters)( x264_t *, x264_param_t * ); + int (*encoder_headers)( x264_t *, x264_nal_t **pp_nal, int *pi_nal ); + int (*encoder_encode)( x264_t *, x264_nal_t **pp_nal, int *pi_nal, x264_picture_t *pic_in, x264_picture_t *pic_out ); + void (*encoder_close)( x264_t * ); + int (*encoder_delayed_frames)( x264_t * ); + int (*encoder_maximum_delayed_frames)( x264_t * ); + void (*encoder_intra_refresh)( x264_t * ); + int (*encoder_invalidate_reference)( x264_t *, int64_t pts ); +} x264_api_t; + +REALIGN_STACK x264_t *x264_encoder_open( x264_param_t *param ) +{ + x264_api_t *api = calloc( 1, sizeof( x264_api_t ) ); + if( !api ) + return NULL; + + if( HAVE_BITDEPTH8 && param->i_bitdepth == 8 ) + { + api->nal_encode = x264_8_nal_encode; + api->encoder_reconfig = x264_8_encoder_reconfig; + api->encoder_parameters = x264_8_encoder_parameters; + api->encoder_headers = x264_8_encoder_headers; + api->encoder_encode = x264_8_encoder_encode; + api->encoder_close = x264_8_encoder_close; + api->encoder_delayed_frames = x264_8_encoder_delayed_frames; + 
api->encoder_maximum_delayed_frames = x264_8_encoder_maximum_delayed_frames; + api->encoder_intra_refresh = x264_8_encoder_intra_refresh; + api->encoder_invalidate_reference = x264_8_encoder_invalidate_reference; + + api->x264 = x264_8_encoder_open( param ); + } + else if( HAVE_BITDEPTH10 && param->i_bitdepth == 10 ) + { + api->nal_encode = x264_10_nal_encode; + api->encoder_reconfig = x264_10_encoder_reconfig; + api->encoder_parameters = x264_10_encoder_parameters; + api->encoder_headers = x264_10_encoder_headers; + api->encoder_encode = x264_10_encoder_encode; + api->encoder_close = x264_10_encoder_close; + api->encoder_delayed_frames = x264_10_encoder_delayed_frames; + api->encoder_maximum_delayed_frames = x264_10_encoder_maximum_delayed_frames; + api->encoder_intra_refresh = x264_10_encoder_intra_refresh; + api->encoder_invalidate_reference = x264_10_encoder_invalidate_reference; + + api->x264 = x264_10_encoder_open( param ); + } + else + x264_log_internal( X264_LOG_ERROR, "not compiled with %d bit depth support\n", param->i_bitdepth ); + + if( !api->x264 ) + { + free( api ); + return NULL; + } + + /* x264_t is opaque */ + return (x264_t *)api; +} + +REALIGN_STACK void x264_encoder_close( x264_t *h ) +{ + x264_api_t *api = (x264_api_t *)h; + + api->encoder_close( api->x264 ); + free( api ); +} + +REALIGN_STACK void x264_nal_encode( x264_t *h, uint8_t *dst, x264_nal_t *nal ) +{ + x264_api_t *api = (x264_api_t *)h; + + api->nal_encode( api->x264, dst, nal ); +} + +REALIGN_STACK int x264_encoder_reconfig( x264_t *h, x264_param_t *param) +{ + x264_api_t *api = (x264_api_t *)h; + + return api->encoder_reconfig( api->x264, param ); +} + +REALIGN_STACK void x264_encoder_parameters( x264_t *h, x264_param_t *param ) +{ + x264_api_t *api = (x264_api_t *)h; + + api->encoder_parameters( api->x264, param ); +} + +REALIGN_STACK int x264_encoder_headers( x264_t *h, x264_nal_t **pp_nal, int *pi_nal ) +{ + x264_api_t *api = (x264_api_t *)h; + + return api->encoder_headers( 
api->x264, pp_nal, pi_nal ); +} + +REALIGN_STACK int x264_encoder_encode( x264_t *h, x264_nal_t **pp_nal, int *pi_nal, x264_picture_t *pic_in, x264_picture_t *pic_out ) +{ + x264_api_t *api = (x264_api_t *)h; + + return api->encoder_encode( api->x264, pp_nal, pi_nal, pic_in, pic_out ); +} + +REALIGN_STACK int x264_encoder_delayed_frames( x264_t *h ) +{ + x264_api_t *api = (x264_api_t *)h; + + return api->encoder_delayed_frames( api->x264 ); +} + +REALIGN_STACK int x264_encoder_maximum_delayed_frames( x264_t *h ) +{ + x264_api_t *api = (x264_api_t *)h; + + return api->encoder_maximum_delayed_frames( api->x264 ); +} + +REALIGN_STACK void x264_encoder_intra_refresh( x264_t *h ) +{ + x264_api_t *api = (x264_api_t *)h; + + api->encoder_intra_refresh( api->x264 ); +} + +REALIGN_STACK int x264_encoder_invalidate_reference( x264_t *h, int64_t pts ) +{ + x264_api_t *api = (x264_api_t *)h; + + return api->encoder_invalidate_reference( api->x264, pts ); +} diff -Nru x264-0.152.2854+gite9a5903/encoder/cabac.c x264-0.158.2988+git-20191101.7817004/encoder/cabac.c --- x264-0.152.2854+gite9a5903/encoder/cabac.c 2017-12-31 12:50:51.000000000 +0000 +++ x264-0.158.2988+git-20191101.7817004/encoder/cabac.c 2019-11-09 05:16:29.000000000 +0000 @@ -1,7 +1,7 @@ /***************************************************************************** * cabac.c: cabac bitstream writing ***************************************************************************** - * Copyright (C) 2003-2017 x264 project + * Copyright (C) 2003-2019 x264 project * * Authors: Laurent Aimar * Loren Merritt @@ -32,8 +32,8 @@ #define RDO_SKIP_BS 0 #endif -static inline void x264_cabac_mb_type_intra( x264_t *h, x264_cabac_t *cb, int i_mb_type, - int ctx0, int ctx1, int ctx2, int ctx3, int ctx4, int ctx5 ) +static inline void cabac_mb_type_intra( x264_t *h, x264_cabac_t *cb, int i_mb_type, + int ctx0, int ctx1, int ctx2, int ctx3, int ctx4, int ctx5 ) { if( i_mb_type == I_4x4 || i_mb_type == I_8x8 ) { @@ -67,7 +67,7 @@ } #if 
!RDO_SKIP_BS -static void x264_cabac_field_decoding_flag( x264_t *h, x264_cabac_t *cb ) +static void cabac_field_decoding_flag( x264_t *h, x264_cabac_t *cb ) { int ctx = 0; ctx += h->mb.field_decoding_flag & !!h->mb.i_mb_x; @@ -80,7 +80,7 @@ } #endif -static void x264_cabac_intra4x4_pred_mode( x264_cabac_t *cb, int i_pred, int i_mode ) +static void cabac_intra4x4_pred_mode( x264_cabac_t *cb, int i_pred, int i_mode ) { if( i_pred == i_mode ) x264_cabac_encode_decision( cb, 68, 1 ); @@ -95,7 +95,7 @@ } } -static void x264_cabac_intra_chroma_pred_mode( x264_t *h, x264_cabac_t *cb ) +static void cabac_intra_chroma_pred_mode( x264_t *h, x264_cabac_t *cb ) { int i_mode = x264_mb_chroma_pred_mode_fix[h->mb.i_chroma_pred_mode]; int ctx = 0; @@ -115,7 +115,7 @@ } } -static void x264_cabac_cbp_luma( x264_t *h, x264_cabac_t *cb ) +static void cabac_cbp_luma( x264_t *h, x264_cabac_t *cb ) { int cbp = h->mb.i_cbp_luma; int cbp_l = h->mb.cache.i_cbp_left; @@ -126,7 +126,7 @@ x264_cabac_encode_decision_noup( cb, 76 - ((cbp >> 2) & 1) - ((cbp >> 0) & 2), (cbp >> 3) & 1 ); } -static void x264_cabac_cbp_chroma( x264_t *h, x264_cabac_t *cb ) +static void cabac_cbp_chroma( x264_t *h, x264_cabac_t *cb ) { int cbp_a = h->mb.cache.i_cbp_left & 0x30; int cbp_b = h->mb.cache.i_cbp_top & 0x30; @@ -147,7 +147,7 @@ } } -static void x264_cabac_qp_delta( x264_t *h, x264_cabac_t *cb ) +static void cabac_qp_delta( x264_t *h, x264_cabac_t *cb ) { int i_dqp = h->mb.i_qp - h->mb.i_last_qp; int ctx; @@ -195,7 +195,7 @@ } #endif -static inline void x264_cabac_subpartition_p( x264_cabac_t *cb, int i_sub ) +static inline void cabac_subpartition_p( x264_cabac_t *cb, int i_sub ) { if( i_sub == D_L0_8x8 ) { @@ -212,7 +212,7 @@ } } -static ALWAYS_INLINE void x264_cabac_subpartition_b( x264_cabac_t *cb, int i_sub ) +static ALWAYS_INLINE void cabac_subpartition_b( x264_cabac_t *cb, int i_sub ) { if( i_sub == D_DIRECT_8x8 ) { @@ -232,13 +232,13 @@ x264_cabac_encode_decision( cb, 39, i_sub == D_L1_8x8 ); } 
-static ALWAYS_INLINE void x264_cabac_transform_size( x264_t *h, x264_cabac_t *cb ) +static ALWAYS_INLINE void cabac_transform_size( x264_t *h, x264_cabac_t *cb ) { int ctx = 399 + h->mb.cache.i_neighbour_transform_size; x264_cabac_encode_decision_noup( cb, ctx, h->mb.b_transform_8x8 ); } -static ALWAYS_INLINE void x264_cabac_ref_internal( x264_t *h, x264_cabac_t *cb, int i_list, int idx, int bframe ) +static ALWAYS_INLINE void cabac_ref_internal( x264_t *h, x264_cabac_t *cb, int i_list, int idx, int bframe ) { const int i8 = x264_scan8[idx]; const int i_refa = h->mb.cache.ref[i_list][i8 - 1]; @@ -258,16 +258,16 @@ x264_cabac_encode_decision( cb, 54 + ctx, 0 ); } -static NOINLINE void x264_cabac_ref_p( x264_t *h, x264_cabac_t *cb, int idx ) +static NOINLINE void cabac_ref_p( x264_t *h, x264_cabac_t *cb, int idx ) { - x264_cabac_ref_internal( h, cb, 0, idx, 0 ); + cabac_ref_internal( h, cb, 0, idx, 0 ); } -static NOINLINE void x264_cabac_ref_b( x264_t *h, x264_cabac_t *cb, int i_list, int idx ) +static NOINLINE void cabac_ref_b( x264_t *h, x264_cabac_t *cb, int i_list, int idx ) { - x264_cabac_ref_internal( h, cb, i_list, idx, 1 ); + cabac_ref_internal( h, cb, i_list, idx, 1 ); } -static ALWAYS_INLINE int x264_cabac_mvd_cpn( x264_t *h, x264_cabac_t *cb, int i_list, int idx, int l, int mvd, int ctx ) +static ALWAYS_INLINE int cabac_mvd_cpn( x264_t *h, x264_cabac_t *cb, int i_list, int idx, int l, int mvd, int ctx ) { int ctxbase = l ? 
47 : 40; @@ -326,7 +326,7 @@ return X264_MIN( i_abs, 66 ); } -static NOINLINE uint16_t x264_cabac_mvd( x264_t *h, x264_cabac_t *cb, int i_list, int idx, int width ) +static NOINLINE uint16_t cabac_mvd( x264_t *h, x264_cabac_t *cb, int i_list, int idx, int width ) { ALIGNED_4( int16_t mvp[2] ); int mdx, mdy; @@ -339,46 +339,46 @@ h->mb.cache.mvd[i_list][x264_scan8[idx] - 8]); /* encode */ - mdx = x264_cabac_mvd_cpn( h, cb, i_list, idx, 0, mdx, amvd&0xFF ); - mdy = x264_cabac_mvd_cpn( h, cb, i_list, idx, 1, mdy, amvd>>8 ); + mdx = cabac_mvd_cpn( h, cb, i_list, idx, 0, mdx, amvd&0xFF ); + mdy = cabac_mvd_cpn( h, cb, i_list, idx, 1, mdy, amvd>>8 ); return pack8to16(mdx,mdy); } -#define x264_cabac_mvd(h,cb,i_list,idx,width,height)\ +#define cabac_mvd(h,cb,i_list,idx,width,height)\ do\ {\ - uint16_t mvd = x264_cabac_mvd(h,cb,i_list,idx,width);\ + uint16_t mvd = cabac_mvd(h,cb,i_list,idx,width);\ x264_macroblock_cache_mvd( h, block_idx_x[idx], block_idx_y[idx], width, height, i_list, mvd );\ } while( 0 ) -static inline void x264_cabac_8x8_mvd( x264_t *h, x264_cabac_t *cb, int i ) +static inline void cabac_8x8_mvd( x264_t *h, x264_cabac_t *cb, int i ) { switch( h->mb.i_sub_partition[i] ) { case D_L0_8x8: - x264_cabac_mvd( h, cb, 0, 4*i, 2, 2 ); + cabac_mvd( h, cb, 0, 4*i, 2, 2 ); break; case D_L0_8x4: - x264_cabac_mvd( h, cb, 0, 4*i+0, 2, 1 ); - x264_cabac_mvd( h, cb, 0, 4*i+2, 2, 1 ); + cabac_mvd( h, cb, 0, 4*i+0, 2, 1 ); + cabac_mvd( h, cb, 0, 4*i+2, 2, 1 ); break; case D_L0_4x8: - x264_cabac_mvd( h, cb, 0, 4*i+0, 1, 2 ); - x264_cabac_mvd( h, cb, 0, 4*i+1, 1, 2 ); + cabac_mvd( h, cb, 0, 4*i+0, 1, 2 ); + cabac_mvd( h, cb, 0, 4*i+1, 1, 2 ); break; case D_L0_4x4: - x264_cabac_mvd( h, cb, 0, 4*i+0, 1, 1 ); - x264_cabac_mvd( h, cb, 0, 4*i+1, 1, 1 ); - x264_cabac_mvd( h, cb, 0, 4*i+2, 1, 1 ); - x264_cabac_mvd( h, cb, 0, 4*i+3, 1, 1 ); + cabac_mvd( h, cb, 0, 4*i+0, 1, 1 ); + cabac_mvd( h, cb, 0, 4*i+1, 1, 1 ); + cabac_mvd( h, cb, 0, 4*i+2, 1, 1 ); + cabac_mvd( h, cb, 0, 4*i+3, 
1, 1 ); break; default: assert(0); } } -static ALWAYS_INLINE void x264_cabac_mb_header_i( x264_t *h, x264_cabac_t *cb, int i_mb_type, int slice_type, int chroma ) +static ALWAYS_INLINE void cabac_mb_header_i( x264_t *h, x264_cabac_t *cb, int i_mb_type, int slice_type, int chroma ) { if( slice_type == SLICE_TYPE_I ) { @@ -388,7 +388,7 @@ if( (h->mb.i_neighbour & MB_TOP) && h->mb.i_mb_type_top != I_4x4 ) ctx++; - x264_cabac_mb_type_intra( h, cb, i_mb_type, 3+ctx, 3+3, 3+4, 3+5, 3+6, 3+7 ); + cabac_mb_type_intra( h, cb, i_mb_type, 3+ctx, 3+3, 3+4, 3+5, 3+6, 3+7 ); } else if( slice_type == SLICE_TYPE_P ) { @@ -396,7 +396,7 @@ x264_cabac_encode_decision_noup( cb, 14, 1 ); /* suffix */ - x264_cabac_mb_type_intra( h, cb, i_mb_type, 17+0, 17+1, 17+2, 17+2, 17+3, 17+3 ); + cabac_mb_type_intra( h, cb, i_mb_type, 17+0, 17+1, 17+2, 17+2, 17+3, 17+3 ); } else if( slice_type == SLICE_TYPE_B ) { @@ -408,7 +408,7 @@ x264_cabac_encode_decision( cb, 27+5, 1 ); /* suffix */ - x264_cabac_mb_type_intra( h, cb, i_mb_type, 32+0, 32+1, 32+2, 32+2, 32+3, 32+3 ); + cabac_mb_type_intra( h, cb, i_mb_type, 32+0, 32+1, 32+2, 32+2, 32+3, 32+3 ); } if( i_mb_type == I_PCM ) @@ -417,22 +417,22 @@ if( i_mb_type != I_16x16 ) { if( h->pps->b_transform_8x8_mode ) - x264_cabac_transform_size( h, cb ); + cabac_transform_size( h, cb ); int di = h->mb.b_transform_8x8 ? 
4 : 1; for( int i = 0; i < 16; i += di ) { const int i_pred = x264_mb_predict_intra4x4_mode( h, i ); const int i_mode = x264_mb_pred_mode4x4_fix( h->mb.cache.intra4x4_pred_mode[x264_scan8[i]] ); - x264_cabac_intra4x4_pred_mode( cb, i_pred, i_mode ); + cabac_intra4x4_pred_mode( cb, i_pred, i_mode ); } } if( chroma ) - x264_cabac_intra_chroma_pred_mode( h, cb ); + cabac_intra_chroma_pred_mode( h, cb ); } -static ALWAYS_INLINE void x264_cabac_mb_header_p( x264_t *h, x264_cabac_t *cb, int i_mb_type, int chroma ) +static ALWAYS_INLINE void cabac_mb_header_p( x264_t *h, x264_cabac_t *cb, int i_mb_type, int chroma ) { if( i_mb_type == P_L0 ) { @@ -442,8 +442,8 @@ x264_cabac_encode_decision_noup( cb, 15, 0 ); x264_cabac_encode_decision_noup( cb, 16, 0 ); if( h->mb.pic.i_fref[0] > 1 ) - x264_cabac_ref_p( h, cb, 0 ); - x264_cabac_mvd( h, cb, 0, 0, 4, 4 ); + cabac_ref_p( h, cb, 0 ); + cabac_mvd( h, cb, 0, 0, 4, 4 ); } else if( h->mb.i_partition == D_16x8 ) { @@ -451,11 +451,11 @@ x264_cabac_encode_decision_noup( cb, 17, 1 ); if( h->mb.pic.i_fref[0] > 1 ) { - x264_cabac_ref_p( h, cb, 0 ); - x264_cabac_ref_p( h, cb, 8 ); + cabac_ref_p( h, cb, 0 ); + cabac_ref_p( h, cb, 8 ); } - x264_cabac_mvd( h, cb, 0, 0, 4, 2 ); - x264_cabac_mvd( h, cb, 0, 8, 4, 2 ); + cabac_mvd( h, cb, 0, 0, 4, 2 ); + cabac_mvd( h, cb, 0, 8, 4, 2 ); } else //if( h->mb.i_partition == D_8x16 ) { @@ -463,11 +463,11 @@ x264_cabac_encode_decision_noup( cb, 17, 0 ); if( h->mb.pic.i_fref[0] > 1 ) { - x264_cabac_ref_p( h, cb, 0 ); - x264_cabac_ref_p( h, cb, 4 ); + cabac_ref_p( h, cb, 0 ); + cabac_ref_p( h, cb, 4 ); } - x264_cabac_mvd( h, cb, 0, 0, 2, 4 ); - x264_cabac_mvd( h, cb, 0, 4, 2, 4 ); + cabac_mvd( h, cb, 0, 0, 2, 4 ); + cabac_mvd( h, cb, 0, 4, 2, 4 ); } } else if( i_mb_type == P_8x8 ) @@ -478,25 +478,25 @@ /* sub mb type */ for( int i = 0; i < 4; i++ ) - x264_cabac_subpartition_p( cb, h->mb.i_sub_partition[i] ); + cabac_subpartition_p( cb, h->mb.i_sub_partition[i] ); /* ref 0 */ if( h->mb.pic.i_fref[0] > 1 
) { - x264_cabac_ref_p( h, cb, 0 ); - x264_cabac_ref_p( h, cb, 4 ); - x264_cabac_ref_p( h, cb, 8 ); - x264_cabac_ref_p( h, cb, 12 ); + cabac_ref_p( h, cb, 0 ); + cabac_ref_p( h, cb, 4 ); + cabac_ref_p( h, cb, 8 ); + cabac_ref_p( h, cb, 12 ); } for( int i = 0; i < 4; i++ ) - x264_cabac_8x8_mvd( h, cb, i ); + cabac_8x8_mvd( h, cb, i ); } else /* intra */ - x264_cabac_mb_header_i( h, cb, i_mb_type, SLICE_TYPE_P, chroma ); + cabac_mb_header_i( h, cb, i_mb_type, SLICE_TYPE_P, chroma ); } -static ALWAYS_INLINE void x264_cabac_mb_header_b( x264_t *h, x264_cabac_t *cb, int i_mb_type, int chroma ) +static ALWAYS_INLINE void cabac_mb_header_b( x264_t *h, x264_cabac_t *cb, int i_mb_type, int chroma ) { int ctx = 0; if( (h->mb.i_neighbour & MB_LEFT) && h->mb.i_mb_type_left[0] != B_SKIP && h->mb.i_mb_type_left[0] != B_DIRECT ) @@ -521,26 +521,26 @@ /* sub mb type */ for( int i = 0; i < 4; i++ ) - x264_cabac_subpartition_b( cb, h->mb.i_sub_partition[i] ); + cabac_subpartition_b( cb, h->mb.i_sub_partition[i] ); /* ref */ if( h->mb.pic.i_fref[0] > 1 ) for( int i = 0; i < 4; i++ ) if( x264_mb_partition_listX_table[0][ h->mb.i_sub_partition[i] ] ) - x264_cabac_ref_b( h, cb, 0, 4*i ); + cabac_ref_b( h, cb, 0, 4*i ); if( h->mb.pic.i_fref[1] > 1 ) for( int i = 0; i < 4; i++ ) if( x264_mb_partition_listX_table[1][ h->mb.i_sub_partition[i] ] ) - x264_cabac_ref_b( h, cb, 1, 4*i ); + cabac_ref_b( h, cb, 1, 4*i ); for( int i = 0; i < 4; i++ ) if( x264_mb_partition_listX_table[0][ h->mb.i_sub_partition[i] ] ) - x264_cabac_mvd( h, cb, 0, 4*i, 2, 2 ); + cabac_mvd( h, cb, 0, 4*i, 2, 2 ); for( int i = 0; i < 4; i++ ) if( x264_mb_partition_listX_table[1][ h->mb.i_sub_partition[i] ] ) - x264_cabac_mvd( h, cb, 1, 4*i, 2, 2 ); + cabac_mvd( h, cb, 1, 4*i, 2, 2 ); } else if( i_mb_type >= B_L0_L0 && i_mb_type <= B_BI_BI ) { @@ -576,40 +576,40 @@ if( h->mb.pic.i_fref[0] > 1 ) { if( b_list[0][0] ) - x264_cabac_ref_b( h, cb, 0, 0 ); + cabac_ref_b( h, cb, 0, 0 ); if( b_list[0][1] && h->mb.i_partition != 
D_16x16 ) - x264_cabac_ref_b( h, cb, 0, 8 >> (h->mb.i_partition == D_8x16) ); + cabac_ref_b( h, cb, 0, 8 >> (h->mb.i_partition == D_8x16) ); } if( h->mb.pic.i_fref[1] > 1 ) { if( b_list[1][0] ) - x264_cabac_ref_b( h, cb, 1, 0 ); + cabac_ref_b( h, cb, 1, 0 ); if( b_list[1][1] && h->mb.i_partition != D_16x16 ) - x264_cabac_ref_b( h, cb, 1, 8 >> (h->mb.i_partition == D_8x16) ); + cabac_ref_b( h, cb, 1, 8 >> (h->mb.i_partition == D_8x16) ); } for( int i_list = 0; i_list < 2; i_list++ ) { if( h->mb.i_partition == D_16x16 ) { - if( b_list[i_list][0] ) x264_cabac_mvd( h, cb, i_list, 0, 4, 4 ); + if( b_list[i_list][0] ) cabac_mvd( h, cb, i_list, 0, 4, 4 ); } else if( h->mb.i_partition == D_16x8 ) { - if( b_list[i_list][0] ) x264_cabac_mvd( h, cb, i_list, 0, 4, 2 ); - if( b_list[i_list][1] ) x264_cabac_mvd( h, cb, i_list, 8, 4, 2 ); + if( b_list[i_list][0] ) cabac_mvd( h, cb, i_list, 0, 4, 2 ); + if( b_list[i_list][1] ) cabac_mvd( h, cb, i_list, 8, 4, 2 ); } else //if( h->mb.i_partition == D_8x16 ) { - if( b_list[i_list][0] ) x264_cabac_mvd( h, cb, i_list, 0, 2, 4 ); - if( b_list[i_list][1] ) x264_cabac_mvd( h, cb, i_list, 4, 2, 4 ); + if( b_list[i_list][0] ) cabac_mvd( h, cb, i_list, 0, 2, 4 ); + if( b_list[i_list][1] ) cabac_mvd( h, cb, i_list, 4, 2, 4 ); } } } else /* intra */ - x264_cabac_mb_header_i( h, cb, i_mb_type, SLICE_TYPE_B, chroma ); + cabac_mb_header_i( h, cb, i_mb_type, SLICE_TYPE_B, chroma ); } -static int ALWAYS_INLINE x264_cabac_cbf_ctxidxinc( x264_t *h, int i_cat, int i_idx, int b_intra, int b_dc ) +static ALWAYS_INLINE int cabac_cbf_ctxidxinc( x264_t *h, int i_cat, int i_idx, int b_intra, int b_dc ) { static const uint16_t base_ctx[14] = {85,89,93,97,101,1012,460,464,468,1016,472,476,480,1020}; @@ -644,53 +644,6 @@ } } -#if !RDO_SKIP_BS -extern const uint8_t x264_significant_coeff_flag_offset_8x8[2][64]; -extern const uint8_t x264_last_coeff_flag_offset_8x8[63]; -extern const uint8_t x264_coeff_flag_offset_chroma_422_dc[7]; -extern const uint16_t 
x264_significant_coeff_flag_offset[2][16]; -extern const uint16_t x264_last_coeff_flag_offset[2][16]; -extern const uint16_t x264_coeff_abs_level_m1_offset[16]; -extern const uint8_t x264_count_cat_m1[14]; -#else -/* Padded to [64] for easier addressing */ -const uint8_t x264_significant_coeff_flag_offset_8x8[2][64] = -{{ - 0, 1, 2, 3, 4, 5, 5, 4, 4, 3, 3, 4, 4, 4, 5, 5, - 4, 4, 4, 4, 3, 3, 6, 7, 7, 7, 8, 9,10, 9, 8, 7, - 7, 6,11,12,13,11, 6, 7, 8, 9,14,10, 9, 8, 6,11, - 12,13,11, 6, 9,14,10, 9,11,12,13,11,14,10,12 -},{ - 0, 1, 1, 2, 2, 3, 3, 4, 5, 6, 7, 7, 7, 8, 4, 5, - 6, 9,10,10, 8,11,12,11, 9, 9,10,10, 8,11,12,11, - 9, 9,10,10, 8,11,12,11, 9, 9,10,10, 8,13,13, 9, - 9,10,10, 8,13,13, 9, 9,10,10,14,14,14,14,14 -}}; -const uint8_t x264_last_coeff_flag_offset_8x8[63] = -{ - 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 3, 3, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4, - 5, 5, 5, 5, 6, 6, 6, 6, 7, 7, 7, 7, 8, 8, 8 -}; -const uint8_t x264_coeff_flag_offset_chroma_422_dc[7] = { 0, 0, 1, 1, 2, 2, 2 }; /* MIN( i/2, 2 ) */ -const uint16_t x264_significant_coeff_flag_offset[2][16] = -{ - { 105+0, 105+15, 105+29, 105+44, 105+47, 402, 484+0, 484+15, 484+29, 660, 528+0, 528+15, 528+29, 718, 0, 0 }, - { 277+0, 277+15, 277+29, 277+44, 277+47, 436, 776+0, 776+15, 776+29, 675, 820+0, 820+15, 820+29, 733, 0, 0 } -}; -const uint16_t x264_last_coeff_flag_offset[2][16] = -{ - { 166+0, 166+15, 166+29, 166+44, 166+47, 417, 572+0, 572+15, 572+29, 690, 616+0, 616+15, 616+29, 748, 0, 0 }, - { 338+0, 338+15, 338+29, 338+44, 338+47, 451, 864+0, 864+15, 864+29, 699, 908+0, 908+15, 908+29, 757, 0, 0 } -}; -const uint16_t x264_coeff_abs_level_m1_offset[16] = -{ - 227+0, 227+10, 227+20, 227+30, 227+39, 426, 952+0, 952+10, 952+20, 708, 982+0, 982+10, 982+20, 766 -}; -const uint8_t x264_count_cat_m1[14] = {15, 14, 15, 3, 14, 63, 15, 14, 15, 63, 15, 14, 15, 63}; -#endif - // node ctx: 0..3: abslevel1 (with abslevelgt1 == 0). 
// 4..7: abslevelgt1 + 3 (and abslevel1 doesn't matter). /* map node ctx => cabac ctx for level=1 */ @@ -709,7 +662,7 @@ }; #if !RDO_SKIP_BS -static ALWAYS_INLINE void x264_cabac_block_residual_internal( x264_t *h, x264_cabac_t *cb, int ctx_block_cat, dctcoef *l, int chroma422dc ) +static ALWAYS_INLINE void cabac_block_residual_internal( x264_t *h, x264_cabac_t *cb, int ctx_block_cat, dctcoef *l, int chroma422dc ) { int ctx_sig = x264_significant_coeff_flag_offset[MB_INTERLACED][ctx_block_cat]; int ctx_last = x264_last_coeff_flag_offset[MB_INTERLACED][ctx_block_cat]; @@ -796,10 +749,10 @@ void x264_cabac_block_residual_c( x264_t *h, x264_cabac_t *cb, int ctx_block_cat, dctcoef *l ) { - x264_cabac_block_residual_internal( h, cb, ctx_block_cat, l, 0 ); + cabac_block_residual_internal( h, cb, ctx_block_cat, l, 0 ); } -static void ALWAYS_INLINE x264_cabac_block_residual( x264_t *h, x264_cabac_t *cb, int ctx_block_cat, dctcoef *l ) +static ALWAYS_INLINE void cabac_block_residual( x264_t *h, x264_cabac_t *cb, int ctx_block_cat, dctcoef *l ) { #if ARCH_X86_64 && HAVE_MMX && !defined( __MACH__ ) h->bsf.cabac_block_residual_internal( l, MB_INTERLACED, ctx_block_cat, cb ); @@ -807,19 +760,19 @@ x264_cabac_block_residual_c( h, cb, ctx_block_cat, l ); #endif } -static void x264_cabac_block_residual_422_dc( x264_t *h, x264_cabac_t *cb, int ctx_block_cat, dctcoef *l ) +static void cabac_block_residual_422_dc( x264_t *h, x264_cabac_t *cb, int ctx_block_cat, dctcoef *l ) { /* Template a version specifically for chroma 4:2:2 DC in order to avoid * slowing down everything else due to the added complexity. */ - x264_cabac_block_residual_internal( h, cb, DCT_CHROMA_DC, l, 1 ); + cabac_block_residual_internal( h, cb, DCT_CHROMA_DC, l, 1 ); } -#define x264_cabac_block_residual_8x8( h, cb, cat, l ) x264_cabac_block_residual( h, cb, cat, l ) +#define cabac_block_residual_8x8( h, cb, cat, l ) cabac_block_residual( h, cb, cat, l ) #else /* Faster RDO by merging sigmap and level coding. 
Note that for 8x8dct and chroma 4:2:2 dc this is * slightly incorrect because the sigmap is not reversible (contexts are repeated). However, there * is nearly no quality penalty for this (~0.001db) and the speed boost (~30%) is worth it. */ -static void ALWAYS_INLINE x264_cabac_block_residual_internal( x264_t *h, x264_cabac_t *cb, int ctx_block_cat, dctcoef *l, int b_8x8, int chroma422dc ) +static ALWAYS_INLINE void cabac_block_residual_internal( x264_t *h, x264_cabac_t *cb, int ctx_block_cat, dctcoef *l, int b_8x8, int chroma422dc ) { const uint8_t *sig_offset = x264_significant_coeff_flag_offset_8x8[MB_INTERLACED]; int ctx_sig = x264_significant_coeff_flag_offset[MB_INTERLACED][ctx_block_cat]; @@ -906,14 +859,14 @@ void x264_cabac_block_residual_8x8_rd_c( x264_t *h, x264_cabac_t *cb, int ctx_block_cat, dctcoef *l ) { - x264_cabac_block_residual_internal( h, cb, ctx_block_cat, l, 1, 0 ); + cabac_block_residual_internal( h, cb, ctx_block_cat, l, 1, 0 ); } void x264_cabac_block_residual_rd_c( x264_t *h, x264_cabac_t *cb, int ctx_block_cat, dctcoef *l ) { - x264_cabac_block_residual_internal( h, cb, ctx_block_cat, l, 0, 0 ); + cabac_block_residual_internal( h, cb, ctx_block_cat, l, 0, 0 ); } -static ALWAYS_INLINE void x264_cabac_block_residual_8x8( x264_t *h, x264_cabac_t *cb, int ctx_block_cat, dctcoef *l ) +static ALWAYS_INLINE void cabac_block_residual_8x8( x264_t *h, x264_cabac_t *cb, int ctx_block_cat, dctcoef *l ) { #if ARCH_X86_64 && HAVE_MMX && !defined( __MACH__ ) h->bsf.cabac_block_residual_8x8_rd_internal( l, MB_INTERLACED, ctx_block_cat, cb ); @@ -921,7 +874,7 @@ x264_cabac_block_residual_8x8_rd_c( h, cb, ctx_block_cat, l ); #endif } -static ALWAYS_INLINE void x264_cabac_block_residual( x264_t *h, x264_cabac_t *cb, int ctx_block_cat, dctcoef *l ) +static ALWAYS_INLINE void cabac_block_residual( x264_t *h, x264_cabac_t *cb, int ctx_block_cat, dctcoef *l ) { #if ARCH_X86_64 && HAVE_MMX && !defined( __MACH__ ) h->bsf.cabac_block_residual_rd_internal( l, 
MB_INTERLACED, ctx_block_cat, cb ); @@ -930,38 +883,38 @@ #endif } -static void x264_cabac_block_residual_422_dc( x264_t *h, x264_cabac_t *cb, int ctx_block_cat, dctcoef *l ) +static void cabac_block_residual_422_dc( x264_t *h, x264_cabac_t *cb, int ctx_block_cat, dctcoef *l ) { - x264_cabac_block_residual_internal( h, cb, DCT_CHROMA_DC, l, 0, 1 ); + cabac_block_residual_internal( h, cb, DCT_CHROMA_DC, l, 0, 1 ); } #endif -#define x264_cabac_block_residual_cbf_internal( h, cb, ctx_block_cat, i_idx, l, b_intra, b_dc, name )\ +#define cabac_block_residual_cbf_internal( h, cb, ctx_block_cat, i_idx, l, b_intra, b_dc, name )\ do\ {\ - int ctxidxinc = x264_cabac_cbf_ctxidxinc( h, ctx_block_cat, i_idx, b_intra, b_dc );\ + int ctxidxinc = cabac_cbf_ctxidxinc( h, ctx_block_cat, i_idx, b_intra, b_dc );\ if( h->mb.cache.non_zero_count[x264_scan8[i_idx]] )\ {\ x264_cabac_encode_decision( cb, ctxidxinc, 1 );\ - x264_cabac_block_residual##name( h, cb, ctx_block_cat, l );\ + cabac_block_residual##name( h, cb, ctx_block_cat, l );\ }\ else\ x264_cabac_encode_decision( cb, ctxidxinc, 0 );\ } while( 0 ) -#define x264_cabac_block_residual_dc_cbf( h, cb, ctx_block_cat, i_idx, l, b_intra )\ - x264_cabac_block_residual_cbf_internal( h, cb, ctx_block_cat, i_idx, l, b_intra, 1, ) +#define cabac_block_residual_dc_cbf( h, cb, ctx_block_cat, i_idx, l, b_intra )\ + cabac_block_residual_cbf_internal( h, cb, ctx_block_cat, i_idx, l, b_intra, 1, ) -#define x264_cabac_block_residual_cbf( h, cb, ctx_block_cat, i_idx, l, b_intra )\ - x264_cabac_block_residual_cbf_internal( h, cb, ctx_block_cat, i_idx, l, b_intra, 0, ) +#define cabac_block_residual_cbf( h, cb, ctx_block_cat, i_idx, l, b_intra )\ + cabac_block_residual_cbf_internal( h, cb, ctx_block_cat, i_idx, l, b_intra, 0, ) -#define x264_cabac_block_residual_8x8_cbf( h, cb, ctx_block_cat, i_idx, l, b_intra )\ - x264_cabac_block_residual_cbf_internal( h, cb, ctx_block_cat, i_idx, l, b_intra, 0, _8x8 ) +#define cabac_block_residual_8x8_cbf( h, cb, 
ctx_block_cat, i_idx, l, b_intra )\ + cabac_block_residual_cbf_internal( h, cb, ctx_block_cat, i_idx, l, b_intra, 0, _8x8 ) -#define x264_cabac_block_residual_422_dc_cbf( h, cb, ch, b_intra )\ - x264_cabac_block_residual_cbf_internal( h, cb, DCT_CHROMA_DC, CHROMA_DC+(ch), h->dct.chroma_dc[ch], b_intra, 1, _422_dc ) +#define cabac_block_residual_422_dc_cbf( h, cb, ch, b_intra )\ + cabac_block_residual_cbf_internal( h, cb, DCT_CHROMA_DC, CHROMA_DC+(ch), h->dct.chroma_dc[ch], b_intra, 1, _422_dc ) -static ALWAYS_INLINE void x264_macroblock_write_cabac_internal( x264_t *h, x264_cabac_t *cb, int plane_count, int chroma ) +static ALWAYS_INLINE void macroblock_write_cabac_internal( x264_t *h, x264_cabac_t *cb, int plane_count, int chroma ) { const int i_mb_type = h->mb.i_type; @@ -972,16 +925,16 @@ if( SLICE_MBAFF && (!(h->mb.i_mb_y & 1) || IS_SKIP(h->mb.type[h->mb.i_mb_xy - h->mb.i_mb_stride])) ) { - x264_cabac_field_decoding_flag( h, cb ); + cabac_field_decoding_flag( h, cb ); } #endif if( h->sh.i_type == SLICE_TYPE_P ) - x264_cabac_mb_header_p( h, cb, i_mb_type, chroma ); + cabac_mb_header_p( h, cb, i_mb_type, chroma ); else if( h->sh.i_type == SLICE_TYPE_B ) - x264_cabac_mb_header_b( h, cb, i_mb_type, chroma ); + cabac_mb_header_b( h, cb, i_mb_type, chroma ); else //if( h->sh.i_type == SLICE_TYPE_I ) - x264_cabac_mb_header_i( h, cb, i_mb_type, SLICE_TYPE_I, chroma ); + cabac_mb_header_i( h, cb, i_mb_type, SLICE_TYPE_I, chroma ); #if !RDO_SKIP_BS i_mb_pos_tex = x264_cabac_pos( cb ); @@ -1012,20 +965,20 @@ if( i_mb_type != I_16x16 ) { - x264_cabac_cbp_luma( h, cb ); + cabac_cbp_luma( h, cb ); if( chroma ) - x264_cabac_cbp_chroma( h, cb ); + cabac_cbp_chroma( h, cb ); } if( x264_mb_transform_8x8_allowed( h ) && h->mb.i_cbp_luma ) { - x264_cabac_transform_size( h, cb ); + cabac_transform_size( h, cb ); } if( h->mb.i_cbp_luma || (chroma && h->mb.i_cbp_chroma) || i_mb_type == I_16x16 ) { const int b_intra = IS_INTRA( i_mb_type ); - x264_cabac_qp_delta( h, cb ); + 
cabac_qp_delta( h, cb ); /* write residual */ if( i_mb_type == I_16x16 ) @@ -1033,12 +986,12 @@ /* DC Luma */ for( int p = 0; p < plane_count; p++ ) { - x264_cabac_block_residual_dc_cbf( h, cb, ctx_cat_plane[DCT_LUMA_DC][p], LUMA_DC+p, h->dct.luma16x16_dc[p], 1 ); + cabac_block_residual_dc_cbf( h, cb, ctx_cat_plane[DCT_LUMA_DC][p], LUMA_DC+p, h->dct.luma16x16_dc[p], 1 ); /* AC Luma */ if( h->mb.i_cbp_luma ) for( int i = p*16; i < p*16+16; i++ ) - x264_cabac_block_residual_cbf( h, cb, ctx_cat_plane[DCT_LUMA_AC][p], i, h->dct.luma4x4[i]+1, 1 ); + cabac_block_residual_cbf( h, cb, ctx_cat_plane[DCT_LUMA_AC][p], i, h->dct.luma4x4[i]+1, 1 ); } } else if( h->mb.b_transform_8x8 ) @@ -1086,14 +1039,14 @@ for( int p = 0; p < 3; p++ ) FOREACH_BIT( i, 0, h->mb.i_cbp_luma ) - x264_cabac_block_residual_8x8_cbf( h, cb, ctx_cat_plane[DCT_LUMA_8x8][p], i*4+p*16, h->dct.luma8x8[i+p*4], b_intra ); + cabac_block_residual_8x8_cbf( h, cb, ctx_cat_plane[DCT_LUMA_8x8][p], i*4+p*16, h->dct.luma8x8[i+p*4], b_intra ); MUNGE_8x8_NNZ( RESTORE ) } else { FOREACH_BIT( i, 0, h->mb.i_cbp_luma ) - x264_cabac_block_residual_8x8( h, cb, DCT_LUMA_8x8, h->dct.luma8x8[i] ); + cabac_block_residual_8x8( h, cb, DCT_LUMA_8x8, h->dct.luma8x8[i] ); } } else @@ -1101,20 +1054,20 @@ for( int p = 0; p < plane_count; p++ ) FOREACH_BIT( i8x8, 0, h->mb.i_cbp_luma ) for( int i = 0; i < 4; i++ ) - x264_cabac_block_residual_cbf( h, cb, ctx_cat_plane[DCT_LUMA_4x4][p], i+i8x8*4+p*16, h->dct.luma4x4[i+i8x8*4+p*16], b_intra ); + cabac_block_residual_cbf( h, cb, ctx_cat_plane[DCT_LUMA_4x4][p], i+i8x8*4+p*16, h->dct.luma4x4[i+i8x8*4+p*16], b_intra ); } if( chroma && h->mb.i_cbp_chroma ) /* Chroma DC residual present */ { if( CHROMA_FORMAT == CHROMA_422 ) { - x264_cabac_block_residual_422_dc_cbf( h, cb, 0, b_intra ); - x264_cabac_block_residual_422_dc_cbf( h, cb, 1, b_intra ); + cabac_block_residual_422_dc_cbf( h, cb, 0, b_intra ); + cabac_block_residual_422_dc_cbf( h, cb, 1, b_intra ); } else { - 
x264_cabac_block_residual_dc_cbf( h, cb, DCT_CHROMA_DC, CHROMA_DC+0, h->dct.chroma_dc[0], b_intra ); - x264_cabac_block_residual_dc_cbf( h, cb, DCT_CHROMA_DC, CHROMA_DC+1, h->dct.chroma_dc[1], b_intra ); + cabac_block_residual_dc_cbf( h, cb, DCT_CHROMA_DC, CHROMA_DC+0, h->dct.chroma_dc[0], b_intra ); + cabac_block_residual_dc_cbf( h, cb, DCT_CHROMA_DC, CHROMA_DC+1, h->dct.chroma_dc[1], b_intra ); } if( h->mb.i_cbp_chroma == 2 ) /* Chroma AC residual present */ @@ -1122,7 +1075,7 @@ int step = 8 << CHROMA_V_SHIFT; for( int i = 16; i < 3*16; i += step ) for( int j = i; j < i+4; j++ ) - x264_cabac_block_residual_cbf( h, cb, DCT_CHROMA_AC, j, h->dct.luma4x4[j]+1, b_intra ); + cabac_block_residual_cbf( h, cb, DCT_CHROMA_AC, j, h->dct.luma4x4[j]+1, b_intra ); } } } @@ -1135,9 +1088,11 @@ void x264_macroblock_write_cabac( x264_t *h, x264_cabac_t *cb ) { if( CHROMA444 ) - x264_macroblock_write_cabac_internal( h, cb, 3, 0 ); + macroblock_write_cabac_internal( h, cb, 3, 0 ); + else if( CHROMA_FORMAT ) + macroblock_write_cabac_internal( h, cb, 1, 1 ); else - x264_macroblock_write_cabac_internal( h, cb, 1, 1 ); + macroblock_write_cabac_internal( h, cb, 1, 0 ); } #if RDO_SKIP_BS @@ -1148,7 +1103,7 @@ * only writes subpartition for p8x8, needed for sub-8x8 mode decision RDO * works on all partition sizes except 16x16 *****************************************************************************/ -static void x264_partition_size_cabac( x264_t *h, x264_cabac_t *cb, int i8, int i_pixel ) +static void partition_size_cabac( x264_t *h, x264_cabac_t *cb, int i8, int i_pixel ) { const int i_mb_type = h->mb.i_type; int b_8x16 = h->mb.i_partition == D_8x16; @@ -1156,22 +1111,22 @@ if( i_mb_type == P_8x8 ) { - x264_cabac_8x8_mvd( h, cb, i8 ); - x264_cabac_subpartition_p( cb, h->mb.i_sub_partition[i8] ); + cabac_8x8_mvd( h, cb, i8 ); + cabac_subpartition_p( cb, h->mb.i_sub_partition[i8] ); } else if( i_mb_type == P_L0 ) - x264_cabac_mvd( h, cb, 0, 4*i8, 4>>b_8x16, 2<>b_8x16, 2< B_DIRECT && 
i_mb_type < B_8x8 ) { - if( x264_mb_type_list_table[ i_mb_type ][0][!!i8] ) x264_cabac_mvd( h, cb, 0, 4*i8, 4>>b_8x16, 2<>b_8x16, 2<>b_8x16, 2<>b_8x16, 2<mb.i_sub_partition[i8] ] ) - x264_cabac_mvd( h, cb, 0, 4*i8, 2, 2 ); + cabac_mvd( h, cb, 0, 4*i8, 2, 2 ); if( x264_mb_partition_listX_table[1][ h->mb.i_sub_partition[i8] ] ) - x264_cabac_mvd( h, cb, 1, 4*i8, 2, 2 ); + cabac_mvd( h, cb, 1, 4*i8, 2, 2 ); } for( int j = (i_pixel < PIXEL_8x8); j >= 0; j-- ) @@ -1182,14 +1137,14 @@ { if( CHROMA444 ) for( int p = 0; p < 3; p++ ) - x264_cabac_block_residual_8x8_cbf( h, cb, ctx_cat_plane[DCT_LUMA_8x8][p], i8*4+p*16, h->dct.luma8x8[i8+p*4], 0 ); + cabac_block_residual_8x8_cbf( h, cb, ctx_cat_plane[DCT_LUMA_8x8][p], i8*4+p*16, h->dct.luma8x8[i8+p*4], 0 ); else - x264_cabac_block_residual_8x8( h, cb, DCT_LUMA_8x8, h->dct.luma8x8[i8] ); + cabac_block_residual_8x8( h, cb, DCT_LUMA_8x8, h->dct.luma8x8[i8] ); } else for( int p = 0; p < plane_count; p++ ) for( int i4 = 0; i4 < 4; i4++ ) - x264_cabac_block_residual_cbf( h, cb, ctx_cat_plane[DCT_LUMA_4x4][p], i4+i8*4+p*16, h->dct.luma4x4[i4+i8*4+p*16], 0 ); + cabac_block_residual_cbf( h, cb, ctx_cat_plane[DCT_LUMA_4x4][p], i4+i8*4+p*16, h->dct.luma4x4[i4+i8*4+p*16], 0 ); } if( h->mb.i_cbp_chroma ) @@ -1197,15 +1152,15 @@ if( CHROMA_FORMAT == CHROMA_422 ) { int offset = (5*i8) & 0x09; - x264_cabac_block_residual_cbf( h, cb, DCT_CHROMA_AC, 16+offset, h->dct.luma4x4[16+offset]+1, 0 ); - x264_cabac_block_residual_cbf( h, cb, DCT_CHROMA_AC, 18+offset, h->dct.luma4x4[18+offset]+1, 0 ); - x264_cabac_block_residual_cbf( h, cb, DCT_CHROMA_AC, 32+offset, h->dct.luma4x4[32+offset]+1, 0 ); - x264_cabac_block_residual_cbf( h, cb, DCT_CHROMA_AC, 34+offset, h->dct.luma4x4[34+offset]+1, 0 ); + cabac_block_residual_cbf( h, cb, DCT_CHROMA_AC, 16+offset, h->dct.luma4x4[16+offset]+1, 0 ); + cabac_block_residual_cbf( h, cb, DCT_CHROMA_AC, 18+offset, h->dct.luma4x4[18+offset]+1, 0 ); + cabac_block_residual_cbf( h, cb, DCT_CHROMA_AC, 32+offset, 
h->dct.luma4x4[32+offset]+1, 0 ); + cabac_block_residual_cbf( h, cb, DCT_CHROMA_AC, 34+offset, h->dct.luma4x4[34+offset]+1, 0 ); } else { - x264_cabac_block_residual_cbf( h, cb, DCT_CHROMA_AC, 16+i8, h->dct.luma4x4[16+i8]+1, 0 ); - x264_cabac_block_residual_cbf( h, cb, DCT_CHROMA_AC, 32+i8, h->dct.luma4x4[32+i8]+1, 0 ); + cabac_block_residual_cbf( h, cb, DCT_CHROMA_AC, 16+i8, h->dct.luma4x4[16+i8]+1, 0 ); + cabac_block_residual_cbf( h, cb, DCT_CHROMA_AC, 32+i8, h->dct.luma4x4[32+i8]+1, 0 ); } } @@ -1213,63 +1168,63 @@ } } -static void x264_subpartition_size_cabac( x264_t *h, x264_cabac_t *cb, int i4, int i_pixel ) +static void subpartition_size_cabac( x264_t *h, x264_cabac_t *cb, int i4, int i_pixel ) { int b_8x4 = i_pixel == PIXEL_8x4; int plane_count = CHROMA444 ? 3 : 1; if( i_pixel == PIXEL_4x4 ) - x264_cabac_mvd( h, cb, 0, i4, 1, 1 ); + cabac_mvd( h, cb, 0, i4, 1, 1 ); else - x264_cabac_mvd( h, cb, 0, i4, 1+b_8x4, 2-b_8x4 ); + cabac_mvd( h, cb, 0, i4, 1+b_8x4, 2-b_8x4 ); for( int p = 0; p < plane_count; p++ ) { - x264_cabac_block_residual_cbf( h, cb, ctx_cat_plane[DCT_LUMA_4x4][p], p*16+i4, h->dct.luma4x4[p*16+i4], 0 ); + cabac_block_residual_cbf( h, cb, ctx_cat_plane[DCT_LUMA_4x4][p], p*16+i4, h->dct.luma4x4[p*16+i4], 0 ); if( i_pixel != PIXEL_4x4 ) - x264_cabac_block_residual_cbf( h, cb, ctx_cat_plane[DCT_LUMA_4x4][p], p*16+i4+2-b_8x4, h->dct.luma4x4[p*16+i4+2-b_8x4], 0 ); + cabac_block_residual_cbf( h, cb, ctx_cat_plane[DCT_LUMA_4x4][p], p*16+i4+2-b_8x4, h->dct.luma4x4[p*16+i4+2-b_8x4], 0 ); } } -static void x264_partition_i8x8_size_cabac( x264_t *h, x264_cabac_t *cb, int i8, int i_mode ) +static void partition_i8x8_size_cabac( x264_t *h, x264_cabac_t *cb, int i8, int i_mode ) { const int i_pred = x264_mb_predict_intra4x4_mode( h, 4*i8 ); i_mode = x264_mb_pred_mode4x4_fix( i_mode ); - x264_cabac_intra4x4_pred_mode( cb, i_pred, i_mode ); - x264_cabac_cbp_luma( h, cb ); + cabac_intra4x4_pred_mode( cb, i_pred, i_mode ); + cabac_cbp_luma( h, cb ); if( 
h->mb.i_cbp_luma & (1 << i8) ) { if( CHROMA444 ) for( int p = 0; p < 3; p++ ) - x264_cabac_block_residual_8x8_cbf( h, cb, ctx_cat_plane[DCT_LUMA_8x8][p], i8*4+p*16, h->dct.luma8x8[i8+p*4], 1 ); + cabac_block_residual_8x8_cbf( h, cb, ctx_cat_plane[DCT_LUMA_8x8][p], i8*4+p*16, h->dct.luma8x8[i8+p*4], 1 ); else - x264_cabac_block_residual_8x8( h, cb, DCT_LUMA_8x8, h->dct.luma8x8[i8] ); + cabac_block_residual_8x8( h, cb, DCT_LUMA_8x8, h->dct.luma8x8[i8] ); } } -static void x264_partition_i4x4_size_cabac( x264_t *h, x264_cabac_t *cb, int i4, int i_mode ) +static void partition_i4x4_size_cabac( x264_t *h, x264_cabac_t *cb, int i4, int i_mode ) { const int i_pred = x264_mb_predict_intra4x4_mode( h, i4 ); int plane_count = CHROMA444 ? 3 : 1; i_mode = x264_mb_pred_mode4x4_fix( i_mode ); - x264_cabac_intra4x4_pred_mode( cb, i_pred, i_mode ); + cabac_intra4x4_pred_mode( cb, i_pred, i_mode ); for( int p = 0; p < plane_count; p++ ) - x264_cabac_block_residual_cbf( h, cb, ctx_cat_plane[DCT_LUMA_4x4][p], i4+p*16, h->dct.luma4x4[i4+p*16], 1 ); + cabac_block_residual_cbf( h, cb, ctx_cat_plane[DCT_LUMA_4x4][p], i4+p*16, h->dct.luma4x4[i4+p*16], 1 ); } -static void x264_chroma_size_cabac( x264_t *h, x264_cabac_t *cb ) +static void chroma_size_cabac( x264_t *h, x264_cabac_t *cb ) { - x264_cabac_intra_chroma_pred_mode( h, cb ); - x264_cabac_cbp_chroma( h, cb ); + cabac_intra_chroma_pred_mode( h, cb ); + cabac_cbp_chroma( h, cb ); if( h->mb.i_cbp_chroma ) { if( CHROMA_FORMAT == CHROMA_422 ) { - x264_cabac_block_residual_422_dc_cbf( h, cb, 0, 1 ); - x264_cabac_block_residual_422_dc_cbf( h, cb, 1, 1 ); + cabac_block_residual_422_dc_cbf( h, cb, 0, 1 ); + cabac_block_residual_422_dc_cbf( h, cb, 1, 1 ); } else { - x264_cabac_block_residual_dc_cbf( h, cb, DCT_CHROMA_DC, CHROMA_DC+0, h->dct.chroma_dc[0], 1 ); - x264_cabac_block_residual_dc_cbf( h, cb, DCT_CHROMA_DC, CHROMA_DC+1, h->dct.chroma_dc[1], 1 ); + cabac_block_residual_dc_cbf( h, cb, DCT_CHROMA_DC, CHROMA_DC+0, h->dct.chroma_dc[0], 1 
); + cabac_block_residual_dc_cbf( h, cb, DCT_CHROMA_DC, CHROMA_DC+1, h->dct.chroma_dc[1], 1 ); } if( h->mb.i_cbp_chroma == 2 ) @@ -1277,7 +1232,7 @@ int step = 8 << CHROMA_V_SHIFT; for( int i = 16; i < 3*16; i += step ) for( int j = i; j < i+4; j++ ) - x264_cabac_block_residual_cbf( h, cb, DCT_CHROMA_AC, j, h->dct.luma4x4[j]+1, 1 ); + cabac_block_residual_cbf( h, cb, DCT_CHROMA_AC, j, h->dct.luma4x4[j]+1, 1 ); } } } diff -Nru x264-0.152.2854+gite9a5903/encoder/cavlc.c x264-0.158.2988+git-20191101.7817004/encoder/cavlc.c --- x264-0.152.2854+gite9a5903/encoder/cavlc.c 2017-12-31 12:50:51.000000000 +0000 +++ x264-0.158.2988+git-20191101.7817004/encoder/cavlc.c 2019-11-09 05:16:29.000000000 +0000 @@ -1,7 +1,7 @@ /***************************************************************************** * cavlc.c: cavlc bitstream writing ***************************************************************************** - * Copyright (C) 2003-2017 x264 project + * Copyright (C) 2003-2019 x264 project * * Authors: Laurent Aimar * Loren Merritt @@ -67,7 +67,7 @@ /**************************************************************************** * x264_cavlc_block_residual: ****************************************************************************/ -static inline int x264_cavlc_block_residual_escape( x264_t *h, int i_suffix_length, int level ) +static inline int cavlc_block_residual_escape( x264_t *h, int i_suffix_length, int level ) { bs_t *s = &h->out.bs; static const uint16_t next_suffix[7] = { 0, 3, 6, 12, 24, 48, 0xffff }; @@ -118,7 +118,7 @@ return i_suffix_length; } -static int x264_cavlc_block_residual_internal( x264_t *h, int ctx_block_cat, dctcoef *l, int nC ) +static int cavlc_block_residual_internal( x264_t *h, int ctx_block_cat, dctcoef *l, int nC ) { bs_t *s = &h->out.bs; static const uint8_t ctz_index[8] = {3,0,1,0,2,0,1,0}; @@ -163,7 +163,7 @@ i_suffix_length = x264_level_token[i_suffix_length][val_original].i_next; } else - i_suffix_length = x264_cavlc_block_residual_escape( h, 
i_suffix_length, val-LEVEL_TABLE_SIZE/2 ); + i_suffix_length = cavlc_block_residual_escape( h, i_suffix_length, val-LEVEL_TABLE_SIZE/2 ); for( int i = i_trailing+1; i < i_total; i++ ) { val = runlevel.level[i] + LEVEL_TABLE_SIZE/2; @@ -173,7 +173,7 @@ i_suffix_length = x264_level_token[i_suffix_length][val].i_next; } else - i_suffix_length = x264_cavlc_block_residual_escape( h, i_suffix_length, val-LEVEL_TABLE_SIZE/2 ); + i_suffix_length = cavlc_block_residual_escape( h, i_suffix_length, val-LEVEL_TABLE_SIZE/2 ); } } @@ -205,10 +205,10 @@ if( !*nnz )\ bs_write_vlc( &h->out.bs, x264_coeff0_token[nC] );\ else\ - *nnz = x264_cavlc_block_residual_internal(h,cat,l,nC);\ + *nnz = cavlc_block_residual_internal(h,cat,l,nC);\ } -static void x264_cavlc_qp_delta( x264_t *h ) +static void cavlc_qp_delta( x264_t *h ) { bs_t *s = &h->out.bs; int i_dqp = h->mb.i_qp - h->mb.i_last_qp; @@ -238,7 +238,7 @@ bs_write_se( s, i_dqp ); } -static void x264_cavlc_mvd( x264_t *h, int i_list, int idx, int width ) +static void cavlc_mvd( x264_t *h, int i_list, int idx, int width ) { bs_t *s = &h->out.bs; ALIGNED_4( int16_t mvp[2] ); @@ -247,31 +247,31 @@ bs_write_se( s, h->mb.cache.mv[i_list][x264_scan8[idx]][1] - mvp[1] ); } -static inline void x264_cavlc_8x8_mvd( x264_t *h, int i ) +static inline void cavlc_8x8_mvd( x264_t *h, int i ) { switch( h->mb.i_sub_partition[i] ) { case D_L0_8x8: - x264_cavlc_mvd( h, 0, 4*i, 2 ); + cavlc_mvd( h, 0, 4*i, 2 ); break; case D_L0_8x4: - x264_cavlc_mvd( h, 0, 4*i+0, 2 ); - x264_cavlc_mvd( h, 0, 4*i+2, 2 ); + cavlc_mvd( h, 0, 4*i+0, 2 ); + cavlc_mvd( h, 0, 4*i+2, 2 ); break; case D_L0_4x8: - x264_cavlc_mvd( h, 0, 4*i+0, 1 ); - x264_cavlc_mvd( h, 0, 4*i+1, 1 ); + cavlc_mvd( h, 0, 4*i+0, 1 ); + cavlc_mvd( h, 0, 4*i+1, 1 ); break; case D_L0_4x4: - x264_cavlc_mvd( h, 0, 4*i+0, 1 ); - x264_cavlc_mvd( h, 0, 4*i+1, 1 ); - x264_cavlc_mvd( h, 0, 4*i+2, 1 ); - x264_cavlc_mvd( h, 0, 4*i+3, 1 ); + cavlc_mvd( h, 0, 4*i+0, 1 ); + cavlc_mvd( h, 0, 4*i+1, 1 ); + 
cavlc_mvd( h, 0, 4*i+2, 1 ); + cavlc_mvd( h, 0, 4*i+3, 1 ); break; } } -static ALWAYS_INLINE void x264_cavlc_macroblock_luma_residual( x264_t *h, int plane_count ) +static ALWAYS_INLINE void cavlc_macroblock_luma_residual( x264_t *h, int plane_count ) { if( h->mb.b_transform_8x8 ) { @@ -290,9 +290,9 @@ } #if RDO_SKIP_BS -static ALWAYS_INLINE void x264_cavlc_partition_luma_residual( x264_t *h, int i8, int p ) +static ALWAYS_INLINE void cavlc_partition_luma_residual( x264_t *h, int i8, int p ) { - if( h->mb.b_transform_8x8 && h->mb.cache.non_zero_count[x264_scan8[i8*4]] ) + if( h->mb.b_transform_8x8 && h->mb.cache.non_zero_count[x264_scan8[i8*4+p*16]] ) h->zigzagf.interleave_8x8_cavlc( h->dct.luma4x4[i8*4+p*16], h->dct.luma8x8[i8+p*4], &h->mb.cache.non_zero_count[x264_scan8[i8*4+p*16]] ); @@ -302,7 +302,7 @@ } #endif -static void x264_cavlc_mb_header_i( x264_t *h, int i_mb_type, int i_mb_i_offset, int chroma ) +static void cavlc_mb_header_i( x264_t *h, int i_mb_type, int i_mb_i_offset, int chroma ) { bs_t *s = &h->out.bs; if( i_mb_type == I_16x16 ) @@ -334,7 +334,7 @@ bs_write_ue( s, x264_mb_chroma_pred_mode_fix[h->mb.i_chroma_pred_mode] ); } -static ALWAYS_INLINE void x264_cavlc_mb_header_p( x264_t *h, int i_mb_type, int chroma ) +static ALWAYS_INLINE void cavlc_mb_header_p( x264_t *h, int i_mb_type, int chroma ) { bs_t *s = &h->out.bs; if( i_mb_type == P_L0 ) @@ -345,7 +345,7 @@ if( h->mb.pic.i_fref[0] > 1 ) bs_write_te( s, h->mb.pic.i_fref[0] - 1, h->mb.cache.ref[0][x264_scan8[0]] ); - x264_cavlc_mvd( h, 0, 0, 4 ); + cavlc_mvd( h, 0, 0, 4 ); } else if( h->mb.i_partition == D_16x8 ) { @@ -355,8 +355,8 @@ bs_write_te( s, h->mb.pic.i_fref[0] - 1, h->mb.cache.ref[0][x264_scan8[0]] ); bs_write_te( s, h->mb.pic.i_fref[0] - 1, h->mb.cache.ref[0][x264_scan8[8]] ); } - x264_cavlc_mvd( h, 0, 0, 4 ); - x264_cavlc_mvd( h, 0, 8, 4 ); + cavlc_mvd( h, 0, 0, 4 ); + cavlc_mvd( h, 0, 8, 4 ); } else if( h->mb.i_partition == D_8x16 ) { @@ -366,8 +366,8 @@ bs_write_te( s, 
h->mb.pic.i_fref[0] - 1, h->mb.cache.ref[0][x264_scan8[0]] ); bs_write_te( s, h->mb.pic.i_fref[0] - 1, h->mb.cache.ref[0][x264_scan8[4]] ); } - x264_cavlc_mvd( h, 0, 0, 2 ); - x264_cavlc_mvd( h, 0, 4, 2 ); + cavlc_mvd( h, 0, 0, 2 ); + cavlc_mvd( h, 0, 4, 2 ); } } else if( i_mb_type == P_8x8 ) @@ -402,13 +402,13 @@ } for( int i = 0; i < 4; i++ ) - x264_cavlc_8x8_mvd( h, i ); + cavlc_8x8_mvd( h, i ); } else //if( IS_INTRA( i_mb_type ) ) - x264_cavlc_mb_header_i( h, i_mb_type, 5, chroma ); + cavlc_mb_header_i( h, i_mb_type, 5, chroma ); } -static ALWAYS_INLINE void x264_cavlc_mb_header_b( x264_t *h, int i_mb_type, int chroma ) +static ALWAYS_INLINE void cavlc_mb_header_b( x264_t *h, int i_mb_type, int chroma ) { bs_t *s = &h->out.bs; if( i_mb_type == B_8x8 ) @@ -432,10 +432,10 @@ /* mvd */ for( int i = 0; i < 4; i++ ) if( x264_mb_partition_listX_table[0][ h->mb.i_sub_partition[i] ] ) - x264_cavlc_mvd( h, 0, 4*i, 2 ); + cavlc_mvd( h, 0, 4*i, 2 ); for( int i = 0; i < 4; i++ ) if( x264_mb_partition_listX_table[1][ h->mb.i_sub_partition[i] ] ) - x264_cavlc_mvd( h, 1, 4*i, 2 ); + cavlc_mvd( h, 1, 4*i, 2 ); } else if( i_mb_type >= B_L0_L0 && i_mb_type <= B_BI_BI ) { @@ -450,8 +450,8 @@ { if( i_ref0_max && b_list[0][0] ) bs_write_te( s, i_ref0_max, h->mb.cache.ref[0][x264_scan8[0]] ); if( i_ref1_max && b_list[1][0] ) bs_write_te( s, i_ref1_max, h->mb.cache.ref[1][x264_scan8[0]] ); - if( b_list[0][0] ) x264_cavlc_mvd( h, 0, 0, 4 ); - if( b_list[1][0] ) x264_cavlc_mvd( h, 1, 0, 4 ); + if( b_list[0][0] ) cavlc_mvd( h, 0, 0, 4 ); + if( b_list[1][0] ) cavlc_mvd( h, 1, 0, 4 ); } else { @@ -461,24 +461,24 @@ if( i_ref1_max && b_list[1][1] ) bs_write_te( s, i_ref1_max, h->mb.cache.ref[1][x264_scan8[12]] ); if( h->mb.i_partition == D_16x8 ) { - if( b_list[0][0] ) x264_cavlc_mvd( h, 0, 0, 4 ); - if( b_list[0][1] ) x264_cavlc_mvd( h, 0, 8, 4 ); - if( b_list[1][0] ) x264_cavlc_mvd( h, 1, 0, 4 ); - if( b_list[1][1] ) x264_cavlc_mvd( h, 1, 8, 4 ); + if( b_list[0][0] ) cavlc_mvd( h, 0, 0, 
4 ); + if( b_list[0][1] ) cavlc_mvd( h, 0, 8, 4 ); + if( b_list[1][0] ) cavlc_mvd( h, 1, 0, 4 ); + if( b_list[1][1] ) cavlc_mvd( h, 1, 8, 4 ); } else //if( h->mb.i_partition == D_8x16 ) { - if( b_list[0][0] ) x264_cavlc_mvd( h, 0, 0, 2 ); - if( b_list[0][1] ) x264_cavlc_mvd( h, 0, 4, 2 ); - if( b_list[1][0] ) x264_cavlc_mvd( h, 1, 0, 2 ); - if( b_list[1][1] ) x264_cavlc_mvd( h, 1, 4, 2 ); + if( b_list[0][0] ) cavlc_mvd( h, 0, 0, 2 ); + if( b_list[0][1] ) cavlc_mvd( h, 0, 4, 2 ); + if( b_list[1][0] ) cavlc_mvd( h, 1, 0, 2 ); + if( b_list[1][1] ) cavlc_mvd( h, 1, 4, 2 ); } } } else if( i_mb_type == B_DIRECT ) bs_write1( s, 1 ); else //if( IS_INTRA( i_mb_type ) ) - x264_cavlc_mb_header_i( h, i_mb_type, 23, chroma ); + cavlc_mb_header_i( h, i_mb_type, 23, chroma ); } /***************************************************************************** @@ -489,7 +489,7 @@ bs_t *s = &h->out.bs; const int i_mb_type = h->mb.i_type; int plane_count = CHROMA444 ? 3 : 1; - int chroma = !CHROMA444; + int chroma = CHROMA_FORMAT == CHROMA_420 || CHROMA_FORMAT == CHROMA_422; #if RDO_SKIP_BS s->i_bits_encoded = 0; @@ -536,11 +536,11 @@ #endif if( h->sh.i_type == SLICE_TYPE_P ) - x264_cavlc_mb_header_p( h, i_mb_type, chroma ); + cavlc_mb_header_p( h, i_mb_type, chroma ); else if( h->sh.i_type == SLICE_TYPE_B ) - x264_cavlc_mb_header_b( h, i_mb_type, chroma ); + cavlc_mb_header_b( h, i_mb_type, chroma ); else //if( h->sh.i_type == SLICE_TYPE_I ) - x264_cavlc_mb_header_i( h, i_mb_type, 0, chroma ); + cavlc_mb_header_i( h, i_mb_type, 0, chroma ); #if !RDO_SKIP_BS i_mb_pos_tex = bs_pos( s ); @@ -557,7 +557,7 @@ if( i_mb_type == I_16x16 ) { - x264_cavlc_qp_delta( h ); + cavlc_qp_delta( h ); /* DC Luma */ for( int p = 0; p < plane_count; p++ ) @@ -572,8 +572,8 @@ } else if( h->mb.i_cbp_luma | h->mb.i_cbp_chroma ) { - x264_cavlc_qp_delta( h ); - x264_cavlc_macroblock_luma_residual( h, plane_count ); + cavlc_qp_delta( h ); + cavlc_macroblock_luma_residual( h, plane_count ); } if( 
h->mb.i_cbp_chroma ) { @@ -602,7 +602,7 @@ * only writes subpartition for p8x8, needed for sub-8x8 mode decision RDO * works on all partition sizes except 16x16 *****************************************************************************/ -static int x264_partition_size_cavlc( x264_t *h, int i8, int i_pixel ) +static int partition_size_cavlc( x264_t *h, int i8, int i_pixel ) { bs_t *s = &h->out.bs; const int i_mb_type = h->mb.i_type; @@ -614,28 +614,28 @@ if( i_mb_type == P_8x8 ) { - x264_cavlc_8x8_mvd( h, i8 ); + cavlc_8x8_mvd( h, i8 ); bs_write_ue( s, subpartition_p_to_golomb[ h->mb.i_sub_partition[i8] ] ); } else if( i_mb_type == P_L0 ) - x264_cavlc_mvd( h, 0, 4*i8, 4>>b_8x16 ); + cavlc_mvd( h, 0, 4*i8, 4>>b_8x16 ); else if( i_mb_type > B_DIRECT && i_mb_type < B_8x8 ) { - if( x264_mb_type_list_table[ i_mb_type ][0][!!i8] ) x264_cavlc_mvd( h, 0, 4*i8, 4>>b_8x16 ); - if( x264_mb_type_list_table[ i_mb_type ][1][!!i8] ) x264_cavlc_mvd( h, 1, 4*i8, 4>>b_8x16 ); + if( x264_mb_type_list_table[ i_mb_type ][0][!!i8] ) cavlc_mvd( h, 0, 4*i8, 4>>b_8x16 ); + if( x264_mb_type_list_table[ i_mb_type ][1][!!i8] ) cavlc_mvd( h, 1, 4*i8, 4>>b_8x16 ); } else //if( i_mb_type == B_8x8 ) { if( x264_mb_partition_listX_table[0][ h->mb.i_sub_partition[i8] ] ) - x264_cavlc_mvd( h, 0, 4*i8, 2 ); + cavlc_mvd( h, 0, 4*i8, 2 ); if( x264_mb_partition_listX_table[1][ h->mb.i_sub_partition[i8] ] ) - x264_cavlc_mvd( h, 1, 4*i8, 2 ); + cavlc_mvd( h, 1, 4*i8, 2 ); } for( j = (i_pixel < PIXEL_8x8); j >= 0; j-- ) { for( int p = 0; p < plane_count; p++ ) - x264_cavlc_partition_luma_residual( h, i8, p ); + cavlc_partition_luma_residual( h, i8, p ); if( h->mb.i_cbp_chroma ) { if( CHROMA_FORMAT == CHROMA_422 ) @@ -658,12 +658,12 @@ return h->out.bs.i_bits_encoded; } -static int x264_subpartition_size_cavlc( x264_t *h, int i4, int i_pixel ) +static int subpartition_size_cavlc( x264_t *h, int i4, int i_pixel ) { int plane_count = CHROMA444 ? 
3 : 1; int b_8x4 = i_pixel == PIXEL_8x4; h->out.bs.i_bits_encoded = 0; - x264_cavlc_mvd( h, 0, i4, 1+b_8x4 ); + cavlc_mvd( h, 0, i4, 1+b_8x4 ); for( int p = 0; p < plane_count; p++ ) { x264_cavlc_block_residual( h, DCT_LUMA_4x4, p*16+i4, h->dct.luma4x4[p*16+i4] ); @@ -674,7 +674,7 @@ return h->out.bs.i_bits_encoded; } -static int x264_cavlc_intra4x4_pred_size( x264_t *h, int i4, int i_mode ) +static int cavlc_intra4x4_pred_size( x264_t *h, int i4, int i_mode ) { if( x264_mb_predict_intra4x4_mode( h, i4 ) == x264_mb_pred_mode4x4_fix( i_mode ) ) return 1; @@ -682,26 +682,26 @@ return 4; } -static int x264_partition_i8x8_size_cavlc( x264_t *h, int i8, int i_mode ) +static int partition_i8x8_size_cavlc( x264_t *h, int i8, int i_mode ) { int plane_count = CHROMA444 ? 3 : 1; - h->out.bs.i_bits_encoded = x264_cavlc_intra4x4_pred_size( h, 4*i8, i_mode ); + h->out.bs.i_bits_encoded = cavlc_intra4x4_pred_size( h, 4*i8, i_mode ); bs_write_ue( &h->out.bs, cbp_to_golomb[!CHROMA444][1][(h->mb.i_cbp_chroma << 4)|h->mb.i_cbp_luma] ); for( int p = 0; p < plane_count; p++ ) - x264_cavlc_partition_luma_residual( h, i8, p ); + cavlc_partition_luma_residual( h, i8, p ); return h->out.bs.i_bits_encoded; } -static int x264_partition_i4x4_size_cavlc( x264_t *h, int i4, int i_mode ) +static int partition_i4x4_size_cavlc( x264_t *h, int i4, int i_mode ) { int plane_count = CHROMA444 ? 
3 : 1; - h->out.bs.i_bits_encoded = x264_cavlc_intra4x4_pred_size( h, i4, i_mode ); + h->out.bs.i_bits_encoded = cavlc_intra4x4_pred_size( h, i4, i_mode ); for( int p = 0; p < plane_count; p++ ) x264_cavlc_block_residual( h, DCT_LUMA_4x4, p*16+i4, h->dct.luma4x4[p*16+i4] ); return h->out.bs.i_bits_encoded; } -static int x264_chroma_size_cavlc( x264_t *h ) +static int chroma_size_cavlc( x264_t *h ) { h->out.bs.i_bits_encoded = bs_size_ue( x264_mb_chroma_pred_mode_fix[h->mb.i_chroma_pred_mode] ); if( h->mb.i_cbp_chroma ) diff -Nru x264-0.152.2854+gite9a5903/encoder/encoder.c x264-0.158.2988+git-20191101.7817004/encoder/encoder.c --- x264-0.152.2854+gite9a5903/encoder/encoder.c 2017-12-31 12:50:51.000000000 +0000 +++ x264-0.158.2988+git-20191101.7817004/encoder/encoder.c 2019-11-09 05:16:29.000000000 +0000 @@ -1,7 +1,7 @@ /***************************************************************************** * encoder.c: top-level encoder functions ***************************************************************************** - * Copyright (C) 2003-2017 x264 project + * Copyright (C) 2003-2019 x264 project * * Authors: Laurent Aimar * Loren Merritt @@ -40,16 +40,20 @@ #define bs_write_ue bs_write_ue_big -static int x264_encoder_frame_end( x264_t *h, x264_t *thread_current, - x264_nal_t **pp_nal, int *pi_nal, - x264_picture_t *pic_out ); +// forward declaration needed for template usage +void x264_nal_encode( x264_t *h, uint8_t *dst, x264_nal_t *nal ); +void x264_macroblock_cache_load_progressive( x264_t *h, int i_mb_x, int i_mb_y ); + +static int encoder_frame_end( x264_t *h, x264_t *thread_current, + x264_nal_t **pp_nal, int *pi_nal, + x264_picture_t *pic_out ); /**************************************************************************** * ******************************* x264 libs ********************************** * ****************************************************************************/ -static double x264_psnr( double sqe, double size ) +static double calc_psnr( 
double sqe, double size ) { double mse = sqe / (PIXEL_MAX*PIXEL_MAX * size); if( mse <= 0.0000000001 ) /* Max 100dB */ @@ -58,7 +62,7 @@ return -10.0 * log10( mse ); } -static double x264_ssim( double ssim ) +static double calc_ssim_db( double ssim ) { double inv_ssim = 1 - ssim; if( inv_ssim <= 0.0000000001 ) /* Max 100dB */ @@ -67,7 +71,7 @@ return -10.0 * log10( inv_ssim ); } -static int x264_threadpool_wait_all( x264_t *h ) +static int threadpool_wait_all( x264_t *h ) { for( int i = 0; i < h->param.i_threads; i++ ) if( h->thread[i]->b_thread_active ) @@ -79,7 +83,7 @@ return 0; } -static void x264_frame_dump( x264_t *h ) +static void frame_dump( x264_t *h ) { FILE *f = x264_fopen( h->param.psz_dump_yuv, "r+b" ); if( !f ) @@ -87,7 +91,7 @@ /* Wait for the threads to finish deblocking */ if( h->param.b_sliced_threads ) - x264_threadpool_wait_all( h ); + threadpool_wait_all( h ); /* Write the frame in display order */ int frame_size = FRAME_SIZE( h->param.i_height * h->param.i_width * sizeof(pixel) ); @@ -96,7 +100,7 @@ for( int p = 0; p < (CHROMA444 ? 
3 : 1); p++ ) for( int y = 0; y < h->param.i_height; y++ ) fwrite( &h->fdec->plane[p][y*h->fdec->i_stride[p]], sizeof(pixel), h->param.i_width, f ); - if( !CHROMA444 ) + if( CHROMA_FORMAT == CHROMA_420 || CHROMA_FORMAT == CHROMA_422 ) { int cw = h->param.i_width>>1; int ch = h->param.i_height>>CHROMA_V_SHIFT; @@ -115,9 +119,9 @@ } /* Fill "default" values */ -static void x264_slice_header_init( x264_t *h, x264_slice_header_t *sh, - x264_sps_t *sps, x264_pps_t *pps, - int i_idr_pic_id, int i_frame, int i_qp ) +static void slice_header_init( x264_t *h, x264_slice_header_t *sh, + x264_sps_t *sps, x264_pps_t *pps, + int i_idr_pic_id, int i_frame, int i_qp ) { x264_param_t *param = &h->param; @@ -206,7 +210,7 @@ sh->i_beta_offset = param->i_deblocking_filter_beta << 1; } -static void x264_slice_header_write( bs_t *s, x264_slice_header_t *sh, int i_nal_ref_idc ) +static void slice_header_write( bs_t *s, x264_slice_header_t *sh, int i_nal_ref_idc ) { if( sh->b_mbaff ) { @@ -289,25 +293,29 @@ { sh->b_weighted_pred = sh->weight[0][0].weightfn || sh->weight[0][1].weightfn || sh->weight[0][2].weightfn; /* pred_weight_table() */ - bs_write_ue( s, sh->weight[0][0].i_denom ); - bs_write_ue( s, sh->weight[0][1].i_denom ); + bs_write_ue( s, sh->weight[0][0].i_denom ); /* luma_log2_weight_denom */ + if( sh->sps->i_chroma_format_idc ) + bs_write_ue( s, sh->weight[0][1].i_denom ); /* chroma_log2_weight_denom */ for( int i = 0; i < sh->i_num_ref_idx_l0_active; i++ ) { int luma_weight_l0_flag = !!sh->weight[i][0].weightfn; - int chroma_weight_l0_flag = !!sh->weight[i][1].weightfn || !!sh->weight[i][2].weightfn; bs_write1( s, luma_weight_l0_flag ); if( luma_weight_l0_flag ) { bs_write_se( s, sh->weight[i][0].i_scale ); bs_write_se( s, sh->weight[i][0].i_offset ); } - bs_write1( s, chroma_weight_l0_flag ); - if( chroma_weight_l0_flag ) + if( sh->sps->i_chroma_format_idc ) { - for( int j = 1; j < 3; j++ ) + int chroma_weight_l0_flag = sh->weight[i][1].weightfn || 
sh->weight[i][2].weightfn; + bs_write1( s, chroma_weight_l0_flag ); + if( chroma_weight_l0_flag ) { - bs_write_se( s, sh->weight[i][j].i_scale ); - bs_write_se( s, sh->weight[i][j].i_offset ); + for( int j = 1; j < 3; j++ ) + { + bs_write_se( s, sh->weight[i][j].i_scale ); + bs_write_se( s, sh->weight[i][j].i_offset ); + } } } } @@ -357,7 +365,7 @@ /* If we are within a reasonable distance of the end of the memory allocated for the bitstream, */ /* reallocate, adding an arbitrary amount of space. */ -static int x264_bitstream_check_buffer_internal( x264_t *h, int size, int b_cabac, int i_nal ) +static int bitstream_check_buffer_internal( x264_t *h, int size, int b_cabac, int i_nal ) { if( (b_cabac && (h->cabac.p_end - h->cabac.p < size)) || (h->out.bs.p_end - h->out.bs.p < size) ) @@ -390,20 +398,20 @@ return 0; } -static int x264_bitstream_check_buffer( x264_t *h ) +static int bitstream_check_buffer( x264_t *h ) { int max_row_size = (2500 << SLICE_MBAFF) * h->mb.i_mb_width; - return x264_bitstream_check_buffer_internal( h, max_row_size, h->param.b_cabac, h->out.i_nal ); + return bitstream_check_buffer_internal( h, max_row_size, h->param.b_cabac, h->out.i_nal ); } -static int x264_bitstream_check_buffer_filler( x264_t *h, int filler ) +static int bitstream_check_buffer_filler( x264_t *h, int filler ) { filler += 32; // add padding for safety - return x264_bitstream_check_buffer_internal( h, filler, 0, -1 ); + return bitstream_check_buffer_internal( h, filler, 0, -1 ); } #if HAVE_THREAD -static void x264_encoder_thread_init( x264_t *h ) +static void encoder_thread_init( x264_t *h ) { if( h->param.i_sync_lookahead ) x264_lower_thread_priority( 10 ); @@ -418,11 +426,11 @@ * ****************************************************************************/ -static int x264_validate_parameters( x264_t *h, int b_open ) +static int validate_parameters( x264_t *h, int b_open ) { if( !h->param.pf_log ) { - x264_log( NULL, X264_LOG_ERROR, "pf_log not set! 
did you forget to call x264_param_default?\n" ); + x264_log_internal( X264_LOG_ERROR, "pf_log not set! did you forget to call x264_param_default?\n" ); return -1; } @@ -471,7 +479,12 @@ int i_csp = h->param.i_csp & X264_CSP_MASK; #if X264_CHROMA_FORMAT - if( CHROMA_FORMAT != CHROMA_420 && i_csp >= X264_CSP_I420 && i_csp < X264_CSP_I422 ) + if( CHROMA_FORMAT != CHROMA_400 && i_csp == X264_CSP_I400 ) + { + x264_log( h, X264_LOG_ERROR, "not compiled with 4:0:0 support\n" ); + return -1; + } + else if( CHROMA_FORMAT != CHROMA_420 && i_csp >= X264_CSP_I420 && i_csp < X264_CSP_I422 ) { x264_log( h, X264_LOG_ERROR, "not compiled with 4:2:0 support\n" ); return -1; @@ -489,13 +502,26 @@ #endif if( i_csp <= X264_CSP_NONE || i_csp >= X264_CSP_MAX ) { - x264_log( h, X264_LOG_ERROR, "invalid CSP (only I420/YV12/NV12/NV21/I422/YV16/NV16/YUYV/UYVY/" + x264_log( h, X264_LOG_ERROR, "invalid CSP (only I400/I420/YV12/NV12/NV21/I422/YV16/NV16/YUYV/UYVY/" "I444/YV24/BGR/BGRA/RGB supported)\n" ); return -1; } - int w_mod = i_csp < X264_CSP_I444 ? 2 : 1; - int h_mod = (i_csp < X264_CSP_I422 ? 2 : 1) << PARAM_INTERLACED; + int w_mod = 1; + int h_mod = 1 << (PARAM_INTERLACED || h->param.b_fake_interlaced); + if( i_csp == X264_CSP_I400 ) + { + h->param.analyse.i_chroma_qp_offset = 0; + h->param.analyse.b_chroma_me = 0; + h->param.vui.i_colmatrix = 2; /* undefined */ + } + else if( i_csp < X264_CSP_I444 ) + { + w_mod = 2; + if( i_csp < X264_CSP_I422 ) + h_mod *= 2; + } + if( h->param.i_width % w_mod ) { x264_log( h, X264_LOG_ERROR, "width not divisible by %d (%dx%d)\n", @@ -791,21 +817,36 @@ memcpy( h->param.cqm_4ic, avcintra_lut[type][res][i].cqm_4ic, sizeof(h->param.cqm_4ic) ); memcpy( h->param.cqm_8iy, avcintra_lut[type][res][i].cqm_8iy, sizeof(h->param.cqm_8iy) ); - /* Need exactly 10 slices of equal MB count... why? $deity knows... 
*/ - h->param.i_slice_max_mbs = ((h->param.i_width + 15) / 16) * ((h->param.i_height + 15) / 16) / 10; - h->param.i_slice_max_size = 0; - /* The slice structure only allows a maximum of 2 threads for 1080i/p - * and 1 or 5 threads for 720p */ - if( h->param.b_sliced_threads ) + /* Sony XAVC flavor much more simple */ + if( h->param.i_avcintra_flavor == X264_AVCINTRA_FLAVOR_SONY ) { - if( res ) - h->param.i_threads = X264_MIN( 2, h->param.i_threads ); - else + h->param.i_slice_count = 8; + if( h->param.b_sliced_threads ) + h->param.i_threads = h->param.i_slice_count; + /* Sony XAVC unlike AVC-Intra doesn't seem to have a QP floor */ + } + else + { + /* Need exactly 10 slices of equal MB count... why? $deity knows... */ + h->param.i_slice_max_mbs = ((h->param.i_width + 15) / 16) * ((h->param.i_height + 15) / 16) / 10; + h->param.i_slice_max_size = 0; + /* The slice structure only allows a maximum of 2 threads for 1080i/p + * and 1 or 5 threads for 720p */ + if( h->param.b_sliced_threads ) { - h->param.i_threads = X264_MIN( 5, h->param.i_threads ); - if( h->param.i_threads < 5 ) - h->param.i_threads = 1; + if( res ) + h->param.i_threads = X264_MIN( 2, h->param.i_threads ); + else + { + h->param.i_threads = X264_MIN( 5, h->param.i_threads ); + if( h->param.i_threads < 5 ) + h->param.i_threads = 1; + } } + + /* Official encoder doesn't appear to go under 13 + * and Avid cannot handle negative QPs */ + h->param.rc.i_qp_min = X264_MAX( h->param.rc.i_qp_min, QP_BD_OFFSET + 1 ); } if( type ) @@ -815,15 +856,11 @@ h->param.vui.i_sar_width = 4; h->param.vui.i_sar_height = 3; } - - /* Official encoder doesn't appear to go under 13 - * and Avid cannot handle negative QPs */ - h->param.rc.i_qp_min = X264_MAX( h->param.rc.i_qp_min, QP_BD_OFFSET + 1 ); } h->param.rc.f_rf_constant = x264_clip3f( h->param.rc.f_rf_constant, -QP_BD_OFFSET, 51 ); h->param.rc.f_rf_constant_max = x264_clip3f( h->param.rc.f_rf_constant_max, -QP_BD_OFFSET, 51 ); - h->param.rc.i_qp_constant = x264_clip3( 
h->param.rc.i_qp_constant, 0, QP_MAX ); + h->param.rc.i_qp_constant = x264_clip3( h->param.rc.i_qp_constant, -1, QP_MAX ); h->param.analyse.i_subpel_refine = x264_clip3( h->param.analyse.i_subpel_refine, 0, 11 ); h->param.rc.f_ip_factor = X264_MAX( h->param.rc.f_ip_factor, 0.01f ); h->param.rc.f_pb_factor = X264_MAX( h->param.rc.f_pb_factor, 0.01f ); @@ -852,19 +889,18 @@ /* 8x8dct is not useful without RD in CAVLC lossless */ if( !h->param.b_cabac && h->param.analyse.i_subpel_refine < 6 ) h->param.analyse.b_transform_8x8 = 0; - h->param.analyse.inter &= ~X264_ANALYSE_I8x8; - h->param.analyse.intra &= ~X264_ANALYSE_I8x8; - } - if( i_csp >= X264_CSP_I444 && h->param.b_cabac ) - { - /* Disable 8x8dct during 4:4:4+CABAC encoding for compatibility with libavcodec */ - h->param.analyse.b_transform_8x8 = 0; } if( h->param.rc.i_rc_method == X264_RC_CQP ) { float qp_p = h->param.rc.i_qp_constant; float qp_i = qp_p - 6*log2f( h->param.rc.f_ip_factor ); float qp_b = qp_p + 6*log2f( h->param.rc.f_pb_factor ); + if( qp_p < 0 ) + { + x264_log( h, X264_LOG_ERROR, "qp not specified\n" ); + return -1; + } + h->param.rc.i_qp_min = x264_clip3( (int)(X264_MIN3( qp_p, qp_i, qp_b )), 0, QP_MAX ); h->param.rc.i_qp_max = x264_clip3( (int)(X264_MAX3( qp_p, qp_i, qp_b ) + .999), 0, QP_MAX ); h->param.rc.i_aq_mode = 0; @@ -1338,6 +1374,9 @@ switch( CHROMA_FORMAT ) { + case CHROMA_400: + h->mc.prefetch_fenc = h->mc.prefetch_fenc_400; + break; case CHROMA_420: memcpy( h->predict_chroma, h->predict_8x8c, sizeof(h->predict_chroma) ); h->mc.prefetch_fenc = h->mc.prefetch_fenc_420; @@ -1368,7 +1407,7 @@ } } -static void x264_set_aspect_ratio( x264_t *h, x264_param_t *param, int initial ) +static void set_aspect_ratio( x264_t *h, x264_param_t *param, int initial ) { /* VUI */ if( param->vui.i_sar_width > 0 && param->vui.i_sar_height > 0 ) @@ -1431,7 +1470,7 @@ goto fail; } - if( x264_validate_parameters( h, 1 ) < 0 ) + if( validate_parameters( h, 1 ) < 0 ) goto fail; if( h->param.psz_cqm_file ) @@ 
-1461,9 +1500,10 @@ goto fail; } - x264_set_aspect_ratio( h, &h->param, 1 ); + set_aspect_ratio( h, &h->param, 1 ); x264_sps_init( h->sps, h->param.i_sps_id, &h->param ); + x264_sps_init_scaling_list( h->sps, &h->param ); x264_pps_init( h->pps, h->param.i_sps_id, &h->param, h->sps ); x264_validate_levels( h, 1 ); @@ -1517,6 +1557,7 @@ h->frames.i_largest_pts = h->frames.i_second_largest_pts = -1; h->frames.i_poc_last_open_gop = -1; + CHECKED_MALLOCZERO( h->cost_table, sizeof(*h->cost_table) ); CHECKED_MALLOCZERO( h->frames.unused[0], (h->frames.i_delay + 3) * sizeof(x264_frame_t *) ); /* Allocate room for max refs plus a few extra just in case. */ CHECKED_MALLOCZERO( h->frames.unused[1], (h->i_thread_frames + X264_REF_MAX + 4) * sizeof(x264_frame_t *) ); @@ -1553,7 +1594,7 @@ if( h->param.b_cabac ) x264_cabac_init( h ); else - x264_stack_align( x264_cavlc_init, h ); + x264_cavlc_init( h ); mbcmp_init( h ); chroma_dsp_init( h ); @@ -1616,7 +1657,7 @@ CHECKED_MALLOC( h->reconfig_h, sizeof(x264_t) ); if( h->param.i_threads > 1 && - x264_threadpool_init( &h->threadpool, h->param.i_threads, (void*)x264_encoder_thread_init, h ) ) + x264_threadpool_init( &h->threadpool, h->param.i_threads, (void*)encoder_thread_init, h ) ) goto fail; if( h->param.i_lookahead_threads > 1 && x264_threadpool_init( &h->lookaheadpool, h->param.i_lookahead_threads, NULL, NULL ) ) @@ -1717,26 +1758,20 @@ const char *profile = h->sps->i_profile_idc == PROFILE_BASELINE ? "Constrained Baseline" : h->sps->i_profile_idc == PROFILE_MAIN ? "Main" : h->sps->i_profile_idc == PROFILE_HIGH ? "High" : - h->sps->i_profile_idc == PROFILE_HIGH10 ? (h->sps->b_constraint_set3 == 1 ? "High 10 Intra" : "High 10") : - h->sps->i_profile_idc == PROFILE_HIGH422 ? (h->sps->b_constraint_set3 == 1 ? "High 4:2:2 Intra" : "High 4:2:2") : - h->sps->b_constraint_set3 == 1 ? "High 4:4:4 Intra" : "High 4:4:4 Predictive"; + h->sps->i_profile_idc == PROFILE_HIGH10 ? + (h->sps->b_constraint_set3 ? 
"High 10 Intra" : "High 10") : + h->sps->i_profile_idc == PROFILE_HIGH422 ? + (h->sps->b_constraint_set3 ? "High 4:2:2 Intra" : "High 4:2:2") : + h->sps->b_constraint_set3 ? "High 4:4:4 Intra" : "High 4:4:4 Predictive"; char level[4]; snprintf( level, sizeof(level), "%d.%d", h->sps->i_level_idc/10, h->sps->i_level_idc%10 ); if( h->sps->i_level_idc == 9 || ( h->sps->i_level_idc == 11 && h->sps->b_constraint_set3 && (h->sps->i_profile_idc == PROFILE_BASELINE || h->sps->i_profile_idc == PROFILE_MAIN) ) ) strcpy( level, "1b" ); - if( h->sps->i_profile_idc < PROFILE_HIGH10 ) - { - x264_log( h, X264_LOG_INFO, "profile %s, level %s\n", - profile, level ); - } - else - { - static const char * const subsampling[4] = { "4:0:0", "4:2:0", "4:2:2", "4:4:4" }; - x264_log( h, X264_LOG_INFO, "profile %s, level %s, %s %d-bit\n", - profile, level, subsampling[CHROMA_FORMAT], BIT_DEPTH ); - } + static const char * const subsampling[4] = { "4:0:0", "4:2:0", "4:2:2", "4:4:4" }; + x264_log( h, X264_LOG_INFO, "profile %s, level %s, %s, %d-bit\n", + profile, level, subsampling[CHROMA_FORMAT], BIT_DEPTH ); return h; fail: @@ -1745,10 +1780,10 @@ } /****************************************************************************/ -static int x264_encoder_try_reconfig( x264_t *h, x264_param_t *param, int *rc_reconfig ) +static int encoder_try_reconfig( x264_t *h, x264_param_t *param, int *rc_reconfig ) { *rc_reconfig = 0; - x264_set_aspect_ratio( h, param, 0 ); + set_aspect_ratio( h, param, 0 ); #define COPY(var) h->param.var = param->var COPY( i_frame_reference ); // but never uses more refs than initially specified COPY( i_bframe_bias ); @@ -1809,13 +1844,13 @@ COPY( rc.f_rf_constant_max ); #undef COPY - return x264_validate_parameters( h, 0 ); + return validate_parameters( h, 0 ); } int x264_encoder_reconfig_apply( x264_t *h, x264_param_t *param ) { int rc_reconfig; - int ret = x264_encoder_try_reconfig( h, param, &rc_reconfig ); + int ret = encoder_try_reconfig( h, param, &rc_reconfig ); 
mbcmp_init( h ); if( !ret ) @@ -1842,7 +1877,7 @@ h->reconfig_h->param = h->param; int rc_reconfig; - int ret = x264_encoder_try_reconfig( h->reconfig_h, param, &rc_reconfig ); + int ret = encoder_try_reconfig( h->reconfig_h, param, &rc_reconfig ); if( !ret ) h->reconfig = 1; else @@ -1860,7 +1895,7 @@ } /* internal usage */ -static void x264_nal_start( x264_t *h, int i_type, int i_ref_idc ) +static void nal_start( x264_t *h, int i_type, int i_ref_idc ) { x264_nal_t *nal = &h->out.nal[h->out.i_nal]; @@ -1874,7 +1909,7 @@ } /* if number of allocated nals is not enough, re-allocate a larger one. */ -static int x264_nal_check_buffer( x264_t *h ) +static int nal_check_buffer( x264_t *h ) { if( h->out.i_nal >= h->out.i_nals_allocated ) { @@ -1889,7 +1924,7 @@ return 0; } -static int x264_nal_end( x264_t *h ) +static int nal_end( x264_t *h ) { x264_nal_t *nal = &h->out.nal[h->out.i_nal]; uint8_t *end = &h->out.p_bitstream[bs_pos( &h->out.bs ) / 8]; @@ -1901,11 +1936,11 @@ h->param.nalu_process( h, nal, h->fenc->opaque ); h->out.i_nal++; - return x264_nal_check_buffer( h ); + return nal_check_buffer( h ); } -static int x264_check_encapsulated_buffer( x264_t *h, x264_t *h0, int start, - int previous_nal_size, int necessary_size ) +static int check_encapsulated_buffer( x264_t *h, x264_t *h0, int start, + int previous_nal_size, int necessary_size ) { if( h0->nal_buffer_size < necessary_size ) { @@ -1928,7 +1963,7 @@ return 0; } -static int x264_encoder_encapsulate_nals( x264_t *h, int start ) +static int encoder_encapsulate_nals( x264_t *h, int start ) { x264_t *h0 = h->thread[0]; int nal_size = 0, previous_nal_size = 0; @@ -1950,7 +1985,7 @@ int necessary_size = previous_nal_size + nal_size * 3/2 + h->out.i_nal * 4 + 4 + 64; for( int i = start; i < h->out.i_nal; i++ ) necessary_size += h->out.nal[i].i_padding; - if( x264_check_encapsulated_buffer( h, h0, start, previous_nal_size, necessary_size ) ) + if( check_encapsulated_buffer( h, h0, start, previous_nal_size, 
necessary_size ) ) return -1; uint8_t *nal_buffer = h0->nal_buffer + previous_nal_size; @@ -1981,25 +2016,25 @@ /* Write SEI, SPS and PPS. */ /* generate sequence parameters */ - x264_nal_start( h, NAL_SPS, NAL_PRIORITY_HIGHEST ); + nal_start( h, NAL_SPS, NAL_PRIORITY_HIGHEST ); x264_sps_write( &h->out.bs, h->sps ); - if( x264_nal_end( h ) ) + if( nal_end( h ) ) return -1; /* generate picture parameters */ - x264_nal_start( h, NAL_PPS, NAL_PRIORITY_HIGHEST ); + nal_start( h, NAL_PPS, NAL_PRIORITY_HIGHEST ); x264_pps_write( &h->out.bs, h->sps, h->pps ); - if( x264_nal_end( h ) ) + if( nal_end( h ) ) return -1; /* identify ourselves */ - x264_nal_start( h, NAL_SEI, NAL_PRIORITY_DISPOSABLE ); + nal_start( h, NAL_SEI, NAL_PRIORITY_DISPOSABLE ); if( x264_sei_version_write( h, &h->out.bs ) ) return -1; - if( x264_nal_end( h ) ) + if( nal_end( h ) ) return -1; - frame_size = x264_encoder_encapsulate_nals( h, 0 ); + frame_size = encoder_encapsulate_nals( h, 0 ); if( frame_size < 0 ) return -1; @@ -2013,7 +2048,7 @@ /* Check to see whether we have chosen a reference list ordering different * from the standard's default. */ -static inline void x264_reference_check_reorder( x264_t *h ) +static inline void reference_check_reorder( x264_t *h ) { /* The reorder check doesn't check for missing frames, so just * force a reorder if one of the reference list is corrupt. 
*/ @@ -2038,7 +2073,7 @@ } /* return -1 on failure, else return the index of the new reference frame */ -static int x264_weighted_reference_duplicate( x264_t *h, int i_ref, const x264_weight_t *w ) +static int weighted_reference_duplicate( x264_t *h, int i_ref, const x264_weight_t *w ) { int i = h->i_ref[0]; int j = 1; @@ -2077,7 +2112,7 @@ return j; } -static void x264_weighted_pred_init( x264_t *h ) +static void weighted_pred_init( x264_t *h ) { /* for now no analysis and set all weights to nothing */ for( int i_ref = 0; i_ref < h->i_ref[0]; i_ref++ ) @@ -2161,7 +2196,7 @@ h->sh.weight[0][2].i_denom = h->sh.weight[0][1].i_denom; } -static inline int x264_reference_distance( x264_t *h, x264_frame_t *frame ) +static inline int reference_distance( x264_t *h, x264_frame_t *frame ) { if( h->param.i_frame_packing == 5 ) return abs((h->fenc->i_frame&~1) - (frame->i_frame&~1)) + @@ -2170,7 +2205,7 @@ return abs(h->fenc->i_frame - frame->i_frame); } -static inline void x264_reference_build_list( x264_t *h, int i_poc ) +static inline void reference_build_list( x264_t *h, int i_poc ) { int b_ok; @@ -2227,7 +2262,7 @@ if( list ? 
h->fref[list][i+1]->i_poc < h->fref_nearest[list]->i_poc : h->fref[list][i+1]->i_poc > h->fref_nearest[list]->i_poc ) h->fref_nearest[list] = h->fref[list][i+1]; - if( x264_reference_distance( h, h->fref[list][i] ) > x264_reference_distance( h, h->fref[list][i+1] ) ) + if( reference_distance( h, h->fref[list][i] ) > reference_distance( h, h->fref[list][i+1] ) ) { XCHG( x264_frame_t*, h->fref[list][i], h->fref[list][i+1] ); b_ok = 0; @@ -2237,7 +2272,7 @@ } while( !b_ok ); } - x264_reference_check_reorder( h ); + reference_check_reorder( h ); h->i_ref[1] = X264_MIN( h->i_ref[1], h->frames.i_max_ref1 ); h->i_ref[0] = X264_MIN( h->i_ref[0], h->frames.i_max_ref0 ); @@ -2262,7 +2297,7 @@ { h->fenc->weight[0][0].i_denom = 0; SET_WEIGHT( w[0], 1, 1, 0, -1 ); - idx = x264_weighted_reference_duplicate( h, 0, w ); + idx = weighted_reference_duplicate( h, 0, w ); } else { @@ -2270,13 +2305,13 @@ { SET_WEIGHT( h->fenc->weight[0][0], 1, 1, 0, h->fenc->weight[0][0].i_offset ); } - x264_weighted_reference_duplicate( h, 0, x264_weight_none ); + weighted_reference_duplicate( h, 0, x264_weight_none ); if( h->fenc->weight[0][0].i_offset > -128 ) { w[0] = h->fenc->weight[0][0]; w[0].i_offset--; h->mc.weight_cache( h, &w[0] ); - idx = x264_weighted_reference_duplicate( h, 0, w ); + idx = weighted_reference_duplicate( h, 0, w ); } } } @@ -2288,7 +2323,7 @@ h->mb.pic.i_fref[1] = h->i_ref[1]; } -static void x264_fdec_filter_row( x264_t *h, int mb_y, int pass ) +static void fdec_filter_row( x264_t *h, int mb_y, int pass ) { /* mb_y is the mb to be encoded next, not the mb to be filtered here */ int b_hpel = h->fdec->b_kept_as_ref; @@ -2408,7 +2443,7 @@ } } -static inline int x264_reference_update( x264_t *h ) +static inline int reference_update( x264_t *h ) { if( !h->fdec->b_kept_as_ref ) { @@ -2438,7 +2473,7 @@ return 0; } -static inline void x264_reference_reset( x264_t *h ) +static inline void reference_reset( x264_t *h ) { while( h->frames.reference[0] ) x264_frame_push_unused( h, 
x264_frame_pop( h->frames.reference ) ); @@ -2446,7 +2481,7 @@ h->fenc->i_poc = 0; } -static inline void x264_reference_hierarchy_reset( x264_t *h ) +static inline void reference_hierarchy_reset( x264_t *h ) { int ref; int b_hasdelayframe = 0; @@ -2483,12 +2518,12 @@ h->sh.i_mmco_remove_from_end = X264_MAX( ref + 2 - h->frames.i_max_dpb, 0 ); } -static inline void x264_slice_init( x264_t *h, int i_nal_type, int i_global_qp ) +static inline void slice_init( x264_t *h, int i_nal_type, int i_global_qp ) { /* ------------------------ Create slice header ----------------------- */ if( i_nal_type == NAL_SLICE_IDR ) { - x264_slice_header_init( h, &h->sh, h->sps, h->pps, h->i_idr_pic_id, h->i_frame_num, i_global_qp ); + slice_header_init( h, &h->sh, h->sps, h->pps, h->i_idr_pic_id, h->i_frame_num, i_global_qp ); /* alternate id */ if( h->param.i_avcintra_class ) @@ -2512,7 +2547,7 @@ } else { - x264_slice_header_init( h, &h->sh, h->sps, h->pps, -1, h->i_frame_num, i_global_qp ); + slice_header_init( h, &h->sh, h->sps, h->pps, -1, h->i_frame_num, i_global_qp ); h->sh.i_num_ref_idx_l0_active = h->i_ref[0] <= 0 ? 1 : h->i_ref[0]; h->sh.i_num_ref_idx_l1_active = h->i_ref[1] <= 0 ? 
1 : h->i_ref[1]; @@ -2564,7 +2599,7 @@ int field_decoding_flag; } x264_bs_bak_t; -static ALWAYS_INLINE void x264_bitstream_backup( x264_t *h, x264_bs_bak_t *bak, int i_skip, int full ) +static ALWAYS_INLINE void bitstream_backup( x264_t *h, x264_bs_bak_t *bak, int i_skip, int full ) { if( full ) { @@ -2598,7 +2633,7 @@ } } -static ALWAYS_INLINE void x264_bitstream_restore( x264_t *h, x264_bs_bak_t *bak, int *skip, int full ) +static ALWAYS_INLINE void bitstream_restore( x264_t *h, x264_bs_bak_t *bak, int *skip, int full ) { if( full ) { @@ -2627,7 +2662,7 @@ } } -static intptr_t x264_slice_write( x264_t *h ) +static intptr_t slice_write( x264_t *h ) { int i_skip; int mb_xy, i_mb_x, i_mb_y; @@ -2654,7 +2689,7 @@ bs_realign( &h->out.bs ); /* Slice */ - x264_nal_start( h, h->i_nal_type, h->i_nal_ref_idc ); + nal_start( h, h->i_nal_type, h->i_nal_ref_idc ); h->out.nal[h->out.i_nal].i_first_mb = h->sh.i_first_mb; /* Slice header */ @@ -2666,7 +2701,7 @@ h->sh.i_qp = SPEC_QP( h->sh.i_qp ); h->sh.i_qp_delta = h->sh.i_qp - h->pps->i_pic_init_qp; - x264_slice_header_write( &h->out.bs, &h->sh, h->i_nal_ref_idc ); + slice_header_write( &h->out.bs, &h->sh, h->i_nal_ref_idc ); if( h->param.b_cabac ) { /* alignment needed */ @@ -2694,23 +2729,23 @@ if( i_mb_x == 0 ) { - if( x264_bitstream_check_buffer( h ) ) + if( bitstream_check_buffer( h ) ) return -1; if( !(i_mb_y & SLICE_MBAFF) && h->param.rc.i_vbv_buffer_size ) - x264_bitstream_backup( h, &bs_bak[BS_BAK_ROW_VBV], i_skip, 1 ); + bitstream_backup( h, &bs_bak[BS_BAK_ROW_VBV], i_skip, 1 ); if( !h->mb.b_reencode_mb ) - x264_fdec_filter_row( h, i_mb_y, 0 ); + fdec_filter_row( h, i_mb_y, 0 ); } if( back_up_bitstream ) { if( back_up_bitstream_cavlc ) - x264_bitstream_backup( h, &bs_bak[BS_BAK_CAVLC_OVERFLOW], i_skip, 0 ); + bitstream_backup( h, &bs_bak[BS_BAK_CAVLC_OVERFLOW], i_skip, 0 ); if( slice_max_size && !(i_mb_y & SLICE_MBAFF) ) { - x264_bitstream_backup( h, &bs_bak[BS_BAK_SLICE_MAX_SIZE], i_skip, 0 ); + bitstream_backup( h, 
&bs_bak[BS_BAK_SLICE_MAX_SIZE], i_skip, 0 ); if( (thread_last_mb+1-mb_xy) == h->param.i_slice_min_mbs ) - x264_bitstream_backup( h, &bs_bak[BS_BAK_SLICE_MIN_MBS], i_skip, 0 ); + bitstream_backup( h, &bs_bak[BS_BAK_SLICE_MIN_MBS], i_skip, 0 ); } } @@ -2775,7 +2810,7 @@ h->mb.i_skip_intra = 0; h->mb.b_skip_mc = 0; h->mb.b_overflow = 0; - x264_bitstream_restore( h, &bs_bak[BS_BAK_CAVLC_OVERFLOW], &i_skip, 0 ); + bitstream_restore( h, &bs_bak[BS_BAK_CAVLC_OVERFLOW], &i_skip, 0 ); goto reencode; } } @@ -2815,14 +2850,14 @@ slice_max_size = 0; goto cont; } - x264_bitstream_restore( h, &bs_bak[BS_BAK_SLICE_MIN_MBS], &i_skip, 0 ); + bitstream_restore( h, &bs_bak[BS_BAK_SLICE_MIN_MBS], &i_skip, 0 ); h->mb.b_reencode_mb = 1; h->sh.i_last_mb = thread_last_mb-h->param.i_slice_min_mbs; break; } if( mb_xy-SLICE_MBAFF*h->mb.i_mb_stride != h->sh.i_first_mb ) { - x264_bitstream_restore( h, &bs_bak[BS_BAK_SLICE_MAX_SIZE], &i_skip, 0 ); + bitstream_restore( h, &bs_bak[BS_BAK_SLICE_MAX_SIZE], &i_skip, 0 ); h->mb.b_reencode_mb = 1; if( SLICE_MBAFF ) { @@ -2851,7 +2886,7 @@ if( x264_ratecontrol_mb( h, mb_size ) < 0 ) { - x264_bitstream_restore( h, &bs_bak[BS_BAK_ROW_VBV], &i_skip, 1 ); + bitstream_restore( h, &bs_bak[BS_BAK_ROW_VBV], &i_skip, 1 ); h->mb.b_reencode_mb = 1; i_mb_x = 0; i_mb_y = i_mb_y - SLICE_MBAFF; @@ -2968,7 +3003,7 @@ bs_rbsp_trailing( &h->out.bs ); bs_flush( &h->out.bs ); } - if( x264_nal_end( h ) ) + if( nal_end( h ) ) return -1; if( h->sh.i_last_mb == (h->i_threadslice_end * h->mb.i_mb_width - 1) ) @@ -2977,7 +3012,7 @@ + (h->out.i_nal*NALU_OVERHEAD * 8) - h->stat.frame.i_tex_bits - h->stat.frame.i_mv_bits; - x264_fdec_filter_row( h, h->i_threadslice_end, 0 ); + fdec_filter_row( h, h->i_threadslice_end, 0 ); if( h->param.b_sliced_threads ) { @@ -2985,13 +3020,13 @@ x264_threadslice_cond_broadcast( h, 1 ); /* Do hpel now */ for( int mb_y = h->i_threadslice_start; mb_y <= h->i_threadslice_end; mb_y++ ) - x264_fdec_filter_row( h, mb_y, 1 ); + fdec_filter_row( h, mb_y, 
1 ); x264_threadslice_cond_broadcast( h, 2 ); /* Do the first row of hpel, now that the previous slice is done */ if( h->i_thread_idx > 0 ) { x264_threadslice_cond_wait( h->thread[h->i_thread_idx-1], 2 ); - x264_fdec_filter_row( h, h->i_threadslice_start + (1 << SLICE_MBAFF), 2 ); + fdec_filter_row( h, h->i_threadslice_start + (1 << SLICE_MBAFF), 2 ); } } @@ -3007,7 +3042,7 @@ return 0; } -static void x264_thread_sync_context( x264_t *dst, x264_t *src ) +static void thread_sync_context( x264_t *dst, x264_t *src ) { if( dst == src ) return; @@ -3028,16 +3063,17 @@ dst->reconfig = src->reconfig; } -static void x264_thread_sync_stat( x264_t *dst, x264_t *src ) +static void thread_sync_stat( x264_t *dst, x264_t *src ) { if( dst != src ) memcpy( &dst->stat, &src->stat, offsetof(x264_t, stat.frame) - offsetof(x264_t, stat) ); } -static void *x264_slices_write( x264_t *h ) +static void *slices_write( x264_t *h ) { int i_slice_num = 0; int last_thread_mb = h->sh.i_last_mb; + int round_bias = h->param.i_avcintra_class ? 0 : h->param.i_slice_count/2; /* init stats */ memset( &h->stat.frame, 0, sizeof(h->stat.frame) ); @@ -3072,11 +3108,11 @@ int height = h->mb.i_mb_height >> PARAM_INTERLACED; int width = h->mb.i_mb_width << PARAM_INTERLACED; i_slice_num++; - h->sh.i_last_mb = (height * i_slice_num + h->param.i_slice_count/2) / h->param.i_slice_count * width - 1; + h->sh.i_last_mb = (height * i_slice_num + round_bias) / h->param.i_slice_count * width - 1; } } h->sh.i_last_mb = X264_MIN( h->sh.i_last_mb, last_thread_mb ); - if( x264_stack_align( x264_slice_write, h ) ) + if( slice_write( h ) ) goto fail; h->sh.i_first_mb = h->sh.i_last_mb + 1; // if i_first_mb is not the last mb in a row then go to the next mb in MBAFF order @@ -3093,8 +3129,10 @@ return (void *)-1; } -static int x264_threaded_slices_write( x264_t *h ) +static int threaded_slices_write( x264_t *h ) { + int round_bias = h->param.i_avcintra_class ? 
0 : h->param.i_slice_count/2; + /* set first/last mb and sync contexts */ for( int i = 0; i < h->param.i_threads; i++ ) { @@ -3105,13 +3143,13 @@ memcpy( &t->i_frame, &h->i_frame, offsetof(x264_t, rc) - offsetof(x264_t, i_frame) ); } int height = h->mb.i_mb_height >> PARAM_INTERLACED; - t->i_threadslice_start = ((height * i + h->param.i_slice_count/2) / h->param.i_threads) << PARAM_INTERLACED; - t->i_threadslice_end = ((height * (i+1) + h->param.i_slice_count/2) / h->param.i_threads) << PARAM_INTERLACED; + t->i_threadslice_start = ((height * i + round_bias) / h->param.i_threads) << PARAM_INTERLACED; + t->i_threadslice_end = ((height * (i+1) + round_bias) / h->param.i_threads) << PARAM_INTERLACED; t->sh.i_first_mb = t->i_threadslice_start * h->mb.i_mb_width; t->sh.i_last_mb = t->i_threadslice_end * h->mb.i_mb_width - 1; } - x264_stack_align( x264_analyse_weight_frame, h, h->mb.i_mb_height*16 + 16 ); + x264_analyse_weight_frame( h, h->mb.i_mb_height*16 + 16 ); x264_threads_distribute_ratecontrol( h ); @@ -3124,7 +3162,7 @@ } /* dispatch */ for( int i = 0; i < h->param.i_threads; i++ ) - x264_threadpool_run( h->threadpool, (void*)x264_slices_write, h->thread[i] ); + x264_threadpool_run( h->threadpool, (void*)slices_write, h->thread[i] ); /* wait */ for( int i = 0; i < h->param.i_threads; i++ ) x264_threadslice_cond_wait( h->thread[i], 1 ); @@ -3138,7 +3176,7 @@ { h->out.nal[h->out.i_nal] = t->out.nal[j]; h->out.i_nal++; - x264_nal_check_buffer( h ); + nal_check_buffer( h ); } /* All entries in stat.frame are ints except for ssd/ssim. 
*/ for( int j = 0; j < (offsetof(x264_t,stat.frame.i_ssd) - offsetof(x264_t,stat.frame.i_mv_bits)) / sizeof(int); j++ ) @@ -3215,7 +3253,7 @@ h->i_thread_phase = (h->i_thread_phase + 1) % h->i_thread_frames; thread_current = h->thread[ h->i_thread_phase ]; thread_oldest = h->thread[ (h->i_thread_phase + 1) % h->i_thread_frames ]; - x264_thread_sync_context( thread_current, thread_prev ); + thread_sync_context( thread_current, thread_prev ); x264_thread_sync_ratecontrol( thread_current, thread_prev, thread_oldest ); h = thread_current; } @@ -3289,7 +3327,7 @@ return -1; } else - x264_stack_align( x264_adaptive_quant_frame, h, fenc, pic_in->prop.quant_offsets ); + x264_adaptive_quant_frame( h, fenc, pic_in->prop.quant_offsets ); if( pic_in->prop.quant_offsets_free ) pic_in->prop.quant_offsets_free( pic_in->prop.quant_offsets ); @@ -3322,7 +3360,7 @@ x264_lookahead_get_frames( h ); if( !h->frames.current[0] && x264_lookahead_is_empty( h ) ) - return x264_encoder_frame_end( thread_oldest, thread_current, pp_nal, pi_nal, pic_out ); + return encoder_frame_end( thread_oldest, thread_current, pp_nal, pi_nal, pic_out ); /* ------------------- Get frame to be encoded ------------------------- */ /* 4: get picture to encode */ @@ -3330,7 +3368,7 @@ /* If applicable, wait for previous frame reconstruction to finish */ if( h->param.b_sliced_threads ) - if( x264_threadpool_wait_all( h ) < 0 ) + if( threadpool_wait_all( h ) < 0 ) return -1; if( h->i_frame == 0 ) @@ -3352,7 +3390,7 @@ x264_ratecontrol_zone_init( h ); // ok to call this before encoding any frames, since the initial values of fdec have b_kept_as_ref=0 - if( x264_reference_update( h ) ) + if( reference_update( h ) ) return -1; h->fdec->i_lines_completed = -1; @@ -3394,7 +3432,7 @@ i_nal_type = NAL_SLICE_IDR; i_nal_ref_idc = NAL_PRIORITY_HIGHEST; h->sh.i_type = SLICE_TYPE_I; - x264_reference_reset( h ); + reference_reset( h ); h->frames.i_poc_last_open_gop = -1; } else if( h->fenc->i_type == X264_TYPE_I ) @@ -3402,7 
+3440,7 @@ i_nal_type = NAL_SLICE; i_nal_ref_idc = NAL_PRIORITY_HIGH; /* Not completely true but for now it is (as all I/P are kept as ref)*/ h->sh.i_type = SLICE_TYPE_I; - x264_reference_hierarchy_reset( h ); + reference_hierarchy_reset( h ); if( h->param.b_open_gop ) h->frames.i_poc_last_open_gop = h->fenc->b_keyframe ? h->fenc->i_poc : -1; } @@ -3411,7 +3449,7 @@ i_nal_type = NAL_SLICE; i_nal_ref_idc = NAL_PRIORITY_HIGH; /* Not completely true but for now it is (as all I/P are kept as ref)*/ h->sh.i_type = SLICE_TYPE_P; - x264_reference_hierarchy_reset( h ); + reference_hierarchy_reset( h ); h->frames.i_poc_last_open_gop = -1; } else if( h->fenc->i_type == X264_TYPE_BREF ) @@ -3419,7 +3457,7 @@ i_nal_type = NAL_SLICE; i_nal_ref_idc = h->param.i_bframe_pyramid == X264_B_PYRAMID_STRICT ? NAL_PRIORITY_LOW : NAL_PRIORITY_HIGH; h->sh.i_type = SLICE_TYPE_B; - x264_reference_hierarchy_reset( h ); + reference_hierarchy_reset( h ); } else /* B frame */ { @@ -3454,7 +3492,7 @@ /* ------------------- Init ----------------------------- */ /* build ref list 0/1 */ - x264_reference_build_list( h, h->fdec->i_poc ); + reference_build_list( h, h->fdec->i_poc ); /* ---------------------- Write the bitstream -------------------------- */ /* Init bitstream context */ @@ -3485,10 +3523,11 @@ else pic_type = 7; - x264_nal_start( h, NAL_AUD, NAL_PRIORITY_DISPOSABLE ); + nal_start( h, NAL_AUD, NAL_PRIORITY_DISPOSABLE ); bs_write( &h->out.bs, 3, pic_type ); bs_rbsp_trailing( &h->out.bs ); - if( x264_nal_end( h ) ) + bs_flush( &h->out.bs ); + if( nal_end( h ) ) return -1; overhead += h->out.nal[h->out.i_nal-1].i_payload + NALU_OVERHEAD; } @@ -3538,9 +3577,9 @@ if( h->param.b_repeat_headers ) { /* generate sequence parameters */ - x264_nal_start( h, NAL_SPS, NAL_PRIORITY_HIGHEST ); + nal_start( h, NAL_SPS, NAL_PRIORITY_HIGHEST ); x264_sps_write( &h->out.bs, h->sps ); - if( x264_nal_end( h ) ) + if( nal_end( h ) ) return -1; /* Pad AUD/SPS to 256 bytes like Panasonic */ if( 
h->param.i_avcintra_class ) @@ -3548,22 +3587,28 @@ overhead += h->out.nal[h->out.i_nal-1].i_payload + h->out.nal[h->out.i_nal-1].i_padding + NALU_OVERHEAD; /* generate picture parameters */ - x264_nal_start( h, NAL_PPS, NAL_PRIORITY_HIGHEST ); + nal_start( h, NAL_PPS, NAL_PRIORITY_HIGHEST ); x264_pps_write( &h->out.bs, h->sps, h->pps ); - if( x264_nal_end( h ) ) + if( nal_end( h ) ) return -1; if( h->param.i_avcintra_class ) - h->out.nal[h->out.i_nal-1].i_padding = 256 - h->out.nal[h->out.i_nal-1].i_payload - NALU_OVERHEAD; + { + int total_len = 256; + /* Sony XAVC uses an oversized PPS instead of SEI padding */ + if( h->param.i_avcintra_flavor == X264_AVCINTRA_FLAVOR_SONY ) + total_len += h->param.i_height == 1080 ? 18*512 : 10*512; + h->out.nal[h->out.i_nal-1].i_padding = total_len - h->out.nal[h->out.i_nal-1].i_payload - NALU_OVERHEAD; + } overhead += h->out.nal[h->out.i_nal-1].i_payload + h->out.nal[h->out.i_nal-1].i_padding + NALU_OVERHEAD; } - /* when frame threading is used, buffering period sei is written in x264_encoder_frame_end */ + /* when frame threading is used, buffering period sei is written in encoder_frame_end */ if( h->i_thread_frames == 1 && h->sps->vui.b_nal_hrd_parameters_present ) { x264_hrd_fullness( h ); - x264_nal_start( h, NAL_SEI, NAL_PRIORITY_DISPOSABLE ); + nal_start( h, NAL_SEI, NAL_PRIORITY_DISPOSABLE ); x264_sei_buffering_period_write( h, &h->out.bs ); - if( x264_nal_end( h ) ) + if( nal_end( h ) ) return -1; overhead += h->out.nal[h->out.i_nal-1].i_payload + SEI_OVERHEAD; } @@ -3572,10 +3617,10 @@ /* write extra sei */ for( int i = 0; i < h->fenc->extra_sei.num_payloads; i++ ) { - x264_nal_start( h, NAL_SEI, NAL_PRIORITY_DISPOSABLE ); + nal_start( h, NAL_SEI, NAL_PRIORITY_DISPOSABLE ); x264_sei_write( &h->out.bs, h->fenc->extra_sei.payloads[i].payload, h->fenc->extra_sei.payloads[i].payload_size, h->fenc->extra_sei.payloads[i].payload_type ); - if( x264_nal_end( h ) ) + if( nal_end( h ) ) return -1; overhead += 
h->out.nal[h->out.i_nal-1].i_payload + SEI_OVERHEAD; if( h->fenc->extra_sei.sei_free ) @@ -3598,10 +3643,10 @@ if( h->param.b_repeat_headers && h->fenc->i_frame == 0 && !h->param.i_avcintra_class ) { /* identify ourself */ - x264_nal_start( h, NAL_SEI, NAL_PRIORITY_DISPOSABLE ); + nal_start( h, NAL_SEI, NAL_PRIORITY_DISPOSABLE ); if( x264_sei_version_write( h, &h->out.bs ) ) return -1; - if( x264_nal_end( h ) ) + if( nal_end( h ) ) return -1; overhead += h->out.nal[h->out.i_nal-1].i_payload + SEI_OVERHEAD; } @@ -3609,9 +3654,9 @@ if( h->fenc->i_type != X264_TYPE_IDR ) { int time_to_recovery = h->param.b_open_gop ? 0 : X264_MIN( h->mb.i_mb_width - 1, h->param.i_keyint_max ) + h->param.i_bframe - 1; - x264_nal_start( h, NAL_SEI, NAL_PRIORITY_DISPOSABLE ); + nal_start( h, NAL_SEI, NAL_PRIORITY_DISPOSABLE ); x264_sei_recovery_point_write( h, &h->out.bs, time_to_recovery ); - if( x264_nal_end( h ) ) + if( nal_end( h ) ) return -1; overhead += h->out.nal[h->out.i_nal-1].i_payload + SEI_OVERHEAD; } @@ -3619,9 +3664,18 @@ if( h->param.i_frame_packing >= 0 && (h->fenc->b_keyframe || h->param.i_frame_packing == 5) ) { - x264_nal_start( h, NAL_SEI, NAL_PRIORITY_DISPOSABLE ); + nal_start( h, NAL_SEI, NAL_PRIORITY_DISPOSABLE ); x264_sei_frame_packing_write( h, &h->out.bs ); - if( x264_nal_end( h ) ) + if( nal_end( h ) ) + return -1; + overhead += h->out.nal[h->out.i_nal-1].i_payload + SEI_OVERHEAD; + } + + if( h->param.i_alternative_transfer != 2 ) + { + nal_start( h, NAL_SEI, NAL_PRIORITY_DISPOSABLE ); + x264_sei_alternative_transfer_write( h, &h->out.bs ); + if( nal_end( h ) ) return -1; overhead += h->out.nal[h->out.i_nal-1].i_payload + SEI_OVERHEAD; } @@ -3629,9 +3683,9 @@ /* generate sei pic timing */ if( h->sps->vui.b_pic_struct_present || h->sps->vui.b_nal_hrd_parameters_present ) { - x264_nal_start( h, NAL_SEI, NAL_PRIORITY_DISPOSABLE ); + nal_start( h, NAL_SEI, NAL_PRIORITY_DISPOSABLE ); x264_sei_pic_timing_write( h, &h->out.bs ); - if( x264_nal_end( h ) ) + if( 
nal_end( h ) ) return -1; overhead += h->out.nal[h->out.i_nal-1].i_payload + SEI_OVERHEAD; } @@ -3640,9 +3694,9 @@ if( !IS_X264_TYPE_B( h->fenc->i_type ) && h->b_sh_backup ) { h->b_sh_backup = 0; - x264_nal_start( h, NAL_SEI, NAL_PRIORITY_DISPOSABLE ); + nal_start( h, NAL_SEI, NAL_PRIORITY_DISPOSABLE ); x264_sei_dec_ref_pic_marking_write( h, &h->out.bs ); - if( x264_nal_end( h ) ) + if( nal_end( h ) ) return -1; overhead += h->out.nal[h->out.i_nal-1].i_payload + SEI_OVERHEAD; } @@ -3651,21 +3705,21 @@ h->i_cpb_delay_pir_offset_next = h->fenc->i_cpb_delay; /* Filler space: 10 or 18 SEIs' worth of space, depending on resolution */ - if( h->param.i_avcintra_class ) + if( h->param.i_avcintra_class && h->param.i_avcintra_flavor != X264_AVCINTRA_FLAVOR_SONY ) { /* Write an empty filler NAL to mimic the AUD in the P2 format*/ - x264_nal_start( h, NAL_FILLER, NAL_PRIORITY_DISPOSABLE ); + nal_start( h, NAL_FILLER, NAL_PRIORITY_DISPOSABLE ); x264_filler_write( h, &h->out.bs, 0 ); - if( x264_nal_end( h ) ) + if( nal_end( h ) ) return -1; overhead += h->out.nal[h->out.i_nal-1].i_payload + NALU_OVERHEAD; /* All lengths are magic lengths that decoders expect to see */ /* "UMID" SEI */ - x264_nal_start( h, NAL_SEI, NAL_PRIORITY_DISPOSABLE ); + nal_start( h, NAL_SEI, NAL_PRIORITY_DISPOSABLE ); if( x264_sei_avcintra_umid_write( h, &h->out.bs ) < 0 ) return -1; - if( x264_nal_end( h ) ) + if( nal_end( h ) ) return -1; overhead += h->out.nal[h->out.i_nal-1].i_payload + SEI_OVERHEAD; @@ -3682,10 +3736,10 @@ total_len = 9*512; } /* "VANC" SEI */ - x264_nal_start( h, NAL_SEI, NAL_PRIORITY_DISPOSABLE ); + nal_start( h, NAL_SEI, NAL_PRIORITY_DISPOSABLE ); if( x264_sei_avcintra_vanc_write( h, &h->out.bs, unpadded_len ) < 0 ) return -1; - if( x264_nal_end( h ) ) + if( nal_end( h ) ) return -1; h->out.nal[h->out.i_nal-1].i_padding = total_len - h->out.nal[h->out.i_nal-1].i_payload - SEI_OVERHEAD; @@ -3703,20 +3757,20 @@ if( h->param.rc.b_stat_read && h->sh.i_type != SLICE_TYPE_I ) { 
x264_reference_build_list_optimal( h ); - x264_reference_check_reorder( h ); + reference_check_reorder( h ); } if( h->i_ref[0] ) h->fdec->i_poc_l0ref0 = h->fref[0][0]->i_poc; /* ------------------------ Create slice header ----------------------- */ - x264_slice_init( h, i_nal_type, i_global_qp ); + slice_init( h, i_nal_type, i_global_qp ); /*------------------------- Weights -------------------------------------*/ if( h->sh.i_type == SLICE_TYPE_B ) x264_macroblock_bipred_init( h ); - x264_weighted_pred_init( h ); + weighted_pred_init( h ); if( i_nal_ref_idc != NAL_PRIORITY_DISPOSABLE ) h->i_frame_num++; @@ -3726,24 +3780,24 @@ h->i_threadslice_end = h->mb.i_mb_height; if( h->i_thread_frames > 1 ) { - x264_threadpool_run( h->threadpool, (void*)x264_slices_write, h ); + x264_threadpool_run( h->threadpool, (void*)slices_write, h ); h->b_thread_active = 1; } else if( h->param.b_sliced_threads ) { - if( x264_threaded_slices_write( h ) ) + if( threaded_slices_write( h ) ) return -1; } else - if( (intptr_t)x264_slices_write( h ) ) + if( (intptr_t)slices_write( h ) ) return -1; - return x264_encoder_frame_end( thread_oldest, thread_current, pp_nal, pi_nal, pic_out ); + return encoder_frame_end( thread_oldest, thread_current, pp_nal, pi_nal, pic_out ); } -static int x264_encoder_frame_end( x264_t *h, x264_t *thread_current, - x264_nal_t **pp_nal, int *pi_nal, - x264_picture_t *pic_out ) +static int encoder_frame_end( x264_t *h, x264_t *thread_current, + x264_nal_t **pp_nal, int *pi_nal, + x264_picture_t *pic_out ) { char psz_message[80]; @@ -3765,9 +3819,9 @@ if( h->i_thread_frames > 1 && h->fenc->b_keyframe && h->sps->vui.b_nal_hrd_parameters_present ) { x264_hrd_fullness( h ); - x264_nal_start( h, NAL_SEI, NAL_PRIORITY_DISPOSABLE ); + nal_start( h, NAL_SEI, NAL_PRIORITY_DISPOSABLE ); x264_sei_buffering_period_write( h, &h->out.bs ); - if( x264_nal_end( h ) ) + if( nal_end( h ) ) return -1; /* buffering period sei must follow AUD, SPS and PPS and precede all other SEIs */ 
int idx = 0; @@ -3780,7 +3834,7 @@ h->out.nal[idx] = nal_tmp; } - int frame_size = x264_encoder_encapsulate_nals( h, 0 ); + int frame_size = encoder_encapsulate_nals( h, 0 ); if( frame_size < 0 ) return -1; @@ -3825,7 +3879,7 @@ * We don't know the size of the last slice until encapsulation so we add filler to the encapsulated NAL */ if( h->param.i_avcintra_class ) { - if( x264_check_encapsulated_buffer( h, h->thread[0], h->out.i_nal, frame_size, frame_size + filler ) < 0 ) + if( check_encapsulated_buffer( h, h->thread[0], h->out.i_nal, frame_size, frame_size + filler ) < 0 ) return -1; x264_nal_t *nal = &h->out.nal[h->out.i_nal-1]; @@ -3860,13 +3914,13 @@ else f = X264_MAX( 0, filler - overhead ); - if( x264_bitstream_check_buffer_filler( h, f ) ) + if( bitstream_check_buffer_filler( h, f ) ) return -1; - x264_nal_start( h, NAL_FILLER, NAL_PRIORITY_DISPOSABLE ); + nal_start( h, NAL_FILLER, NAL_PRIORITY_DISPOSABLE ); x264_filler_write( h, &h->out.bs, f ); - if( x264_nal_end( h ) ) + if( nal_end( h ) ) return -1; - int total_size = x264_encoder_encapsulate_nals( h, h->out.i_nal-1 ); + int total_size = encoder_encapsulate_nals( h, h->out.i_nal-1 ); if( total_size < 0 ) return -1; frame_size += total_size; @@ -3883,7 +3937,7 @@ x264_noise_reduction_update( h ); /* ---------------------- Compute/Print statistics --------------------- */ - x264_thread_sync_stat( h, h->thread[0] ); + thread_sync_stat( h, h->thread[0] ); /* Slice stat */ h->stat.i_frame_count[h->sh.i_type]++; @@ -3941,10 +3995,10 @@ }; int luma_size = h->param.i_width * h->param.i_height; int chroma_size = CHROMA_SIZE( luma_size ); - pic_out->prop.f_psnr[0] = x264_psnr( ssd[0], luma_size ); - pic_out->prop.f_psnr[1] = x264_psnr( ssd[1], chroma_size ); - pic_out->prop.f_psnr[2] = x264_psnr( ssd[2], chroma_size ); - pic_out->prop.f_psnr_avg = x264_psnr( ssd[0] + ssd[1] + ssd[2], luma_size + chroma_size*2 ); + pic_out->prop.f_psnr[0] = calc_psnr( ssd[0], luma_size ); + pic_out->prop.f_psnr[1] = calc_psnr( 
ssd[1], chroma_size ); + pic_out->prop.f_psnr[2] = calc_psnr( ssd[2], chroma_size ); + pic_out->prop.f_psnr_avg = calc_psnr( ssd[0] + ssd[1] + ssd[2], luma_size + chroma_size*2 ); h->stat.f_ssd_global[h->sh.i_type] += dur * (ssd[0] + ssd[1] + ssd[2]); h->stat.f_psnr_average[h->sh.i_type] += dur * pic_out->prop.f_psnr_avg; @@ -3980,9 +4034,9 @@ psz_message ); // keep stats all in one place - x264_thread_sync_stat( h->thread[0], h ); + thread_sync_stat( h->thread[0], h ); // for the use of the next frame - x264_thread_sync_stat( thread_current, h ); + thread_sync_stat( thread_current, h ); #ifdef DEBUG_MB_TYPE { @@ -4011,13 +4065,13 @@ } if( h->param.psz_dump_yuv ) - x264_frame_dump( h ); + frame_dump( h ); x264_emms(); return frame_size; } -static void x264_print_intra( int64_t *i_mb_count, double i_count, int b_print_pcm, char *intra ) +static void print_intra( int64_t *i_mb_count, double i_count, int b_print_pcm, char *intra ) { intra += sprintf( intra, "I16..4%s: %4.1f%% %4.1f%% %4.1f%%", b_print_pcm ? 
"..PCM" : "", @@ -4048,7 +4102,7 @@ #endif if( h->param.b_sliced_threads ) - x264_threadpool_wait_all( h ); + threadpool_wait_all( h ); if( h->param.i_threads > 1 ) x264_threadpool_delete( h->threadpool ); if( h->param.i_lookahead_threads > 1 ) @@ -4089,7 +4143,7 @@ (double)h->stat.i_frame_size[i_slice] / i_count, h->stat.f_psnr_mean_y[i_slice] / dur, h->stat.f_psnr_mean_u[i_slice] / dur, h->stat.f_psnr_mean_v[i_slice] / dur, h->stat.f_psnr_average[i_slice] / dur, - x264_psnr( h->stat.f_ssd_global[i_slice], dur * i_yuv_size ) ); + calc_psnr( h->stat.f_ssd_global[i_slice], dur * i_yuv_size ) ); } else { @@ -4126,7 +4180,7 @@ { int64_t *i_mb_count = h->stat.i_mb_count[SLICE_TYPE_I]; double i_count = (double)h->stat.i_frame_count[SLICE_TYPE_I] * h->mb.i_mb_count / 100.0; - x264_print_intra( i_mb_count, i_count, b_print_pcm, buf ); + print_intra( i_mb_count, i_count, b_print_pcm, buf ); x264_log( h, X264_LOG_INFO, "mb I %s\n", buf ); } if( h->stat.i_frame_count[SLICE_TYPE_P] > 0 ) @@ -4134,7 +4188,7 @@ int64_t *i_mb_count = h->stat.i_mb_count[SLICE_TYPE_P]; double i_count = (double)h->stat.i_frame_count[SLICE_TYPE_P] * h->mb.i_mb_count / 100.0; int64_t *i_mb_size = i_mb_count_size[SLICE_TYPE_P]; - x264_print_intra( i_mb_count, i_count, b_print_pcm, buf ); + print_intra( i_mb_count, i_count, b_print_pcm, buf ); x264_log( h, X264_LOG_INFO, "mb P %s P16..4: %4.1f%% %4.1f%% %4.1f%% %4.1f%% %4.1f%% skip:%4.1f%%\n", buf, @@ -4152,7 +4206,7 @@ double i_mb_list_count; int64_t *i_mb_size = i_mb_count_size[SLICE_TYPE_B]; int64_t list_count[3] = {0}; /* 0 == L0, 1 == L1, 2 == BI */ - x264_print_intra( i_mb_count, i_count, b_print_pcm, buf ); + print_intra( i_mb_count, i_count, b_print_pcm, buf ); for( int i = 0; i < X264_PARTTYPE_MAX; i++ ) for( int j = 0; j < 2; j++ ) { @@ -4232,17 +4286,27 @@ } buf[0] = 0; - int csize = CHROMA444 ? 
4 : 1; - if( i_mb_count != i_all_intra ) - sprintf( buf, " inter: %.1f%% %.1f%% %.1f%%", - h->stat.i_mb_cbp[1] * 100.0 / ((i_mb_count - i_all_intra)*4), - h->stat.i_mb_cbp[3] * 100.0 / ((i_mb_count - i_all_intra)*csize), - h->stat.i_mb_cbp[5] * 100.0 / ((i_mb_count - i_all_intra)*csize) ); - x264_log( h, X264_LOG_INFO, "coded y,%s,%s intra: %.1f%% %.1f%% %.1f%%%s\n", - CHROMA444?"u":"uvDC", CHROMA444?"v":"uvAC", - h->stat.i_mb_cbp[0] * 100.0 / (i_all_intra*4), - h->stat.i_mb_cbp[2] * 100.0 / (i_all_intra*csize), - h->stat.i_mb_cbp[4] * 100.0 / (i_all_intra*csize), buf ); + if( CHROMA_FORMAT ) + { + int csize = CHROMA444 ? 4 : 1; + if( i_mb_count != i_all_intra ) + sprintf( buf, " inter: %.1f%% %.1f%% %.1f%%", + h->stat.i_mb_cbp[1] * 100.0 / ((i_mb_count - i_all_intra)*4), + h->stat.i_mb_cbp[3] * 100.0 / ((i_mb_count - i_all_intra)*csize), + h->stat.i_mb_cbp[5] * 100.0 / ((i_mb_count - i_all_intra)*csize) ); + x264_log( h, X264_LOG_INFO, "coded y,%s,%s intra: %.1f%% %.1f%% %.1f%%%s\n", + CHROMA444?"u":"uvDC", CHROMA444?"v":"uvAC", + h->stat.i_mb_cbp[0] * 100.0 / (i_all_intra*4), + h->stat.i_mb_cbp[2] * 100.0 / (i_all_intra*csize), + h->stat.i_mb_cbp[4] * 100.0 / (i_all_intra*csize), buf ); + } + else + { + if( i_mb_count != i_all_intra ) + sprintf( buf, " inter: %.1f%%", h->stat.i_mb_cbp[1] * 100.0 / ((i_mb_count - i_all_intra)*4) ); + x264_log( h, X264_LOG_INFO, "coded y intra: %.1f%%%s\n", + h->stat.i_mb_cbp[0] * 100.0 / (i_all_intra*4), buf ); + } int64_t fixed_pred_modes[4][9] = {{0}}; int64_t sum_pred_modes[4] = {0}; @@ -4289,9 +4353,13 @@ fixed_pred_modes[3][3] * 100.0 / sum_pred_modes[3] ); if( h->param.analyse.i_weighted_pred >= X264_WEIGHTP_SIMPLE && h->stat.i_frame_count[SLICE_TYPE_P] > 0 ) - x264_log( h, X264_LOG_INFO, "Weighted P-Frames: Y:%.1f%% UV:%.1f%%\n", - h->stat.i_wpred[0] * 100.0 / h->stat.i_frame_count[SLICE_TYPE_P], - h->stat.i_wpred[1] * 100.0 / h->stat.i_frame_count[SLICE_TYPE_P] ); + { + buf[0] = 0; + if( CHROMA_FORMAT ) + sprintf( buf, " 
UV:%.1f%%", h->stat.i_wpred[1] * 100.0 / h->stat.i_frame_count[SLICE_TYPE_P] ); + x264_log( h, X264_LOG_INFO, "Weighted P-Frames: Y:%.1f%%%s\n", + h->stat.i_wpred[0] * 100.0 / h->stat.i_frame_count[SLICE_TYPE_P], buf ); + } for( int i_list = 0; i_list < 2; i_list++ ) for( int i_slice = 0; i_slice < 2; i_slice++ ) @@ -4315,7 +4383,7 @@ if( h->param.analyse.b_ssim ) { float ssim = SUM3( h->stat.f_ssim_mean_y ) / duration; - x264_log( h, X264_LOG_INFO, "SSIM Mean Y:%.7f (%6.3fdb)\n", ssim, x264_ssim( ssim ) ); + x264_log( h, X264_LOG_INFO, "SSIM Mean Y:%.7f (%6.3fdb)\n", ssim, calc_ssim_db( ssim ) ); } if( h->param.analyse.b_psnr ) { @@ -4325,7 +4393,7 @@ SUM3( h->stat.f_psnr_mean_u ) / duration, SUM3( h->stat.f_psnr_mean_v ) / duration, SUM3( h->stat.f_psnr_average ) / duration, - x264_psnr( SUM3( h->stat.f_ssd_global ), duration * i_yuv_size ), + calc_psnr( SUM3( h->stat.f_ssd_global ), duration * i_yuv_size ), f_bitrate ); } else @@ -4345,6 +4413,7 @@ x264_free( h->nal_buffer ); x264_free( h->reconfig_h ); x264_analyse_free_costs( h ); + x264_free( h->cost_table ); if( h->i_thread_frames > 1 ) h = h->thread[h->i_thread_phase]; diff -Nru x264-0.152.2854+gite9a5903/encoder/lookahead.c x264-0.158.2988+git-20191101.7817004/encoder/lookahead.c --- x264-0.152.2854+gite9a5903/encoder/lookahead.c 2017-12-31 12:50:51.000000000 +0000 +++ x264-0.158.2988+git-20191101.7817004/encoder/lookahead.c 2019-11-09 05:16:29.000000000 +0000 @@ -1,7 +1,7 @@ /***************************************************************************** * lookahead.c: high-level lookahead functions ***************************************************************************** - * Copyright (C) 2010-2017 Avail Media and x264 project + * Copyright (C) 2010-2019 Avail Media and x264 project * * Authors: Michael Kazmier * Alex Giladi @@ -39,7 +39,7 @@ #include "common/common.h" #include "analyse.h" -static void x264_lookahead_shift( x264_sync_frame_list_t *dst, x264_sync_frame_list_t *src, int count ) +static 
void lookahead_shift( x264_sync_frame_list_t *dst, x264_sync_frame_list_t *src, int count ) { int i = count; while( i-- ) @@ -56,7 +56,7 @@ } } -static void x264_lookahead_update_last_nonb( x264_t *h, x264_frame_t *new_nonb ) +static void lookahead_update_last_nonb( x264_t *h, x264_frame_t *new_nonb ) { if( h->lookahead->last_nonb ) x264_frame_push_unused( h, h->lookahead->last_nonb ); @@ -65,11 +65,11 @@ } #if HAVE_THREAD -static void x264_lookahead_slicetype_decide( x264_t *h ) +static void lookahead_slicetype_decide( x264_t *h ) { - x264_stack_align( x264_slicetype_decide, h ); + x264_slicetype_decide( h ); - x264_lookahead_update_last_nonb( h, h->lookahead->next.list[0] ); + lookahead_update_last_nonb( h, h->lookahead->next.list[0] ); int shift_frames = h->lookahead->next.list[0]->i_bframes + 1; x264_pthread_mutex_lock( &h->lookahead->ofbuf.mutex ); @@ -77,24 +77,24 @@ x264_pthread_cond_wait( &h->lookahead->ofbuf.cv_empty, &h->lookahead->ofbuf.mutex ); x264_pthread_mutex_lock( &h->lookahead->next.mutex ); - x264_lookahead_shift( &h->lookahead->ofbuf, &h->lookahead->next, shift_frames ); + lookahead_shift( &h->lookahead->ofbuf, &h->lookahead->next, shift_frames ); x264_pthread_mutex_unlock( &h->lookahead->next.mutex ); /* For MB-tree and VBV lookahead, we have to perform propagation analysis on I-frames too. 
*/ if( h->lookahead->b_analyse_keyframe && IS_X264_TYPE_I( h->lookahead->last_nonb->i_type ) ) - x264_stack_align( x264_slicetype_analyse, h, shift_frames ); + x264_slicetype_analyse( h, shift_frames ); x264_pthread_mutex_unlock( &h->lookahead->ofbuf.mutex ); } -static void *x264_lookahead_thread( x264_t *h ) +REALIGN_STACK static void *lookahead_thread( x264_t *h ) { while( !h->lookahead->b_exit_thread ) { x264_pthread_mutex_lock( &h->lookahead->ifbuf.mutex ); x264_pthread_mutex_lock( &h->lookahead->next.mutex ); int shift = X264_MIN( h->lookahead->next.i_max_size - h->lookahead->next.i_size, h->lookahead->ifbuf.i_size ); - x264_lookahead_shift( &h->lookahead->next, &h->lookahead->ifbuf, shift ); + lookahead_shift( &h->lookahead->next, &h->lookahead->ifbuf, shift ); x264_pthread_mutex_unlock( &h->lookahead->next.mutex ); if( h->lookahead->next.i_size <= h->lookahead->i_slicetype_length + h->param.b_vfr_input ) { @@ -105,22 +105,23 @@ else { x264_pthread_mutex_unlock( &h->lookahead->ifbuf.mutex ); - x264_lookahead_slicetype_decide( h ); + lookahead_slicetype_decide( h ); } } /* end of input frames */ x264_pthread_mutex_lock( &h->lookahead->ifbuf.mutex ); x264_pthread_mutex_lock( &h->lookahead->next.mutex ); - x264_lookahead_shift( &h->lookahead->next, &h->lookahead->ifbuf, h->lookahead->ifbuf.i_size ); + lookahead_shift( &h->lookahead->next, &h->lookahead->ifbuf, h->lookahead->ifbuf.i_size ); x264_pthread_mutex_unlock( &h->lookahead->next.mutex ); x264_pthread_mutex_unlock( &h->lookahead->ifbuf.mutex ); while( h->lookahead->next.i_size ) - x264_lookahead_slicetype_decide( h ); + lookahead_slicetype_decide( h ); x264_pthread_mutex_lock( &h->lookahead->ofbuf.mutex ); h->lookahead->b_thread_active = 0; x264_pthread_cond_broadcast( &h->lookahead->ofbuf.cv_fill ); x264_pthread_mutex_unlock( &h->lookahead->ofbuf.mutex ); return NULL; } + #endif int x264_lookahead_init( x264_t *h, int i_slicetype_length ) @@ -152,7 +153,7 @@ if( x264_macroblock_thread_allocate( look_h, 1 
) < 0 ) goto fail; - if( x264_pthread_create( &look->thread_handle, NULL, (void*)x264_lookahead_thread, look_h ) ) + if( x264_pthread_create( &look->thread_handle, NULL, (void*)lookahead_thread, look_h ) ) goto fail; look->b_thread_active = 1; @@ -201,7 +202,7 @@ return b_empty; } -static void x264_lookahead_encoder_shift( x264_t *h ) +static void lookahead_encoder_shift( x264_t *h ) { if( !h->lookahead->ofbuf.i_size ) return; @@ -221,7 +222,7 @@ x264_pthread_mutex_lock( &h->lookahead->ofbuf.mutex ); while( !h->lookahead->ofbuf.i_size && h->lookahead->b_thread_active ) x264_pthread_cond_wait( &h->lookahead->ofbuf.cv_fill, &h->lookahead->ofbuf.mutex ); - x264_lookahead_encoder_shift( h ); + lookahead_encoder_shift( h ); x264_pthread_mutex_unlock( &h->lookahead->ofbuf.mutex ); } else @@ -230,15 +231,15 @@ if( h->frames.current[0] || !h->lookahead->next.i_size ) return; - x264_stack_align( x264_slicetype_decide, h ); - x264_lookahead_update_last_nonb( h, h->lookahead->next.list[0] ); + x264_slicetype_decide( h ); + lookahead_update_last_nonb( h, h->lookahead->next.list[0] ); int shift_frames = h->lookahead->next.list[0]->i_bframes + 1; - x264_lookahead_shift( &h->lookahead->ofbuf, &h->lookahead->next, shift_frames ); + lookahead_shift( &h->lookahead->ofbuf, &h->lookahead->next, shift_frames ); /* For MB-tree and VBV lookahead, we have to perform propagation analysis on I-frames too. 
*/ if( h->lookahead->b_analyse_keyframe && IS_X264_TYPE_I( h->lookahead->last_nonb->i_type ) ) - x264_stack_align( x264_slicetype_analyse, h, shift_frames ); + x264_slicetype_analyse( h, shift_frames ); - x264_lookahead_encoder_shift( h ); + lookahead_encoder_shift( h ); } } diff -Nru x264-0.152.2854+gite9a5903/encoder/macroblock.c x264-0.158.2988+git-20191101.7817004/encoder/macroblock.c --- x264-0.152.2854+gite9a5903/encoder/macroblock.c 2017-12-31 12:50:51.000000000 +0000 +++ x264-0.158.2988+git-20191101.7817004/encoder/macroblock.c 2019-11-09 05:16:29.000000000 +0000 @@ -1,7 +1,7 @@ /***************************************************************************** * macroblock.c: macroblock encoding ***************************************************************************** - * Copyright (C) 2003-2017 x264 project + * Copyright (C) 2003-2019 x264 project * * Authors: Laurent Aimar * Loren Merritt @@ -123,7 +123,7 @@ /* This means that decimation can be done merely by adjusting the CBP and NNZ * rather than memsetting the coefficients. */ -static void x264_mb_encode_i16x16( x264_t *h, int p, int i_qp ) +static void mb_encode_i16x16( x264_t *h, int p, int i_qp ) { pixel *p_src = h->mb.pic.p_fenc[p]; pixel *p_dst = h->mb.pic.p_fdec[p]; @@ -242,7 +242,7 @@ * Unlike luma blocks, this can't be done with a lookup table or * other shortcut technique because of the interdependencies * between the coefficients due to the chroma DC transform. 
*/ -static ALWAYS_INLINE int x264_mb_optimize_chroma_dc( x264_t *h, dctcoef *dct_dc, int dequant_mf[6][16], int i_qp, int chroma422 ) +static ALWAYS_INLINE int mb_optimize_chroma_dc( x264_t *h, dctcoef *dct_dc, int dequant_mf[6][16], int i_qp, int chroma422 ) { int dmf = dequant_mf[i_qp%6][0] << i_qp/6; @@ -256,7 +256,7 @@ return h->quantf.optimize_chroma_2x2_dc( dct_dc, dmf ); } -static ALWAYS_INLINE void x264_mb_encode_chroma_internal( x264_t *h, int b_inter, int i_qp, int chroma422 ) +static ALWAYS_INLINE void mb_encode_chroma_internal( x264_t *h, int b_inter, int i_qp, int chroma422 ) { int nz, nz_dc; int b_decimate = b_inter && h->mb.b_dct_decimate; @@ -316,7 +316,7 @@ if( nz_dc ) { - if( !x264_mb_optimize_chroma_dc( h, dct_dc, dequant_mf, i_qp+3*chroma422, chroma422 ) ) + if( !mb_optimize_chroma_dc( h, dct_dc, dequant_mf, i_qp+3*chroma422, chroma422 ) ) continue; h->mb.cache.non_zero_count[x264_scan8[CHROMA_DC+ch]] = 1; if( chroma422 ) @@ -441,7 +441,7 @@ if( !nz_dc ) /* Whole block is empty */ continue; - if( !x264_mb_optimize_chroma_dc( h, dct_dc, dequant_mf, i_qp+3*chroma422, chroma422 ) ) + if( !mb_optimize_chroma_dc( h, dct_dc, dequant_mf, i_qp+3*chroma422, chroma422 ) ) { h->mb.cache.non_zero_count[x264_scan8[CHROMA_DC+ch]] = 0; continue; @@ -492,12 +492,12 @@ void x264_mb_encode_chroma( x264_t *h, int b_inter, int i_qp ) { if( CHROMA_FORMAT == CHROMA_420 ) - x264_mb_encode_chroma_internal( h, b_inter, i_qp, 0 ); + mb_encode_chroma_internal( h, b_inter, i_qp, 0 ); else - x264_mb_encode_chroma_internal( h, b_inter, i_qp, 1 ); + mb_encode_chroma_internal( h, b_inter, i_qp, 1 ); } -static void x264_macroblock_encode_skip( x264_t *h ) +static void macroblock_encode_skip( x264_t *h ) { M32( &h->mb.cache.non_zero_count[x264_scan8[ 0]] ) = 0; M32( &h->mb.cache.non_zero_count[x264_scan8[ 2]] ) = 0; @@ -615,7 +615,7 @@ /***************************************************************************** * x264_macroblock_encode: 
*****************************************************************************/ -static ALWAYS_INLINE void x264_macroblock_encode_internal( x264_t *h, int plane_count, int chroma ) +static ALWAYS_INLINE void macroblock_encode_internal( x264_t *h, int plane_count, int chroma ) { int i_qp = h->mb.i_qp; int b_decimate = h->mb.b_dct_decimate; @@ -691,7 +691,7 @@ } } - x264_macroblock_encode_skip( h ); + macroblock_encode_skip( h ); return; } if( h->mb.i_type == B_SKIP ) @@ -699,7 +699,7 @@ /* don't do bskip motion compensation if it was already done in macroblock_analyse */ if( !h->mb.b_skip_mc ) x264_mb_mc( h ); - x264_macroblock_encode_skip( h ); + macroblock_encode_skip( h ); return; } @@ -708,7 +708,7 @@ h->mb.b_transform_8x8 = 0; for( int p = 0; p < plane_count; p++, i_qp = h->mb.i_chroma_qp ) - x264_mb_encode_i16x16( h, p, i_qp ); + mb_encode_i16x16( h, p, i_qp ); } else if( h->mb.i_type == I_8x8 ) { @@ -974,16 +974,18 @@ void x264_macroblock_encode( x264_t *h ) { if( CHROMA444 ) - x264_macroblock_encode_internal( h, 3, 0 ); + macroblock_encode_internal( h, 3, 0 ); + else if( CHROMA_FORMAT ) + macroblock_encode_internal( h, 1, 1 ); else - x264_macroblock_encode_internal( h, 1, 1 ); + macroblock_encode_internal( h, 1, 0 ); } /***************************************************************************** * x264_macroblock_probe_skip: * Check if the current MB could be encoded as a [PB]_SKIP *****************************************************************************/ -static ALWAYS_INLINE int x264_macroblock_probe_skip_internal( x264_t *h, int b_bidir, int plane_count, int chroma ) +static ALWAYS_INLINE int macroblock_probe_skip_internal( x264_t *h, int b_bidir, int plane_count, int chroma ) { ALIGNED_ARRAY_64( dctcoef, dct4x4,[8],[16] ); ALIGNED_ARRAY_64( dctcoef, dctscan,[16] ); @@ -1126,12 +1128,14 @@ int x264_macroblock_probe_skip( x264_t *h, int b_bidir ) { - if( CHROMA_FORMAT == CHROMA_444 ) - return x264_macroblock_probe_skip_internal( h, b_bidir, 3, 
CHROMA_444 ); + if( CHROMA_FORMAT == CHROMA_420 ) + return macroblock_probe_skip_internal( h, b_bidir, 1, CHROMA_420 ); else if( CHROMA_FORMAT == CHROMA_422 ) - return x264_macroblock_probe_skip_internal( h, b_bidir, 1, CHROMA_422 ); + return macroblock_probe_skip_internal( h, b_bidir, 1, CHROMA_422 ); + else if( CHROMA_FORMAT == CHROMA_444 ) + return macroblock_probe_skip_internal( h, b_bidir, 3, CHROMA_444 ); else - return x264_macroblock_probe_skip_internal( h, b_bidir, 1, CHROMA_420 ); + return macroblock_probe_skip_internal( h, b_bidir, 1, CHROMA_400 ); } /**************************************************************************** @@ -1172,7 +1176,7 @@ * RD only; 4 calls to this do not make up for one macroblock_encode. * doesn't transform chroma dc. *****************************************************************************/ -static ALWAYS_INLINE void x264_macroblock_encode_p8x8_internal( x264_t *h, int i8, int plane_count, int chroma ) +static ALWAYS_INLINE void macroblock_encode_p8x8_internal( x264_t *h, int i8, int plane_count, int chroma ) { int b_decimate = h->mb.b_dct_decimate; int i_qp = h->mb.i_qp; @@ -1365,18 +1369,20 @@ void x264_macroblock_encode_p8x8( x264_t *h, int i8 ) { - if( CHROMA444 ) - x264_macroblock_encode_p8x8_internal( h, i8, 3, CHROMA_444 ); + if( CHROMA_FORMAT == CHROMA_420 ) + macroblock_encode_p8x8_internal( h, i8, 1, CHROMA_420 ); else if( CHROMA_FORMAT == CHROMA_422 ) - x264_macroblock_encode_p8x8_internal( h, i8, 1, CHROMA_422 ); + macroblock_encode_p8x8_internal( h, i8, 1, CHROMA_422 ); + else if( CHROMA_FORMAT == CHROMA_444 ) + macroblock_encode_p8x8_internal( h, i8, 3, CHROMA_444 ); else - x264_macroblock_encode_p8x8_internal( h, i8, 1, CHROMA_420 ); + macroblock_encode_p8x8_internal( h, i8, 1, CHROMA_400 ); } /***************************************************************************** * RD only, luma only (for 4:2:0) *****************************************************************************/ -static ALWAYS_INLINE void 
x264_macroblock_encode_p4x4_internal( x264_t *h, int i4, int plane_count ) +static ALWAYS_INLINE void macroblock_encode_p4x4_internal( x264_t *h, int i4, int plane_count ) { int i_qp = h->mb.i_qp; @@ -1413,7 +1419,7 @@ void x264_macroblock_encode_p4x4( x264_t *h, int i8 ) { if( CHROMA444 ) - x264_macroblock_encode_p4x4_internal( h, i8, 3 ); + macroblock_encode_p4x4_internal( h, i8, 3 ); else - x264_macroblock_encode_p4x4_internal( h, i8, 1 ); + macroblock_encode_p4x4_internal( h, i8, 1 ); } diff -Nru x264-0.152.2854+gite9a5903/encoder/macroblock.h x264-0.158.2988+git-20191101.7817004/encoder/macroblock.h --- x264-0.152.2854+gite9a5903/encoder/macroblock.h 2017-12-31 12:50:51.000000000 +0000 +++ x264-0.158.2988+git-20191101.7817004/encoder/macroblock.h 2019-11-09 05:16:29.000000000 +0000 @@ -1,7 +1,7 @@ /***************************************************************************** * macroblock.h: macroblock encoding ***************************************************************************** - * Copyright (C) 2003-2017 x264 project + * Copyright (C) 2003-2019 x264 project * * Authors: Loren Merritt * Laurent Aimar @@ -29,11 +29,10 @@ #include "common/macroblock.h" -extern const int x264_lambda2_tab[QP_MAX_MAX+1]; -extern const uint16_t x264_lambda_tab[QP_MAX_MAX+1]; - +#define x264_rdo_init x264_template(rdo_init) void x264_rdo_init( void ); +#define x264_macroblock_probe_skip x264_template(macroblock_probe_skip) int x264_macroblock_probe_skip( x264_t *h, int b_bidir ); #define x264_macroblock_probe_pskip( h )\ @@ -41,32 +40,51 @@ #define x264_macroblock_probe_bskip( h )\ x264_macroblock_probe_skip( h, 1 ) +#define x264_predict_lossless_4x4 x264_template(predict_lossless_4x4) void x264_predict_lossless_4x4( x264_t *h, pixel *p_dst, int p, int idx, int i_mode ); +#define x264_predict_lossless_8x8 x264_template(predict_lossless_8x8) void x264_predict_lossless_8x8( x264_t *h, pixel *p_dst, int p, int idx, int i_mode, pixel edge[36] ); +#define 
x264_predict_lossless_16x16 x264_template(predict_lossless_16x16) void x264_predict_lossless_16x16( x264_t *h, int p, int i_mode ); +#define x264_predict_lossless_chroma x264_template(predict_lossless_chroma) void x264_predict_lossless_chroma( x264_t *h, int i_mode ); +#define x264_macroblock_encode x264_template(macroblock_encode) void x264_macroblock_encode ( x264_t *h ); +#define x264_macroblock_write_cabac x264_template(macroblock_write_cabac) void x264_macroblock_write_cabac ( x264_t *h, x264_cabac_t *cb ); +#define x264_macroblock_write_cavlc x264_template(macroblock_write_cavlc) void x264_macroblock_write_cavlc ( x264_t *h ); +#define x264_macroblock_encode_p8x8 x264_template(macroblock_encode_p8x8) void x264_macroblock_encode_p8x8( x264_t *h, int i8 ); +#define x264_macroblock_encode_p4x4 x264_template(macroblock_encode_p4x4) void x264_macroblock_encode_p4x4( x264_t *h, int i4 ); +#define x264_mb_encode_chroma x264_template(mb_encode_chroma) void x264_mb_encode_chroma( x264_t *h, int b_inter, int i_qp ); +#define x264_cabac_mb_skip x264_template(cabac_mb_skip) void x264_cabac_mb_skip( x264_t *h, int b_skip ); +#define x264_cabac_block_residual_c x264_template(cabac_block_residual_c) void x264_cabac_block_residual_c( x264_t *h, x264_cabac_t *cb, int ctx_block_cat, dctcoef *l ); +#define x264_cabac_block_residual_8x8_rd_c x264_template(cabac_block_residual_8x8_rd_c) void x264_cabac_block_residual_8x8_rd_c( x264_t *h, x264_cabac_t *cb, int ctx_block_cat, dctcoef *l ); +#define x264_cabac_block_residual_rd_c x264_template(cabac_block_residual_rd_c) void x264_cabac_block_residual_rd_c( x264_t *h, x264_cabac_t *cb, int ctx_block_cat, dctcoef *l ); +#define x264_quant_luma_dc_trellis x264_template(quant_luma_dc_trellis) int x264_quant_luma_dc_trellis( x264_t *h, dctcoef *dct, int i_quant_cat, int i_qp, int ctx_block_cat, int b_intra, int idx ); +#define x264_quant_chroma_dc_trellis x264_template(quant_chroma_dc_trellis) int x264_quant_chroma_dc_trellis( x264_t *h, 
dctcoef *dct, int i_qp, int b_intra, int idx ); +#define x264_quant_4x4_trellis x264_template(quant_4x4_trellis) int x264_quant_4x4_trellis( x264_t *h, dctcoef *dct, int i_quant_cat, int i_qp, int ctx_block_cat, int b_intra, int b_chroma, int idx ); +#define x264_quant_8x8_trellis x264_template(quant_8x8_trellis) int x264_quant_8x8_trellis( x264_t *h, dctcoef *dct, int i_quant_cat, int i_qp, int ctx_block_cat, int b_intra, int b_chroma, int idx ); +#define x264_noise_reduction_update x264_template(noise_reduction_update) void x264_noise_reduction_update( x264_t *h ); static ALWAYS_INLINE int x264_quant_4x4( x264_t *h, dctcoef dct[16], int i_qp, int ctx_block_cat, int b_intra, int p, int idx ) @@ -195,4 +213,3 @@ } #endif - diff -Nru x264-0.152.2854+gite9a5903/encoder/me.c x264-0.158.2988+git-20191101.7817004/encoder/me.c --- x264-0.152.2854+gite9a5903/encoder/me.c 2017-12-31 12:50:51.000000000 +0000 +++ x264-0.158.2988+git-20191101.7817004/encoder/me.c 2019-11-09 05:16:29.000000000 +0000 @@ -1,7 +1,7 @@ /***************************************************************************** * me.c: motion estimation ***************************************************************************** - * Copyright (C) 2003-2017 x264 project + * Copyright (C) 2003-2019 x264 project * * Authors: Loren Merritt * Laurent Aimar @@ -424,7 +424,7 @@ /* Uneven-cross Multi-Hexagon-grid Search * as in JM, except with different early termination */ - static const uint8_t x264_pixel_size_shift[7] = { 0, 1, 1, 2, 3, 3, 4 }; + static const uint8_t pixel_size_shift[7] = { 0, 1, 1, 2, 3, 3, 4 }; int ucost1, ucost2; int cross_start = 1; @@ -446,7 +446,7 @@ omx = bmx; omy = bmy; /* early termination */ -#define SAD_THRESH(v) ( bcost < ( v >> x264_pixel_size_shift[i_pixel] ) ) +#define SAD_THRESH(v) ( bcost < ( v >> pixel_size_shift[i_pixel] ) ) if( bcost == ucost2 && SAD_THRESH(2000) ) { COST_MV_X4( 0,-2, -1,-1, 1,-1, -2,0 ); @@ -633,7 +633,6 @@ /* successive elimination by comparing DC before a full 
SAD, * because sum(abs(diff)) >= abs(diff(sum)). */ uint16_t *sums_base = m->integral; - ALIGNED_16( static pixel zero[8*FENC_STRIDE] ) = {0}; ALIGNED_ARRAY_16( int, enc_dc,[4] ); int sad_size = i_pixel <= PIXEL_8x8 ? PIXEL_8x8 : PIXEL_4x4; int delta = x264_pixel_size[sad_size].w; @@ -641,7 +640,7 @@ int xn; uint16_t *cost_fpel_mvx = h->cost_mv_fpel[h->mb.i_qp][-m->mvp[0]&3] + (-m->mvp[0]>>2); - h->pixf.sad_x4[sad_size]( zero, p_fenc, p_fenc+delta, + h->pixf.sad_x4[sad_size]( (pixel*)x264_zero, p_fenc, p_fenc+delta, p_fenc+delta*FENC_STRIDE, p_fenc+delta+delta*FENC_STRIDE, FENC_STRIDE, enc_dc ); if( delta == 4 ) @@ -1012,7 +1011,7 @@ src[2][list][i] = h->mc.get_ref( pixv_buf[list][i], &stride[2][list][i], &m->p_fref[8],\ m->i_stride[2], mvx, mvy, bw, bh, x264_weight_none );\ }\ - else\ + else if( CHROMA_FORMAT )\ h->mc.mc_chroma( pixu_buf[list][i], pixv_buf[list][i], 8, m->p_fref[4], m->i_stride[1],\ mvx, 2*(mvy+mv##list##y_offset)>>chroma_v_shift, bw>>1, bh>>chroma_v_shift );\ }\ @@ -1022,9 +1021,10 @@ /* Don't unroll the BIME_CACHE loop. I couldn't find any way to force this * other than making its iteration count not a compile-time constant. 
*/ +#define x264_iter_kludge x264_template(iter_kludge) int x264_iter_kludge = 0; -static void ALWAYS_INLINE x264_me_refine_bidir( x264_t *h, x264_me_t *m0, x264_me_t *m1, int i_weight, int i8, int i_lambda2, int rd ) +static ALWAYS_INLINE void me_refine_bidir( x264_t *h, x264_me_t *m0, x264_me_t *m1, int i_weight, int i8, int i_lambda2, int rd ) { int x = i8&1; int y = i8>>1; @@ -1134,7 +1134,7 @@ h->mc.avg[i_pixel]( pixu, FDEC_STRIDE, src[1][0][i0], stride[1][0][i0], src[1][1][i1], stride[1][1][i1], i_weight ); h->mc.avg[i_pixel]( pixv, FDEC_STRIDE, src[2][0][i0], stride[2][0][i0], src[2][1][i1], stride[2][1][i1], i_weight ); } - else + else if( CHROMA_FORMAT ) { h->mc.avg[chromapix]( pixu, FDEC_STRIDE, pixu_buf[0][i0], 8, pixu_buf[1][i1], 8, i_weight ); h->mc.avg[chromapix]( pixv, FDEC_STRIDE, pixv_buf[0][i0], 8, pixv_buf[1][i1], 8, i_weight ); @@ -1179,7 +1179,7 @@ void x264_me_refine_bidir_satd( x264_t *h, x264_me_t *m0, x264_me_t *m1, int i_weight ) { - x264_me_refine_bidir( h, m0, m1, i_weight, 0, 0, 0 ); + me_refine_bidir( h, m0, m1, i_weight, 0, 0, 0 ); } void x264_me_refine_bidir_rd( x264_t *h, x264_me_t *m0, x264_me_t *m1, int i_weight, int i8, int i_lambda2 ) @@ -1187,7 +1187,7 @@ /* Motion compensation is done as part of bidir_rd; don't repeat * it in encoding. 
*/ h->mb.b_skip_mc = 1; - x264_me_refine_bidir( h, m0, m1, i_weight, i8, i_lambda2, 1 ); + me_refine_bidir( h, m0, m1, i_weight, i8, i_lambda2, 1 ); h->mb.b_skip_mc = 0; } @@ -1216,7 +1216,7 @@ h->mc.mc_luma( pixu, FDEC_STRIDE, &m->p_fref[4], m->i_stride[1], mx, my, bw, bh, &m->weight[1] ); \ h->mc.mc_luma( pixv, FDEC_STRIDE, &m->p_fref[8], m->i_stride[2], mx, my, bw, bh, &m->weight[2] ); \ } \ - else if( m->i_pixel <= PIXEL_8x8 ) \ + else if( CHROMA_FORMAT && m->i_pixel <= PIXEL_8x8 ) \ { \ h->mc.mc_chroma( pixu, pixv, FDEC_STRIDE, m->p_fref[4], m->i_stride[1], \ mx, 2*(my+mvy_offset)>>chroma_v_shift, bw>>1, bh>>chroma_v_shift ); \ diff -Nru x264-0.152.2854+gite9a5903/encoder/me.h x264-0.158.2988+git-20191101.7817004/encoder/me.h --- x264-0.152.2854+gite9a5903/encoder/me.h 2017-12-31 12:50:51.000000000 +0000 +++ x264-0.158.2988+git-20191101.7817004/encoder/me.h 2019-11-09 05:16:29.000000000 +0000 @@ -1,7 +1,7 @@ /***************************************************************************** * me.h: motion estimation ***************************************************************************** - * Copyright (C) 2003-2017 x264 project + * Copyright (C) 2003-2019 x264 project * * Authors: Loren Merritt * Laurent Aimar @@ -24,8 +24,8 @@ * For more information, contact us at licensing@x264.com. 
*****************************************************************************/ -#ifndef X264_ME_H -#define X264_ME_H +#ifndef X264_ENCODER_ME_H +#define X264_ENCODER_ME_H #define COST_MAX (1<<28) #define COST_MAX64 (1ULL<<60) @@ -55,15 +55,22 @@ ALIGNED_4( int16_t mv[2] ); } ALIGNED_64( x264_me_t ); +#define x264_me_search_ref x264_template(me_search_ref) void x264_me_search_ref( x264_t *h, x264_me_t *m, int16_t (*mvc)[2], int i_mvc, int *p_fullpel_thresh ); #define x264_me_search( h, m, mvc, i_mvc )\ x264_me_search_ref( h, m, mvc, i_mvc, NULL ) +#define x264_me_refine_qpel x264_template(me_refine_qpel) void x264_me_refine_qpel( x264_t *h, x264_me_t *m ); +#define x264_me_refine_qpel_refdupe x264_template(me_refine_qpel_refdupe) void x264_me_refine_qpel_refdupe( x264_t *h, x264_me_t *m, int *p_halfpel_thresh ); +#define x264_me_refine_qpel_rd x264_template(me_refine_qpel_rd) void x264_me_refine_qpel_rd( x264_t *h, x264_me_t *m, int i_lambda2, int i4, int i_list ); +#define x264_me_refine_bidir_rd x264_template(me_refine_bidir_rd) void x264_me_refine_bidir_rd( x264_t *h, x264_me_t *m0, x264_me_t *m1, int i_weight, int i8, int i_lambda2 ); +#define x264_me_refine_bidir_satd x264_template(me_refine_bidir_satd) void x264_me_refine_bidir_satd( x264_t *h, x264_me_t *m0, x264_me_t *m1, int i_weight ); +#define x264_rd_cost_part x264_template(rd_cost_part) uint64_t x264_rd_cost_part( x264_t *h, int i_lambda2, int i8, int i_pixel ); #define COPY1_IF_LT(x,y)\ diff -Nru x264-0.152.2854+gite9a5903/encoder/ratecontrol.c x264-0.158.2988+git-20191101.7817004/encoder/ratecontrol.c --- x264-0.152.2854+gite9a5903/encoder/ratecontrol.c 2017-12-31 12:50:51.000000000 +0000 +++ x264-0.158.2988+git-20191101.7817004/encoder/ratecontrol.c 2019-11-09 05:16:29.000000000 +0000 @@ -1,7 +1,7 @@ /***************************************************************************** * ratecontrol.c: ratecontrol ***************************************************************************** - * Copyright (C) 
2005-2017 x264 project + * Copyright (C) 2005-2019 x264 project * * Authors: Loren Merritt * Michael Niedermayer @@ -154,8 +154,8 @@ } mbtree; /* MBRC stuff */ - float frame_size_estimated; /* Access to this variable must be atomic: double is - * not atomic on all arches we care about */ + volatile float frame_size_estimated; /* Access to this variable must be atomic: double is + * not atomic on all arches we care about */ double frame_size_maximum; /* Maximum frame size due to MinCR */ double frame_size_planned; double slice_size_planned; @@ -243,7 +243,7 @@ stride <<= b_field; if( b_chroma ) { - ALIGNED_ARRAY_32( pixel, pix,[FENC_STRIDE*16] ); + ALIGNED_ARRAY_64( pixel, pix,[FENC_STRIDE*16] ); int chromapix = h->luma2chroma_pixel[PIXEL_16x16]; int shift = 7 - CHROMA_V_SHIFT; @@ -256,7 +256,7 @@ } // Find the total AC energy of the block in all planes. -static NOINLINE uint32_t x264_ac_energy_mb( x264_t *h, int mb_x, int mb_y, x264_frame_t *frame ) +static NOINLINE uint32_t ac_energy_mb( x264_t *h, int mb_x, int mb_y, x264_frame_t *frame ) { /* This function contains annoying hacks because GCC has a habit of reordering emms * and putting it after floating point ops. 
As a result, we put the emms at the end of the @@ -278,7 +278,7 @@ var_interlaced += ac_energy_plane( h, mb_x, mb_y, frame, 2, 0, 1, 1 ); var_progressive += ac_energy_plane( h, mb_x, mb_y, frame, 2, 0, 0, 0 ); } - else + else if( CHROMA_FORMAT ) { var_interlaced += ac_energy_plane( h, mb_x, mb_y, frame, 1, 1, 1, 1 ); var_progressive += ac_energy_plane( h, mb_x, mb_y, frame, 1, 1, 0, 0 ); @@ -293,7 +293,7 @@ var += ac_energy_plane( h, mb_x, mb_y, frame, 1, 0, PARAM_INTERLACED, 1 ); var += ac_energy_plane( h, mb_x, mb_y, frame, 2, 0, PARAM_INTERLACED, 1 ); } - else + else if( CHROMA_FORMAT ) var += ac_energy_plane( h, mb_x, mb_y, frame, 1, 1, PARAM_INTERLACED, 1 ); } x264_emms(); @@ -337,7 +337,7 @@ { for( int mb_y = 0; mb_y < h->mb.i_mb_height; mb_y++ ) for( int mb_x = 0; mb_x < h->mb.i_mb_width; mb_x++ ) - x264_ac_energy_mb( h, mb_x, mb_y, frame ); + ac_energy_mb( h, mb_x, mb_y, frame ); } else return; @@ -358,7 +358,7 @@ for( int mb_y = 0; mb_y < h->mb.i_mb_height; mb_y++ ) for( int mb_x = 0; mb_x < h->mb.i_mb_width; mb_x++ ) { - uint32_t energy = x264_ac_energy_mb( h, mb_x, mb_y, frame ); + uint32_t energy = ac_energy_mb( h, mb_x, mb_y, frame ); float qp_adj = powf( energy * bit_depth_correction + 1, 0.125f ); frame->f_qp_offset[mb_x + mb_y*h->mb.i_mb_stride] = qp_adj; avg_adj += qp_adj; @@ -390,7 +390,7 @@ } else { - uint32_t energy = x264_ac_energy_mb( h, mb_x, mb_y, frame ); + uint32_t energy = ac_energy_mb( h, mb_x, mb_y, frame ); qp_adj = strength * (x264_log2( X264_MAX(energy, 1) ) - (14.427f + 2*(BIT_DEPTH-8))); } if( quant_offsets ) @@ -413,7 +413,7 @@ } } -static int x264_macroblock_tree_rescale_init( x264_t *h, x264_ratecontrol_t *rc ) +static int macroblock_tree_rescale_init( x264_t *h, x264_ratecontrol_t *rc ) { /* Use fractional QP array dimensions to compensate for edge padding */ float srcdim[2] = {rc->mbtree.srcdim[0] / 16.f, rc->mbtree.srcdim[1] / 16.f}; @@ -486,7 +486,7 @@ return -1; } -static void x264_macroblock_tree_rescale_destroy( 
x264_ratecontrol_t *rc ) +static void macroblock_tree_rescale_destroy( x264_ratecontrol_t *rc ) { for( int i = 0; i < 2; i++ ) { @@ -505,7 +505,7 @@ return sum; } -static void x264_macroblock_tree_rescale( x264_t *h, x264_ratecontrol_t *rc, float *dst ) +static void macroblock_tree_rescale( x264_t *h, x264_ratecontrol_t *rc, float *dst ) { float *input, *output; int filtersize, stride, height; @@ -567,14 +567,14 @@ float *dst = rc->mbtree.rescale_enabled ? rc->mbtree.scale_buffer[0] : frame->f_qp_offset; h->mc.mbtree_fix8_unpack( dst, rc->mbtree.qp_buffer[rc->mbtree.qpbuf_pos], rc->mbtree.src_mb_count ); if( rc->mbtree.rescale_enabled ) - x264_macroblock_tree_rescale( h, rc, frame->f_qp_offset ); + macroblock_tree_rescale( h, rc, frame->f_qp_offset ); if( h->frames.b_have_lowres ) for( int i = 0; i < h->mb.i_mb_count; i++ ) frame->i_inv_qscale_factor[i] = x264_exp2fix8( frame->f_qp_offset[i] ); rc->mbtree.qpbuf_pos--; } else - x264_stack_align( x264_adaptive_quant_frame, h, frame, quant_offsets ); + x264_adaptive_quant_frame( h, frame, quant_offsets ); return 0; fail: x264_log( h, X264_LOG_ERROR, "Incomplete MB-tree stats file.\n" ); @@ -618,7 +618,7 @@ return 0; } -static char *x264_strcat_filename( char *input, char *suffix ) +static char *strcat_filename( char *input, char *suffix ) { char *output = x264_malloc( strlen( input ) + strlen( suffix ) + 1 ); if( !output ) @@ -771,9 +771,9 @@ rc->last_non_b_pict_type = -1; rc->cbr_decay = 1.0; - if( h->param.rc.i_rc_method == X264_RC_CRF && h->param.rc.b_stat_read ) + if( h->param.rc.i_rc_method != X264_RC_ABR && h->param.rc.b_stat_read ) { - x264_log( h, X264_LOG_ERROR, "constant rate-factor is incompatible with 2pass.\n" ); + x264_log( h, X264_LOG_ERROR, "CRF/CQP is incompatible with 2pass.\n" ); return -1; } @@ -878,7 +878,7 @@ } if( h->param.rc.b_mb_tree ) { - char *mbtree_stats_in = x264_strcat_filename( h->param.rc.psz_stat_in, ".mbtree" ); + char *mbtree_stats_in = strcat_filename( h->param.rc.psz_stat_in, 
".mbtree" ); if( !mbtree_stats_in ) return -1; rc->p_mbtree_stat_file_in = x264_fopen( mbtree_stats_in, "rb" ); @@ -941,6 +941,7 @@ CMP_OPT_FIRST_PASS( "intra_refresh", h->param.b_intra_refresh ); CMP_OPT_FIRST_PASS( "open_gop", h->param.b_open_gop ); CMP_OPT_FIRST_PASS( "bluray_compat", h->param.b_bluray_compat ); + CMP_OPT_FIRST_PASS( "mbtree", h->param.rc.b_mb_tree ); if( (p = strstr( opts, "interlaced=" )) ) { @@ -1154,7 +1155,7 @@ if( h->param.rc.b_stat_write ) { char *p; - rc->psz_stat_file_tmpname = x264_strcat_filename( h->param.rc.psz_stat_out, ".temp" ); + rc->psz_stat_file_tmpname = strcat_filename( h->param.rc.psz_stat_out, ".temp" ); if( !rc->psz_stat_file_tmpname ) return -1; @@ -1171,8 +1172,8 @@ x264_free( p ); if( h->param.rc.b_mb_tree && !h->param.rc.b_stat_read ) { - rc->psz_mbtree_stat_file_tmpname = x264_strcat_filename( h->param.rc.psz_stat_out, ".mbtree.temp" ); - rc->psz_mbtree_stat_file_name = x264_strcat_filename( h->param.rc.psz_stat_out, ".mbtree" ); + rc->psz_mbtree_stat_file_tmpname = strcat_filename( h->param.rc.psz_stat_out, ".mbtree.temp" ); + rc->psz_mbtree_stat_file_name = strcat_filename( h->param.rc.psz_stat_out, ".mbtree" ); if( !rc->psz_mbtree_stat_file_tmpname || !rc->psz_mbtree_stat_file_name ) return -1; @@ -1192,7 +1193,7 @@ rc->mbtree.srcdim[0] = h->param.i_width; rc->mbtree.srcdim[1] = h->param.i_height; } - if( x264_macroblock_tree_rescale_init( h, rc ) < 0 ) + if( macroblock_tree_rescale_init( h, rc ) < 0 ) return -1; } @@ -1327,9 +1328,10 @@ static x264_zone_t *get_zone( x264_t *h, int frame_num ) { - for( int i = h->rc->i_zones - 1; i >= 0; i-- ) + x264_ratecontrol_t *rc = h->rc; + for( int i = rc->i_zones - 1; i >= 0; i-- ) { - x264_zone_t *z = &h->rc->zones[i]; + x264_zone_t *z = &rc->zones[i]; if( frame_num >= z->i_start && frame_num <= z->i_end ) return z; } @@ -1385,7 +1387,7 @@ x264_free( rc->pred_b_from_p ); x264_free( rc->entry ); x264_free( rc->entry_out ); - x264_macroblock_tree_rescale_destroy( rc ); + 
macroblock_tree_rescale_destroy( rc ); if( rc->zones ) { x264_free( rc->zones[0].param ); @@ -1432,7 +1434,7 @@ { int frame = h->fenc->i_frame; assert( frame >= 0 && frame < rc->num_entries ); - rce = h->rc->rce = &h->rc->entry[frame]; + rce = rc->rce = &rc->entry[frame]; if( h->sh.i_type == SLICE_TYPE_B && h->param.analyse.i_direct_mv_pred == X264_DIRECT_PRED_AUTO ) @@ -1693,7 +1695,7 @@ b1 = bits_so_far + predict_row_size_to_end( h, y, rc->qpm ) + size_of_other_slices; } - h->rc->frame_size_estimated = b1 - size_of_other_slices; + rc->frame_size_estimated = b1 - size_of_other_slices; /* If the current row was large enough to cause a large QP jump, try re-encoding it. */ if( rc->qpm > qp_max && prev_row_qp < qp_max && can_reencode_row ) @@ -1709,12 +1711,12 @@ } else { - h->rc->frame_size_estimated = bits_so_far; + rc->frame_size_estimated = bits_so_far; /* Last-ditch attempt: if the last row of the frame underflowed the VBV, * try again. */ if( rc->qpm < qp_max && can_reencode_row - && (h->rc->frame_size_estimated + size_of_other_slices > X264_MIN( rc->frame_size_maximum, rc->buffer_fill )) ) + && (bits_so_far + size_of_other_slices > X264_MIN( rc->frame_size_maximum, rc->buffer_fill )) ) { rc->qpm = qp_max; rc->qpa_rc = rc->qpa_rc_prev; @@ -2211,7 +2213,7 @@ rcc->buffer_fill = h->thread[0]->rc->buffer_fill_final_min / h->sps->vui.i_time_scale; if( h->i_thread_frames > 1 ) { - int j = h->rc - h->thread[0]->rc; + int j = rcc - h->thread[0]->rc; for( int i = 1; i < h->i_thread_frames; i++ ) { x264_t *t = h->thread[ (j+i)%h->i_thread_frames ]; @@ -2445,7 +2447,7 @@ /* Limit planned size by MinCR */ if( rcc->b_vbv ) rcc->frame_size_planned = X264_MIN( rcc->frame_size_planned, rcc->frame_size_maximum ); - h->rc->frame_size_estimated = rcc->frame_size_planned; + rcc->frame_size_estimated = rcc->frame_size_planned; /* For row SATDs */ if( rcc->b_vbv ) @@ -2458,7 +2460,7 @@ double predicted_bits = total_bits; if( h->i_thread_frames > 1 ) { - int j = h->rc - 
h->thread[0]->rc; + int j = rcc - h->thread[0]->rc; for( int i = 1; i < h->i_thread_frames; i++ ) { x264_t *t = h->thread[(j+i) % h->i_thread_frames]; @@ -2627,12 +2629,12 @@ /* Limit planned size by MinCR */ if( rcc->b_vbv ) rcc->frame_size_planned = X264_MIN( rcc->frame_size_planned, rcc->frame_size_maximum ); - h->rc->frame_size_estimated = rcc->frame_size_planned; + rcc->frame_size_estimated = rcc->frame_size_planned; return q; } } -static void x264_threads_normalize_predictors( x264_t *h ) +static void threads_normalize_predictors( x264_t *h ) { double totalsize = 0; for( int i = 0; i < h->param.i_threads; i++ ) @@ -2677,7 +2679,7 @@ } if( rc->b_vbv && rc->frame_size_planned ) { - x264_threads_normalize_predictors( h ); + threads_normalize_predictors( h ); if( rc->single_frame_vbv ) { @@ -2688,7 +2690,7 @@ float max_frame_error = x264_clip3f( 1.0 / (t->i_threadslice_end - t->i_threadslice_start), 0.05, 0.25 ); t->rc->slice_size_planned += 2 * max_frame_error * rc->frame_size_planned; } - x264_threads_normalize_predictors( h ); + threads_normalize_predictors( h ); } for( int i = 0; i < h->param.i_threads; i++ ) diff -Nru x264-0.152.2854+gite9a5903/encoder/ratecontrol.h x264-0.158.2988+git-20191101.7817004/encoder/ratecontrol.h --- x264-0.152.2854+gite9a5903/encoder/ratecontrol.h 2017-12-31 12:50:51.000000000 +0000 +++ x264-0.158.2988+git-20191101.7817004/encoder/ratecontrol.h 2019-11-09 05:16:29.000000000 +0000 @@ -1,7 +1,7 @@ /***************************************************************************** * ratecontrol.h: ratecontrol ***************************************************************************** - * Copyright (C) 2003-2017 x264 project + * Copyright (C) 2003-2019 x264 project * * Authors: Loren Merritt * Laurent Aimar @@ -24,8 +24,8 @@ * For more information, contact us at licensing@x264.com. 
*****************************************************************************/ -#ifndef X264_RATECONTROL_H -#define X264_RATECONTROL_H +#ifndef X264_ENCODER_RATECONTROL_H +#define X264_ENCODER_RATECONTROL_H /* Completely arbitrary. Ratecontrol lowers relative quality at higher framerates * and the reverse at lower framerates; this serves as the center of the curve. @@ -39,28 +39,49 @@ #define CLIP_DURATION(f) x264_clip3f(f,MIN_FRAME_DURATION,MAX_FRAME_DURATION) +#define x264_ratecontrol_new x264_template(ratecontrol_new) int x264_ratecontrol_new ( x264_t * ); +#define x264_ratecontrol_delete x264_template(ratecontrol_delete) void x264_ratecontrol_delete( x264_t * ); +#define x264_ratecontrol_init_reconfigurable x264_template(ratecontrol_init_reconfigurable) void x264_ratecontrol_init_reconfigurable( x264_t *h, int b_init ); +#define x264_encoder_reconfig_apply x264_template(encoder_reconfig_apply) int x264_encoder_reconfig_apply( x264_t *h, x264_param_t *param ); +#define x264_adaptive_quant_frame x264_template(adaptive_quant_frame) void x264_adaptive_quant_frame( x264_t *h, x264_frame_t *frame, float *quant_offsets ); +#define x264_macroblock_tree_read x264_template(macroblock_tree_read) int x264_macroblock_tree_read( x264_t *h, x264_frame_t *frame, float *quant_offsets ); +#define x264_reference_build_list_optimal x264_template(reference_build_list_optimal) int x264_reference_build_list_optimal( x264_t *h ); +#define x264_thread_sync_ratecontrol x264_template(thread_sync_ratecontrol) void x264_thread_sync_ratecontrol( x264_t *cur, x264_t *prev, x264_t *next ); +#define x264_ratecontrol_zone_init x264_template(ratecontrol_zone_init) void x264_ratecontrol_zone_init( x264_t * ); +#define x264_ratecontrol_start x264_template(ratecontrol_start) void x264_ratecontrol_start( x264_t *, int i_force_qp, int overhead ); +#define x264_ratecontrol_slice_type x264_template(ratecontrol_slice_type) int x264_ratecontrol_slice_type( x264_t *, int i_frame ); +#define 
x264_ratecontrol_set_weights x264_template(ratecontrol_set_weights) void x264_ratecontrol_set_weights( x264_t *h, x264_frame_t *frm ); +#define x264_ratecontrol_mb x264_template(ratecontrol_mb) int x264_ratecontrol_mb( x264_t *, int bits ); +#define x264_ratecontrol_qp x264_template(ratecontrol_qp) int x264_ratecontrol_qp( x264_t * ); +#define x264_ratecontrol_mb_qp x264_template(ratecontrol_mb_qp) int x264_ratecontrol_mb_qp( x264_t *h ); +#define x264_ratecontrol_end x264_template(ratecontrol_end) int x264_ratecontrol_end( x264_t *, int bits, int *filler ); +#define x264_ratecontrol_summary x264_template(ratecontrol_summary) void x264_ratecontrol_summary( x264_t * ); +#define x264_rc_analyse_slice x264_template(rc_analyse_slice) int x264_rc_analyse_slice( x264_t *h ); +#define x264_threads_distribute_ratecontrol x264_template(threads_distribute_ratecontrol) void x264_threads_distribute_ratecontrol( x264_t *h ); +#define x264_threads_merge_ratecontrol x264_template(threads_merge_ratecontrol) void x264_threads_merge_ratecontrol( x264_t *h ); +#define x264_hrd_fullness x264_template(hrd_fullness) void x264_hrd_fullness( x264_t *h ); -#endif +#endif diff -Nru x264-0.152.2854+gite9a5903/encoder/rdo.c x264-0.158.2988+git-20191101.7817004/encoder/rdo.c --- x264-0.152.2854+gite9a5903/encoder/rdo.c 2017-12-31 12:50:51.000000000 +0000 +++ x264-0.158.2988+git-20191101.7817004/encoder/rdo.c 2019-11-09 05:16:29.000000000 +0000 @@ -1,7 +1,7 @@ /***************************************************************************** * rdo.c: rate-distortion optimization ***************************************************************************** - * Copyright (C) 2005-2017 x264 project + * Copyright (C) 2005-2019 x264 project * * Authors: Loren Merritt * Fiona Glaser @@ -32,7 +32,9 @@ /* Transition and size tables for abs<9 MVD and residual coding */ /* Consist of i_prefix-2 1s, one zero, and a bypass sign bit */ +#define x264_cabac_transition_unary x264_template(cabac_transition_unary) 
uint8_t x264_cabac_transition_unary[15][128]; +#define x264_cabac_size_unary x264_template(cabac_size_unary) uint16_t x264_cabac_size_unary[15][128]; /* Transition and size tables for abs>9 MVD */ /* Consist of 5 1s and a bypass sign bit */ @@ -46,7 +48,8 @@ #define bs_write_ue(s,v) ((s)->i_bits_encoded += bs_size_ue(v)) #define bs_write_se(s,v) ((s)->i_bits_encoded += bs_size_se(v)) #define bs_write_te(s,v,l) ((s)->i_bits_encoded += bs_size_te(v,l)) -#define x264_macroblock_write_cavlc static x264_macroblock_size_cavlc +#undef x264_macroblock_write_cavlc +#define x264_macroblock_write_cavlc static macroblock_size_cavlc #include "cavlc.c" /* CABAC: not exactly the same. x264_cabac_size_decision() keeps track of @@ -55,12 +58,14 @@ #undef x264_cabac_encode_decision_noup #undef x264_cabac_encode_bypass #undef x264_cabac_encode_terminal +#undef x264_cabac_encode_ue_bypass #define x264_cabac_encode_decision(c,x,v) x264_cabac_size_decision(c,x,v) #define x264_cabac_encode_decision_noup(c,x,v) x264_cabac_size_decision_noup(c,x,v) #define x264_cabac_encode_terminal(c) ((c)->f8_bits_encoded += 7) #define x264_cabac_encode_bypass(c,v) ((c)->f8_bits_encoded += 256) #define x264_cabac_encode_ue_bypass(c,e,v) ((c)->f8_bits_encoded += (bs_size_ue_big(v+(1<mc.memcpy_aligned( &cabac_tmp.f8_bits_encoded, &h->cabac.f8_bits_encoded, \ @@ -91,7 +96,6 @@ static const uint8_t satd_shift_x[3] = {3, 2, 2}; static const uint8_t satd_shift_y[3] = {2-1, 3-2, 2-2}; static const uint8_t satd_offset[3] = {0, 8, 16}; - ALIGNED_16( static pixel zero[16] ) = {0}; int cache_index = (x >> satd_shift_x[size - PIXEL_8x4]) + (y >> satd_shift_y[size - PIXEL_8x4]) + satd_offset[size - PIXEL_8x4]; int res = h->mb.pic.fenc_satd_cache[cache_index]; @@ -100,8 +104,8 @@ else { pixel *fenc = h->mb.pic.p_fenc[0] + x + y*FENC_STRIDE; - int dc = h->pixf.sad[size]( fenc, FENC_STRIDE, zero, 0 ) >> 1; - res = h->pixf.satd[size]( fenc, FENC_STRIDE, zero, 0 ) - dc; + int dc = h->pixf.sad[size]( fenc, FENC_STRIDE, 
(pixel*)x264_zero, 0 ) >> 1; + res = h->pixf.satd[size]( fenc, FENC_STRIDE, (pixel*)x264_zero, 0 ) - dc; h->mb.pic.fenc_satd_cache[cache_index] = res + 1; return res; } @@ -118,7 +122,6 @@ static inline int ssd_plane( x264_t *h, int size, int p, int x, int y ) { - ALIGNED_16( static pixel zero[16] ) = {0}; int satd = 0; pixel *fdec = h->mb.pic.p_fdec[p] + x + y*FDEC_STRIDE; pixel *fenc = h->mb.pic.p_fenc[p] + x + y*FENC_STRIDE; @@ -135,8 +138,8 @@ } else { - int dc = h->pixf.sad[size]( fdec, FDEC_STRIDE, zero, 0 ) >> 1; - satd = abs(h->pixf.satd[size]( fdec, FDEC_STRIDE, zero, 0 ) - dc - cached_satd( h, size, x, y )); + int dc = h->pixf.sad[size]( fdec, FDEC_STRIDE, (pixel*)x264_zero, 0 ) >> 1; + satd = abs(h->pixf.satd[size]( fdec, FDEC_STRIDE, (pixel*)x264_zero, 0 ) - dc - cached_satd( h, size, x, y )); } satd = (satd * h->mb.i_psy_rd * h->mb.i_psy_rd_lambda + 128) >> 8; } @@ -145,13 +148,17 @@ static inline int ssd_mb( x264_t *h ) { - int chroma_size = h->luma2chroma_pixel[PIXEL_16x16]; - int chroma_ssd = ssd_plane(h, chroma_size, 1, 0, 0) + ssd_plane(h, chroma_size, 2, 0, 0); - chroma_ssd = ((uint64_t)chroma_ssd * h->mb.i_chroma_lambda2_offset + 128) >> 8; - return ssd_plane(h, PIXEL_16x16, 0, 0, 0) + chroma_ssd; + int i_ssd = ssd_plane( h, PIXEL_16x16, 0, 0, 0 ); + if( CHROMA_FORMAT ) + { + int chroma_size = h->luma2chroma_pixel[PIXEL_16x16]; + int chroma_ssd = ssd_plane( h, chroma_size, 1, 0, 0 ) + ssd_plane( h, chroma_size, 2, 0, 0 ); + i_ssd += ((uint64_t)chroma_ssd * h->mb.i_chroma_lambda2_offset + 128) >> 8; + } + return i_ssd; } -static int x264_rd_cost_mb( x264_t *h, int i_lambda2 ) +static int rd_cost_mb( x264_t *h, int i_lambda2 ) { int b_transform_bak = h->mb.b_transform_8x8; int i_ssd; @@ -173,12 +180,12 @@ { x264_cabac_t cabac_tmp; COPY_CABAC; - x264_macroblock_size_cabac( h, &cabac_tmp ); + macroblock_size_cabac( h, &cabac_tmp ); i_bits = ( (uint64_t)cabac_tmp.f8_bits_encoded * i_lambda2 + 32768 ) >> 16; } else { - x264_macroblock_size_cavlc( h ); 
+ macroblock_size_cavlc( h ); i_bits = ( (uint64_t)h->out.bs.i_bits_encoded * i_lambda2 + 128 ) >> 8; } @@ -190,7 +197,7 @@ /* partition RD functions use 8 bits more precision to avoid large rounding errors at low QPs */ -static uint64_t x264_rd_cost_subpart( x264_t *h, int i_lambda2, int i4, int i_pixel ) +static uint64_t rd_cost_subpart( x264_t *h, int i_lambda2, int i4, int i_pixel ) { uint64_t i_ssd, i_bits; @@ -213,11 +220,11 @@ { x264_cabac_t cabac_tmp; COPY_CABAC; - x264_subpartition_size_cabac( h, &cabac_tmp, i4, i_pixel ); + subpartition_size_cabac( h, &cabac_tmp, i4, i_pixel ); i_bits = ( (uint64_t)cabac_tmp.f8_bits_encoded * i_lambda2 + 128 ) >> 8; } else - i_bits = x264_subpartition_size_cavlc( h, i4, i_pixel ); + i_bits = subpartition_size_cavlc( h, i4, i_pixel ); return (i_ssd<<8) + i_bits; } @@ -229,12 +236,12 @@ if( i_pixel == PIXEL_16x16 ) { - int i_cost = x264_rd_cost_mb( h, i_lambda2 ); + int i_cost = rd_cost_mb( h, i_lambda2 ); return i_cost; } if( i_pixel > PIXEL_8x8 ) - return x264_rd_cost_subpart( h, i_lambda2, i4, i_pixel ); + return rd_cost_subpart( h, i_lambda2, i4, i_pixel ); h->mb.i_cbp_luma = 0; @@ -247,25 +254,28 @@ int ssd_x = 8*(i8&1); int ssd_y = 8*(i8>>1); i_ssd = ssd_plane( h, i_pixel, 0, ssd_x, ssd_y ); - int chromapix = h->luma2chroma_pixel[i_pixel]; - int chromassd = ssd_plane( h, chromapix, 1, ssd_x>>CHROMA_H_SHIFT, ssd_y>>CHROMA_V_SHIFT ) - + ssd_plane( h, chromapix, 2, ssd_x>>CHROMA_H_SHIFT, ssd_y>>CHROMA_V_SHIFT ); - i_ssd += ((uint64_t)chromassd * h->mb.i_chroma_lambda2_offset + 128) >> 8; + if( CHROMA_FORMAT ) + { + int chroma_size = h->luma2chroma_pixel[i_pixel]; + int chroma_ssd = ssd_plane( h, chroma_size, 1, ssd_x>>CHROMA_H_SHIFT, ssd_y>>CHROMA_V_SHIFT ) + + ssd_plane( h, chroma_size, 2, ssd_x>>CHROMA_H_SHIFT, ssd_y>>CHROMA_V_SHIFT ); + i_ssd += ((uint64_t)chroma_ssd * h->mb.i_chroma_lambda2_offset + 128) >> 8; + } if( h->param.b_cabac ) { x264_cabac_t cabac_tmp; COPY_CABAC; - x264_partition_size_cabac( h, &cabac_tmp, 
i8, i_pixel ); + partition_size_cabac( h, &cabac_tmp, i8, i_pixel ); i_bits = ( (uint64_t)cabac_tmp.f8_bits_encoded * i_lambda2 + 128 ) >> 8; } else - i_bits = (uint64_t)x264_partition_size_cavlc( h, i8, i_pixel ) * i_lambda2; + i_bits = (uint64_t)partition_size_cavlc( h, i8, i_pixel ) * i_lambda2; return (i_ssd<<8) + i_bits; } -static uint64_t x264_rd_cost_i8x8( x264_t *h, int i_lambda2, int i8, int i_mode, pixel edge[4][32] ) +static uint64_t rd_cost_i8x8( x264_t *h, int i_lambda2, int i8, int i_mode, pixel edge[4][32] ) { uint64_t i_ssd, i_bits; int plane_count = CHROMA444 ? 3 : 1; @@ -292,16 +302,16 @@ { x264_cabac_t cabac_tmp; COPY_CABAC; - x264_partition_i8x8_size_cabac( h, &cabac_tmp, i8, i_mode ); + partition_i8x8_size_cabac( h, &cabac_tmp, i8, i_mode ); i_bits = ( (uint64_t)cabac_tmp.f8_bits_encoded * i_lambda2 + 128 ) >> 8; } else - i_bits = (uint64_t)x264_partition_i8x8_size_cavlc( h, i8, i_mode ) * i_lambda2; + i_bits = (uint64_t)partition_i8x8_size_cavlc( h, i8, i_mode ) * i_lambda2; return (i_ssd<<8) + i_bits; } -static uint64_t x264_rd_cost_i4x4( x264_t *h, int i_lambda2, int i4, int i_mode ) +static uint64_t rd_cost_i4x4( x264_t *h, int i_lambda2, int i4, int i_mode ) { uint64_t i_ssd, i_bits; int plane_count = CHROMA444 ? 
3 : 1; @@ -326,16 +336,16 @@ { x264_cabac_t cabac_tmp; COPY_CABAC; - x264_partition_i4x4_size_cabac( h, &cabac_tmp, i4, i_mode ); + partition_i4x4_size_cabac( h, &cabac_tmp, i4, i_mode ); i_bits = ( (uint64_t)cabac_tmp.f8_bits_encoded * i_lambda2 + 128 ) >> 8; } else - i_bits = (uint64_t)x264_partition_i4x4_size_cavlc( h, i4, i_mode ) * i_lambda2; + i_bits = (uint64_t)partition_i4x4_size_cavlc( h, i4, i_mode ) * i_lambda2; return (i_ssd<<8) + i_bits; } -static uint64_t x264_rd_cost_chroma( x264_t *h, int i_lambda2, int i_mode, int b_dct ) +static uint64_t rd_cost_chroma( x264_t *h, int i_lambda2, int i_mode, int b_dct ) { uint64_t i_ssd, i_bits; @@ -352,11 +362,11 @@ { x264_cabac_t cabac_tmp; COPY_CABAC; - x264_chroma_size_cabac( h, &cabac_tmp ); + chroma_size_cabac( h, &cabac_tmp ); i_bits = ( (uint64_t)cabac_tmp.f8_bits_encoded * i_lambda2 + 128 ) >> 8; } else - i_bits = (uint64_t)x264_chroma_size_cavlc( h ) * i_lambda2; + i_bits = (uint64_t)chroma_size_cavlc( h ) * i_lambda2; return (i_ssd<<8) + i_bits; } @@ -907,8 +917,8 @@ const uint8_t *zigzag, int ctx_block_cat, int lambda2, int b_ac, int b_chroma, int dc, int num_coefs, int idx, int b_8x8 ) { - ALIGNED_16( dctcoef quant_coefs[2][16] ); - ALIGNED_16( dctcoef coefs[16] ) = {0}; + ALIGNED_ARRAY_16( dctcoef, quant_coefs,[2],[16] ); + ALIGNED_ARRAY_16( dctcoef, coefs,[16] ); const uint32_t *coef_weight1 = b_8x8 ? x264_dct8_weight_tab : x264_dct4_weight_tab; const uint32_t *coef_weight2 = b_8x8 ? x264_dct8_weight2_tab : x264_dct4_weight2_tab; int delta_distortion[16]; @@ -918,6 +928,9 @@ int nC = b_chroma && dc ? 3 + (num_coefs>>2) : ct_index[x264_mb_predict_non_zero_code( h, !b_chroma && dc ? (idx - LUMA_DC)*16 : idx )]; + for( i = 0; i < 16; i += 16/sizeof(*coefs) ) + M128( &coefs[i] ) = M128_ZERO; + /* Code for handling 8x8dct -> 4x4dct CAVLC munging. Input/output use a different * step/start/end than internal processing. 
*/ int step = 1; @@ -997,7 +1010,7 @@ if( !coef_mask ) bs_write_vlc( &h->out.bs, x264_coeff0_token[nC] ); else - x264_cavlc_block_residual_internal( h, ctx_block_cat, coefs + b_ac, nC ); + cavlc_block_residual_internal( h, ctx_block_cat, coefs + b_ac, nC ); score = (int64_t)h->out.bs.i_bits_encoded * lambda2; /* QNS loop: pick the change that improves RD the most, apply it, repeat. @@ -1030,7 +1043,7 @@ if( !cur_mask ) bs_write_vlc( &h->out.bs, x264_coeff0_token[nC] ); else - x264_cavlc_block_residual_internal( h, ctx_block_cat, coefs + b_ac, nC ); + cavlc_block_residual_internal( h, ctx_block_cat, coefs + b_ac, nC ); cur_score += (int64_t)h->out.bs.i_bits_encoded * lambda2; coefs[i] = old_coef; @@ -1089,8 +1102,8 @@ DCT_LUMA_DC, h->mb.i_trellis_lambda2[0][b_intra], 0, 0, 1, 16, idx, 0 ); } -static const uint8_t x264_zigzag_scan2x2[4] = { 0, 1, 2, 3 }; -static const uint8_t x264_zigzag_scan2x4[8] = { 0, 2, 1, 4, 6, 3, 5, 7 }; +static const uint8_t zigzag_scan2x2[4] = { 0, 1, 2, 3 }; +static const uint8_t zigzag_scan2x4[8] = { 0, 2, 1, 4, 6, 3, 5, 7 }; int x264_quant_chroma_dc_trellis( x264_t *h, dctcoef *dct, int i_qp, int b_intra, int idx ) { @@ -1100,12 +1113,12 @@ if( CHROMA_FORMAT == CHROMA_422 ) { - zigzag = x264_zigzag_scan2x4; + zigzag = zigzag_scan2x4; num_coefs = 8; } else { - zigzag = x264_zigzag_scan2x2; + zigzag = zigzag_scan2x2; num_coefs = 4; } diff -Nru x264-0.152.2854+gite9a5903/encoder/set.c x264-0.158.2988+git-20191101.7817004/encoder/set.c --- x264-0.152.2854+gite9a5903/encoder/set.c 2017-12-31 12:50:51.000000000 +0000 +++ x264-0.158.2988+git-20191101.7817004/encoder/set.c 2019-11-09 05:16:29.000000000 +0000 @@ -1,7 +1,7 @@ /***************************************************************************** * set: header writing ***************************************************************************** - * Copyright (C) 2003-2017 x264 project + * Copyright (C) 2003-2019 x264 project * * Authors: Laurent Aimar * Loren Merritt @@ -31,7 +31,7 @@ // 
Indexed by pic_struct values static const uint8_t num_clock_ts[10] = { 0, 1, 1, 1, 2, 2, 3, 3, 2, 3 }; -const static uint8_t avcintra_uuid[] = {0xF7, 0x49, 0x3E, 0xB3, 0xD4, 0x00, 0x47, 0x96, 0x86, 0x86, 0xC9, 0x70, 0x7B, 0x64, 0x37, 0x2A}; +static const uint8_t avcintra_uuid[] = {0xF7, 0x49, 0x3E, 0xB3, 0xD4, 0x00, 0x47, 0x96, 0x86, 0x86, 0xC9, 0x70, 0x7B, 0x64, 0x37, 0x2A}; static void transpose( uint8_t *buf, int w ) { @@ -40,15 +40,15 @@ XCHG( uint8_t, buf[w*i+j], buf[w*j+i] ); } -static void scaling_list_write( bs_t *s, x264_pps_t *pps, int idx ) +static void scaling_list_write( bs_t *s, x264_sps_t *sps, int idx ) { const int len = idx<4 ? 16 : 64; const uint8_t *zigzag = idx<4 ? x264_zigzag_scan4[0] : x264_zigzag_scan8[0]; - const uint8_t *list = pps->scaling_list[idx]; - const uint8_t *def_list = (idx==CQM_4IC) ? pps->scaling_list[CQM_4IY] - : (idx==CQM_4PC) ? pps->scaling_list[CQM_4PY] - : (idx==CQM_8IC+4) ? pps->scaling_list[CQM_8IY+4] - : (idx==CQM_8PC+4) ? pps->scaling_list[CQM_8PY+4] + const uint8_t *list = sps->scaling_list[idx]; + const uint8_t *def_list = (idx==CQM_4IC) ? sps->scaling_list[CQM_4IY] + : (idx==CQM_4PC) ? sps->scaling_list[CQM_4PY] + : (idx==CQM_8IC+4) ? sps->scaling_list[CQM_8IY+4] + : (idx==CQM_8PC+4) ? sps->scaling_list[CQM_8PY+4] : x264_cqm_jvt[idx]; if( !memcmp( list, def_list, len ) ) bs_write1( s, 0 ); // scaling_list_present_flag @@ -105,8 +105,12 @@ sps->i_id = i_id; sps->i_mb_width = ( param->i_width + 15 ) / 16; sps->i_mb_height= ( param->i_height + 15 ) / 16; + sps->b_frame_mbs_only = !(param->b_interlaced || param->b_fake_interlaced); + if( !sps->b_frame_mbs_only ) + sps->i_mb_height = ( sps->i_mb_height + 1 ) & ~1; sps->i_chroma_format_idc = csp >= X264_CSP_I444 ? CHROMA_444 : - csp >= X264_CSP_I422 ? CHROMA_422 : CHROMA_420; + csp >= X264_CSP_I422 ? CHROMA_422 : + csp >= X264_CSP_I420 ? 
CHROMA_420 : CHROMA_400; sps->b_qpprime_y_zero_transform_bypass = param->rc.i_rc_method == X264_RC_CQP && param->rc.i_qp_constant == 0; if( sps->b_qpprime_y_zero_transform_bypass || sps->i_chroma_format_idc == CHROMA_444 ) @@ -115,7 +119,7 @@ sps->i_profile_idc = PROFILE_HIGH422; else if( BIT_DEPTH > 8 ) sps->i_profile_idc = PROFILE_HIGH10; - else if( param->analyse.b_transform_8x8 || param->i_cqm_preset != X264_CQM_FLAT ) + else if( param->analyse.b_transform_8x8 || param->i_cqm_preset != X264_CQM_FLAT || sps->i_chroma_format_idc == CHROMA_400 ) sps->i_profile_idc = PROFILE_HIGH; else if( param->b_cabac || param->i_bframe > 0 || param->b_interlaced || param->b_fake_interlaced || param->analyse.i_weighted_pred > 0 ) sps->i_profile_idc = PROFILE_MAIN; @@ -178,9 +182,6 @@ sps->b_vui = 1; sps->b_gaps_in_frame_num_value_allowed = 0; - sps->b_frame_mbs_only = !(param->b_interlaced || param->b_fake_interlaced); - if( !sps->b_frame_mbs_only ) - sps->i_mb_height = ( sps->i_mb_height + 1 ) & ~1; sps->b_mb_adaptive_frame_field = param->b_interlaced; sps->b_direct8x8_inference = 1; @@ -197,22 +198,14 @@ sps->vui.b_color_description_present = 0; sps->vui.i_colorprim = ( param->vui.i_colorprim >= 0 && param->vui.i_colorprim <= 12 ? param->vui.i_colorprim : 2 ); - sps->vui.i_transfer = ( param->vui.i_transfer >= 0 && param->vui.i_transfer <= 17 ? param->vui.i_transfer : 2 ); - sps->vui.i_colmatrix = ( param->vui.i_colmatrix >= 0 && param->vui.i_colmatrix <= 11 ? param->vui.i_colmatrix : + sps->vui.i_transfer = ( param->vui.i_transfer >= 0 && param->vui.i_transfer <= 18 ? param->vui.i_transfer : 2 ); + sps->vui.i_colmatrix = ( param->vui.i_colmatrix >= 0 && param->vui.i_colmatrix <= 14 ? param->vui.i_colmatrix : ( csp >= X264_CSP_BGR ? 
0 : 2 ) ); - if( sps->vui.i_colorprim != 2 || - sps->vui.i_transfer != 2 || - sps->vui.i_colmatrix != 2 ) - { + if( sps->vui.i_colorprim != 2 || sps->vui.i_transfer != 2 || sps->vui.i_colmatrix != 2 ) sps->vui.b_color_description_present = 1; - } - if( sps->vui.i_vidformat != 5 || - sps->vui.b_fullrange || - sps->vui.b_color_description_present ) - { + if( sps->vui.i_vidformat != 5 || sps->vui.b_fullrange || sps->vui.b_color_description_present ) sps->vui.b_signal_type_present = 1; - } /* FIXME: not sufficient for interlaced video */ sps->vui.b_chroma_loc_info_present = param->vui.i_chroma_loc > 0 && param->vui.i_chroma_loc <= 5 && @@ -247,6 +240,9 @@ sps->vui.i_log2_max_mv_length_horizontal = sps->vui.i_log2_max_mv_length_vertical = (int)log2f( X264_MAX( 1, param->analyse.i_mv_range*4-1 ) ) + 1; } + + sps->b_avcintra = !!param->i_avcintra_class; + sps->i_cqm_preset = param->i_cqm_preset; } void x264_sps_init_reconfigurable( x264_sps_t *sps, x264_param_t *param ) @@ -254,7 +250,7 @@ sps->crop.i_left = param->crop_rect.i_left; sps->crop.i_top = param->crop_rect.i_top; sps->crop.i_right = param->crop_rect.i_right + sps->i_mb_width*16 - param->i_width; - sps->crop.i_bottom = (param->crop_rect.i_bottom + sps->i_mb_height*16 - param->i_height) >> !sps->b_frame_mbs_only; + sps->crop.i_bottom = param->crop_rect.i_bottom + sps->i_mb_height*16 - param->i_height; sps->b_crop = sps->crop.i_left || sps->crop.i_top || sps->crop.i_right || sps->crop.i_bottom; @@ -267,6 +263,44 @@ } } +void x264_sps_init_scaling_list( x264_sps_t *sps, x264_param_t *param ) +{ + switch( sps->i_cqm_preset ) + { + case X264_CQM_FLAT: + for( int i = 0; i < 8; i++ ) + sps->scaling_list[i] = x264_cqm_flat16; + break; + case X264_CQM_JVT: + for( int i = 0; i < 8; i++ ) + sps->scaling_list[i] = x264_cqm_jvt[i]; + break; + case X264_CQM_CUSTOM: + /* match the transposed DCT & zigzag */ + transpose( param->cqm_4iy, 4 ); + transpose( param->cqm_4py, 4 ); + transpose( param->cqm_4ic, 4 ); + transpose( 
param->cqm_4pc, 4 ); + transpose( param->cqm_8iy, 8 ); + transpose( param->cqm_8py, 8 ); + transpose( param->cqm_8ic, 8 ); + transpose( param->cqm_8pc, 8 ); + sps->scaling_list[CQM_4IY] = param->cqm_4iy; + sps->scaling_list[CQM_4PY] = param->cqm_4py; + sps->scaling_list[CQM_4IC] = param->cqm_4ic; + sps->scaling_list[CQM_4PC] = param->cqm_4pc; + sps->scaling_list[CQM_8IY+4] = param->cqm_8iy; + sps->scaling_list[CQM_8PY+4] = param->cqm_8py; + sps->scaling_list[CQM_8IC+4] = param->cqm_8ic; + sps->scaling_list[CQM_8PC+4] = param->cqm_8pc; + for( int i = 0; i < 8; i++ ) + for( int j = 0; j < (i < 4 ? 16 : 64); j++ ) + if( sps->scaling_list[i][j] == 0 ) + sps->scaling_list[i] = x264_cqm_jvt[i]; + break; + } +} + void x264_sps_write( bs_t *s, x264_sps_t *sps ) { bs_realign( s ); @@ -290,7 +324,26 @@ bs_write_ue( s, BIT_DEPTH-8 ); // bit_depth_luma_minus8 bs_write_ue( s, BIT_DEPTH-8 ); // bit_depth_chroma_minus8 bs_write1( s, sps->b_qpprime_y_zero_transform_bypass ); - bs_write1( s, 0 ); // seq_scaling_matrix_present_flag + /* Exactly match the AVC-Intra bitstream */ + bs_write1( s, sps->b_avcintra ); // seq_scaling_matrix_present_flag + if( sps->b_avcintra ) + { + scaling_list_write( s, sps, CQM_4IY ); + scaling_list_write( s, sps, CQM_4IC ); + scaling_list_write( s, sps, CQM_4IC ); + bs_write1( s, 0 ); // no inter + bs_write1( s, 0 ); // no inter + bs_write1( s, 0 ); // no inter + scaling_list_write( s, sps, CQM_8IY+4 ); + bs_write1( s, 0 ); // no inter + if( sps->i_chroma_format_idc == CHROMA_444 ) + { + scaling_list_write( s, sps, CQM_8IC+4 ); + bs_write1( s, 0 ); // no inter + scaling_list_write( s, sps, CQM_8IC+4 ); + bs_write1( s, 0 ); // no inter + } + } } bs_write_ue( s, sps->i_log2_max_frame_num - 4 ); @@ -310,7 +363,7 @@ if( sps->b_crop ) { int h_shift = sps->i_chroma_format_idc == CHROMA_420 || sps->i_chroma_format_idc == CHROMA_422; - int v_shift = sps->i_chroma_format_idc == CHROMA_420; + int v_shift = (sps->i_chroma_format_idc == CHROMA_420) + 
!sps->b_frame_mbs_only; bs_write_ue( s, sps->crop.i_left >> h_shift ); bs_write_ue( s, sps->crop.i_right >> h_shift ); bs_write_ue( s, sps->crop.i_top >> v_shift ); @@ -446,43 +499,6 @@ pps->b_redundant_pic_cnt = 0; pps->b_transform_8x8_mode = param->analyse.b_transform_8x8 ? 1 : 0; - - pps->i_cqm_preset = param->i_cqm_preset; - - switch( pps->i_cqm_preset ) - { - case X264_CQM_FLAT: - for( int i = 0; i < 8; i++ ) - pps->scaling_list[i] = x264_cqm_flat16; - break; - case X264_CQM_JVT: - for( int i = 0; i < 8; i++ ) - pps->scaling_list[i] = x264_cqm_jvt[i]; - break; - case X264_CQM_CUSTOM: - /* match the transposed DCT & zigzag */ - transpose( param->cqm_4iy, 4 ); - transpose( param->cqm_4py, 4 ); - transpose( param->cqm_4ic, 4 ); - transpose( param->cqm_4pc, 4 ); - transpose( param->cqm_8iy, 8 ); - transpose( param->cqm_8py, 8 ); - transpose( param->cqm_8ic, 8 ); - transpose( param->cqm_8pc, 8 ); - pps->scaling_list[CQM_4IY] = param->cqm_4iy; - pps->scaling_list[CQM_4PY] = param->cqm_4py; - pps->scaling_list[CQM_4IC] = param->cqm_4ic; - pps->scaling_list[CQM_4PC] = param->cqm_4pc; - pps->scaling_list[CQM_8IY+4] = param->cqm_8iy; - pps->scaling_list[CQM_8PY+4] = param->cqm_8py; - pps->scaling_list[CQM_8IC+4] = param->cqm_8ic; - pps->scaling_list[CQM_8PC+4] = param->cqm_8pc; - for( int i = 0; i < 8; i++ ) - for( int j = 0; j < (i < 4 ? 
16 : 64); j++ ) - if( pps->scaling_list[i][j] == 0 ) - pps->scaling_list[i] = x264_cqm_jvt[i]; - break; - } } void x264_pps_write( bs_t *s, x264_sps_t *sps, x264_pps_t *pps ) @@ -508,26 +524,27 @@ bs_write1( s, pps->b_constrained_intra_pred ); bs_write1( s, pps->b_redundant_pic_cnt ); - if( pps->b_transform_8x8_mode || pps->i_cqm_preset != X264_CQM_FLAT ) + int b_scaling_list = !sps->b_avcintra && sps->i_cqm_preset != X264_CQM_FLAT; + if( pps->b_transform_8x8_mode || b_scaling_list ) { bs_write1( s, pps->b_transform_8x8_mode ); - bs_write1( s, (pps->i_cqm_preset != X264_CQM_FLAT) ); - if( pps->i_cqm_preset != X264_CQM_FLAT ) + bs_write1( s, b_scaling_list ); + if( b_scaling_list ) { - scaling_list_write( s, pps, CQM_4IY ); - scaling_list_write( s, pps, CQM_4IC ); + scaling_list_write( s, sps, CQM_4IY ); + scaling_list_write( s, sps, CQM_4IC ); bs_write1( s, 0 ); // Cr = Cb - scaling_list_write( s, pps, CQM_4PY ); - scaling_list_write( s, pps, CQM_4PC ); + scaling_list_write( s, sps, CQM_4PY ); + scaling_list_write( s, sps, CQM_4PC ); bs_write1( s, 0 ); // Cr = Cb if( pps->b_transform_8x8_mode ) { - scaling_list_write( s, pps, CQM_8IY+4 ); - scaling_list_write( s, pps, CQM_8PY+4 ); + scaling_list_write( s, sps, CQM_8IY+4 ); + scaling_list_write( s, sps, CQM_8PY+4 ); if( sps->i_chroma_format_idc == CHROMA_444 ) { - scaling_list_write( s, pps, CQM_8IC+4 ); - scaling_list_write( s, pps, CQM_8PC+4 ); + scaling_list_write( s, sps, CQM_8IC+4 ); + scaling_list_write( s, sps, CQM_8PC+4 ); bs_write1( s, 0 ); // Cr = Cb bs_write1( s, 0 ); // Cr = Cb } @@ -555,7 +572,6 @@ bs_write( &q, 2, 0 ); //changing_slice_group 0 bs_align_10( &q ); - bs_flush( &q ); x264_sei_write( s, tmp_buf, bs_pos( &q ) / 8, SEI_RECOVERY_POINT ); } @@ -578,7 +594,7 @@ memcpy( payload, uuid, 16 ); sprintf( payload+16, "x264 - core %d%s - H.264/MPEG-4 AVC codec - " - "Copy%s 2003-2017 - http://www.videolan.org/x264.html - options: %s", + "Copy%s 2003-2019 - http://www.videolan.org/x264.html - options: 
%s", X264_BUILD, X264_VERSION, HAVE_GPL?"left":"right", opts ); length = strlen(payload)+1; @@ -610,7 +626,6 @@ } bs_align_10( &q ); - bs_flush( &q ); x264_sei_write( s, tmp_buf, bs_pos( &q ) / 8, SEI_BUFFERING_PERIOD ); } @@ -642,7 +657,6 @@ } bs_align_10( &q ); - bs_flush( &q ); x264_sei_write( s, tmp_buf, bs_pos( &q ) / 8, SEI_PIC_TIMING ); } @@ -685,11 +699,26 @@ bs_write1( &q, 0 ); // frame_packing_arrangement_extension_flag bs_align_10( &q ); - bs_flush( &q ); x264_sei_write( s, tmp_buf, bs_pos( &q ) / 8, SEI_FRAME_PACKING ); } +void x264_sei_alternative_transfer_write( x264_t *h, bs_t *s ) +{ + bs_t q; + ALIGNED_4( uint8_t tmp_buf[100] ); + M32( tmp_buf ) = 0; // shut up gcc + bs_init( &q, tmp_buf, 100 ); + + bs_realign( &q ); + + bs_write ( &q, 8, h->param.i_alternative_transfer ); // preferred_transfer_characteristics + + bs_align_10( &q ); + + x264_sei_write( s, tmp_buf, bs_pos( &q ) / 8, SEI_ALTERNATIVE_TRANSFER ); +} + void x264_filler_write( x264_t *h, bs_t *s, int filler ) { bs_realign( s ); @@ -729,7 +758,6 @@ } bs_align_10( &q ); - bs_flush( &q ); x264_sei_write( s, tmp_buf, bs_pos( &q ) / 8, SEI_DEC_REF_PIC_MARKING ); } @@ -781,31 +809,6 @@ return 0; } -const x264_level_t x264_levels[] = -{ - { 10, 1485, 99, 396, 64, 175, 64, 64, 0, 2, 0, 0, 1 }, - { 9, 1485, 99, 396, 128, 350, 64, 64, 0, 2, 0, 0, 1 }, /* "1b" */ - { 11, 3000, 396, 900, 192, 500, 128, 64, 0, 2, 0, 0, 1 }, - { 12, 6000, 396, 2376, 384, 1000, 128, 64, 0, 2, 0, 0, 1 }, - { 13, 11880, 396, 2376, 768, 2000, 128, 64, 0, 2, 0, 0, 1 }, - { 20, 11880, 396, 2376, 2000, 2000, 128, 64, 0, 2, 0, 0, 1 }, - { 21, 19800, 792, 4752, 4000, 4000, 256, 64, 0, 2, 0, 0, 0 }, - { 22, 20250, 1620, 8100, 4000, 4000, 256, 64, 0, 2, 0, 0, 0 }, - { 30, 40500, 1620, 8100, 10000, 10000, 256, 32, 22, 2, 0, 1, 0 }, - { 31, 108000, 3600, 18000, 14000, 14000, 512, 16, 60, 4, 1, 1, 0 }, - { 32, 216000, 5120, 20480, 20000, 20000, 512, 16, 60, 4, 1, 1, 0 }, - { 40, 245760, 8192, 32768, 20000, 25000, 512, 16, 60, 4, 1, 
1, 0 }, - { 41, 245760, 8192, 32768, 50000, 62500, 512, 16, 24, 2, 1, 1, 0 }, - { 42, 522240, 8704, 34816, 50000, 62500, 512, 16, 24, 2, 1, 1, 1 }, - { 50, 589824, 22080, 110400, 135000, 135000, 512, 16, 24, 2, 1, 1, 1 }, - { 51, 983040, 36864, 184320, 240000, 240000, 512, 16, 24, 2, 1, 1, 1 }, - { 52, 2073600, 36864, 184320, 240000, 240000, 512, 16, 24, 2, 1, 1, 1 }, - { 60, 4177920, 139264, 696320, 240000, 240000, 8192, 16, 24, 2, 1, 1, 1 }, - { 61, 8355840, 139264, 696320, 480000, 480000, 8192, 16, 24, 2, 1, 1, 1 }, - { 62, 16711680, 139264, 696320, 800000, 800000, 8192, 16, 24, 2, 1, 1, 1 }, - { 0 } -}; - #define ERROR(...)\ {\ if( verbose )\ diff -Nru x264-0.152.2854+gite9a5903/encoder/set.h x264-0.158.2988+git-20191101.7817004/encoder/set.h --- x264-0.152.2854+gite9a5903/encoder/set.h 2017-12-31 12:50:51.000000000 +0000 +++ x264-0.158.2988+git-20191101.7817004/encoder/set.h 2019-11-09 05:16:29.000000000 +0000 @@ -1,7 +1,7 @@ /***************************************************************************** * set.h: header writing ***************************************************************************** - * Copyright (C) 2003-2017 x264 project + * Copyright (C) 2003-2019 x264 project * * Authors: Laurent Aimar * Loren Merritt @@ -27,21 +27,41 @@ #ifndef X264_ENCODER_SET_H #define X264_ENCODER_SET_H +#define x264_sps_init x264_template(sps_init) void x264_sps_init( x264_sps_t *sps, int i_id, x264_param_t *param ); +#define x264_sps_init_reconfigurable x264_template(sps_init_reconfigurable) void x264_sps_init_reconfigurable( x264_sps_t *sps, x264_param_t *param ); +#define x264_sps_init_scaling_list x264_template(sps_init_scaling_list) +void x264_sps_init_scaling_list( x264_sps_t *sps, x264_param_t *param ); +#define x264_sps_write x264_template(sps_write) void x264_sps_write( bs_t *s, x264_sps_t *sps ); +#define x264_pps_init x264_template(pps_init) void x264_pps_init( x264_pps_t *pps, int i_id, x264_param_t *param, x264_sps_t *sps ); +#define x264_pps_write 
x264_template(pps_write) void x264_pps_write( bs_t *s, x264_sps_t *sps, x264_pps_t *pps ); +#define x264_sei_recovery_point_write x264_template(sei_recovery_point_write) void x264_sei_recovery_point_write( x264_t *h, bs_t *s, int recovery_frame_cnt ); +#define x264_sei_version_write x264_template(sei_version_write) int x264_sei_version_write( x264_t *h, bs_t *s ); +#define x264_validate_levels x264_template(validate_levels) int x264_validate_levels( x264_t *h, int verbose ); +#define x264_sei_buffering_period_write x264_template(sei_buffering_period_write) void x264_sei_buffering_period_write( x264_t *h, bs_t *s ); +#define x264_sei_pic_timing_write x264_template(sei_pic_timing_write) void x264_sei_pic_timing_write( x264_t *h, bs_t *s ); +#define x264_sei_dec_ref_pic_marking_write x264_template(sei_dec_ref_pic_marking_write) void x264_sei_dec_ref_pic_marking_write( x264_t *h, bs_t *s ); +#define x264_sei_frame_packing_write x264_template(sei_frame_packing_write) void x264_sei_frame_packing_write( x264_t *h, bs_t *s ); +#define x264_sei_alternative_transfer_write x264_template(sei_alternative_transfer_write) +void x264_sei_alternative_transfer_write( x264_t *h, bs_t *s ); +#define x264_sei_avcintra_umid_write x264_template(sei_avcintra_umid_write) int x264_sei_avcintra_umid_write( x264_t *h, bs_t *s ); +#define x264_sei_avcintra_vanc_write x264_template(sei_avcintra_vanc_write) int x264_sei_avcintra_vanc_write( x264_t *h, bs_t *s, int len ); +#define x264_sei_write x264_template(sei_write) void x264_sei_write( bs_t *s, uint8_t *payload, int payload_size, int payload_type ); +#define x264_filler_write x264_template(filler_write) void x264_filler_write( x264_t *h, bs_t *s, int filler ); #endif diff -Nru x264-0.152.2854+gite9a5903/encoder/slicetype.c x264-0.158.2988+git-20191101.7817004/encoder/slicetype.c --- x264-0.152.2854+gite9a5903/encoder/slicetype.c 2017-12-31 12:50:51.000000000 +0000 +++ x264-0.158.2988+git-20191101.7817004/encoder/slicetype.c 2019-11-09 
05:16:29.000000000 +0000 @@ -1,7 +1,7 @@ /***************************************************************************** * slicetype.c: lookahead analysis ***************************************************************************** - * Copyright (C) 2005-2017 x264 project + * Copyright (C) 2005-2019 x264 project * * Authors: Fiona Glaser * Loren Merritt @@ -32,26 +32,21 @@ // Indexed by pic_struct values static const uint8_t delta_tfi_divisor[10] = { 0, 2, 1, 1, 2, 2, 3, 3, 4, 6 }; -static int x264_slicetype_frame_cost( x264_t *h, x264_mb_analysis_t *a, - x264_frame_t **frames, int p0, int p1, int b ); +static int slicetype_frame_cost( x264_t *h, x264_mb_analysis_t *a, + x264_frame_t **frames, int p0, int p1, int b ); +#define x264_weights_analyse x264_template(weights_analyse) void x264_weights_analyse( x264_t *h, x264_frame_t *fenc, x264_frame_t *ref, int b_lookahead ); #if HAVE_OPENCL -int x264_opencl_lowres_init( x264_t *h, x264_frame_t *fenc, int lambda ); -int x264_opencl_motionsearch( x264_t *h, x264_frame_t **frames, int b, int ref, int b_islist1, int lambda, const x264_weight_t *w ); -int x264_opencl_finalize_cost( x264_t *h, int lambda, x264_frame_t **frames, int p0, int p1, int b, int dist_scale_factor ); -int x264_opencl_precalculate_frame_cost( x264_t *h, x264_frame_t **frames, int lambda, int p0, int p1, int b ); -void x264_opencl_flush( x264_t *h ); -void x264_opencl_slicetype_prep( x264_t *h, x264_frame_t **frames, int num_frames, int lambda ); -void x264_opencl_slicetype_end( x264_t *h ); +#include "slicetype-cl.h" #endif -static void x264_lowres_context_init( x264_t *h, x264_mb_analysis_t *a ) +static void lowres_context_init( x264_t *h, x264_mb_analysis_t *a ) { a->i_qp = X264_LOOKAHEAD_QP; a->i_lambda = x264_lambda_tab[ a->i_qp ]; - x264_mb_analyse_load_costs( h, a ); + mb_analyse_load_costs( h, a ); if( h->param.analyse.i_subpel_refine > 1 ) { h->mb.i_me_method = X264_MIN( X264_ME_HEX, h->param.analyse.i_me_method ); @@ -66,7 +61,7 @@ } /* 
makes a non-h264 weight (i.e. fix7), into an h264 weight */ -static void x264_weight_get_h264( int weight_nonh264, int offset, x264_weight_t *w ) +static void weight_get_h264( int weight_nonh264, int offset, x264_weight_t *w ) { w->i_offset = offset; w->i_denom = 7; @@ -79,7 +74,7 @@ w->i_scale = X264_MIN( w->i_scale, 127 ); } -static NOINLINE pixel *x264_weight_cost_init_luma( x264_t *h, x264_frame_t *fenc, x264_frame_t *ref, pixel *dest ) +static NOINLINE pixel *weight_cost_init_luma( x264_t *h, x264_frame_t *fenc, x264_frame_t *ref, pixel *dest ) { int ref0_distance = fenc->i_frame - ref->i_frame - 1; /* Note: this will never run during lookahead as weights_analyse is only called if no @@ -113,7 +108,7 @@ * fenc = ref + offset * v = u + stride * chroma height */ -static NOINLINE void x264_weight_cost_init_chroma( x264_t *h, x264_frame_t *fenc, x264_frame_t *ref, pixel *dstu, pixel *dstv ) +static NOINLINE void weight_cost_init_chroma( x264_t *h, x264_frame_t *fenc, x264_frame_t *ref, pixel *dstu, pixel *dstv ) { int ref0_distance = fenc->i_frame - ref->i_frame - 1; int i_stride = fenc->i_stride[1]; @@ -145,7 +140,7 @@ x264_emms(); } -static NOINLINE pixel *x264_weight_cost_init_chroma444( x264_t *h, x264_frame_t *fenc, x264_frame_t *ref, pixel *dst, int p ) +static NOINLINE pixel *weight_cost_init_chroma444( x264_t *h, x264_frame_t *fenc, x264_frame_t *ref, pixel *dst, int p ) { int ref0_distance = fenc->i_frame - ref->i_frame - 1; int i_stride = fenc->i_stride[p]; @@ -173,7 +168,7 @@ return ref->plane[p]; } -static int x264_weight_slice_header_cost( x264_t *h, x264_weight_t *w, int b_chroma ) +static int weight_slice_header_cost( x264_t *h, x264_weight_t *w, int b_chroma ) { /* Add cost of weights in the slice header. 
*/ int lambda = x264_lambda_tab[X264_LOOKAHEAD_QP]; @@ -194,7 +189,7 @@ return lambda * numslices * ( 10 + denom_cost + 2 * (bs_size_se( w[0].i_scale ) + bs_size_se( w[0].i_offset )) ); } -static NOINLINE unsigned int x264_weight_cost_luma( x264_t *h, x264_frame_t *fenc, pixel *src, x264_weight_t *w ) +static NOINLINE unsigned int weight_cost_luma( x264_t *h, x264_frame_t *fenc, pixel *src, x264_weight_t *w ) { unsigned int cost = 0; int i_stride = fenc->i_stride_lowres; @@ -214,7 +209,7 @@ int cmp = h->pixf.mbcmp[PIXEL_8x8]( buf, 8, &fenc_plane[pixoff], i_stride ); cost += X264_MIN( cmp, fenc->i_intra_cost[i_mb] ); } - cost += x264_weight_slice_header_cost( h, w, 0 ); + cost += weight_slice_header_cost( h, w, 0 ); } else for( int y = 0; y < i_lines; y += 8, pixoff = y*i_stride ) @@ -227,7 +222,7 @@ return cost; } -static NOINLINE unsigned int x264_weight_cost_chroma( x264_t *h, x264_frame_t *fenc, pixel *ref, x264_weight_t *w ) +static NOINLINE unsigned int weight_cost_chroma( x264_t *h, x264_frame_t *fenc, pixel *ref, x264_weight_t *w ) { unsigned int cost = 0; int i_stride = fenc->i_stride[1]; @@ -250,7 +245,7 @@ * pixels. 
*/ cost += h->pixf.asd8( buf, 8, &src[pixoff], i_stride, height ); } - cost += x264_weight_slice_header_cost( h, w, 1 ); + cost += weight_slice_header_cost( h, w, 1 ); } else for( int y = 0; y < i_lines; y += height, pixoff = y*i_stride ) @@ -260,7 +255,7 @@ return cost; } -static NOINLINE unsigned int x264_weight_cost_chroma444( x264_t *h, x264_frame_t *fenc, pixel *ref, x264_weight_t *w, int p ) +static NOINLINE unsigned int weight_cost_chroma444( x264_t *h, x264_frame_t *fenc, pixel *ref, x264_weight_t *w, int p ) { unsigned int cost = 0; int i_stride = fenc->i_stride[p]; @@ -277,7 +272,7 @@ w->weightfn[16>>2]( buf, 16, &ref[pixoff], i_stride, w, 16 ); cost += h->pixf.mbcmp[PIXEL_16x16]( buf, 16, &src[pixoff], i_stride ); } - cost += x264_weight_slice_header_cost( h, w, 1 ); + cost += weight_slice_header_cost( h, w, 1 ); } else for( int y = 0; y < i_lines; y += 16, pixoff = y*i_stride ) @@ -302,11 +297,12 @@ float ref_mean[3]; for( int plane = 0; plane <= 2*!b_lookahead; plane++ ) { - float fenc_var = fenc->i_pixel_ssd[plane] + !ref->i_pixel_ssd[plane]; - float ref_var = ref->i_pixel_ssd[plane] + !ref->i_pixel_ssd[plane]; + int zero_bias = !ref->i_pixel_ssd[plane]; + float fenc_var = fenc->i_pixel_ssd[plane] + zero_bias; + float ref_var = ref->i_pixel_ssd[plane] + zero_bias; guess_scale[plane] = sqrtf( fenc_var / ref_var ); - fenc_mean[plane] = (float)fenc->i_pixel_sum[plane] / (fenc->i_lines[!!plane] * fenc->i_width[!!plane]) / (1 << (BIT_DEPTH - 8)); - ref_mean[plane] = (float) ref->i_pixel_sum[plane] / (fenc->i_lines[!!plane] * fenc->i_width[!!plane]) / (1 << (BIT_DEPTH - 8)); + fenc_mean[plane] = (float)(fenc->i_pixel_sum[plane] + zero_bias) / (fenc->i_lines[!!plane] * fenc->i_width[!!plane]) / (1 << (BIT_DEPTH - 8)); + ref_mean[plane] = (float)( ref->i_pixel_sum[plane] + zero_bias) / (fenc->i_lines[!!plane] * fenc->i_width[!!plane]) / (1 << (BIT_DEPTH - 8)); } int chroma_denom = 7; @@ -323,7 +319,7 @@ } /* Don't check chroma in lookahead, or if there wasn't 
a luma weight. */ - for( int plane = 0; plane <= 2 && !( plane && ( !weights[0].weightfn || b_lookahead ) ); plane++ ) + for( int plane = 0; plane < (CHROMA_FORMAT ? 3 : 1) && !( plane && ( !weights[0].weightfn || b_lookahead ) ); plane++ ) { int minoff, minscale, mindenom; unsigned int minscore, origscore; @@ -347,7 +343,7 @@ } } else - x264_weight_get_h264( round( guess_scale[plane] * 128 ), 0, &weights[plane] ); + weight_get_h264( round( guess_scale[plane] * 128 ), 0, &weights[plane] ); found = 0; mindenom = weights[plane].i_denom; @@ -360,27 +356,27 @@ if( !fenc->b_intra_calculated ) { x264_mb_analysis_t a; - x264_lowres_context_init( h, &a ); - x264_slicetype_frame_cost( h, &a, &fenc, 0, 0, 0 ); + lowres_context_init( h, &a ); + slicetype_frame_cost( h, &a, &fenc, 0, 0, 0 ); } - mcbuf = x264_weight_cost_init_luma( h, fenc, ref, h->mb.p_weight_buf[0] ); - origscore = minscore = x264_weight_cost_luma( h, fenc, mcbuf, NULL ); + mcbuf = weight_cost_init_luma( h, fenc, ref, h->mb.p_weight_buf[0] ); + origscore = minscore = weight_cost_luma( h, fenc, mcbuf, NULL ); } else { if( CHROMA444 ) { - mcbuf = x264_weight_cost_init_chroma444( h, fenc, ref, h->mb.p_weight_buf[0], plane ); - origscore = minscore = x264_weight_cost_chroma444( h, fenc, mcbuf, NULL, plane ); + mcbuf = weight_cost_init_chroma444( h, fenc, ref, h->mb.p_weight_buf[0], plane ); + origscore = minscore = weight_cost_chroma444( h, fenc, mcbuf, NULL, plane ); } else { pixel *dstu = h->mb.p_weight_buf[0]; pixel *dstv = h->mb.p_weight_buf[0]+fenc->i_stride[1]*fenc->i_lines[1]; if( !chroma_initted++ ) - x264_weight_cost_init_chroma( h, fenc, ref, dstu, dstv ); + weight_cost_init_chroma( h, fenc, ref, dstu, dstv ); mcbuf = plane == 1 ? 
dstu : dstv; - origscore = minscore = x264_weight_cost_chroma( h, fenc, mcbuf, NULL ); + origscore = minscore = weight_cost_chroma( h, fenc, mcbuf, NULL ); } } @@ -409,8 +405,7 @@ * because scale has a much wider range than offset (because of denom), so * it should almost never need to be clamped. */ cur_offset = x264_clip3( cur_offset, -128, 127 ); - cur_scale = (1 << mindenom) * (fenc_mean[plane] - cur_offset) / ref_mean[plane] + 0.5f; - cur_scale = x264_clip3( cur_scale, 0, 127 ); + cur_scale = x264_clip3f( (1 << mindenom) * (fenc_mean[plane] - cur_offset) / ref_mean[plane] + 0.5f, 0, 127 ); } int start_offset = x264_clip3( cur_offset - offset_dist, -128, 127 ); int end_offset = x264_clip3( cur_offset + offset_dist, -128, 127 ); @@ -421,12 +416,12 @@ if( plane ) { if( CHROMA444 ) - s = x264_weight_cost_chroma444( h, fenc, mcbuf, &weights[plane], plane ); + s = weight_cost_chroma444( h, fenc, mcbuf, &weights[plane], plane ); else - s = x264_weight_cost_chroma( h, fenc, mcbuf, &weights[plane] ); + s = weight_cost_chroma( h, fenc, mcbuf, &weights[plane] ); } else - s = x264_weight_cost_luma( h, fenc, mcbuf, &weights[plane] ); + s = weight_cost_luma( h, fenc, mcbuf, &weights[plane] ); COPY4_IF_LT( minscore, s, minscale, cur_scale, minoff, i_off, found, 1 ); // Don't check any more offsets if the previous one had a lower cost than the current one @@ -487,7 +482,7 @@ if( weights[0].weightfn && b_lookahead ) { //scale lowres in lookahead for slicetype_frame_cost - pixel *src = ref->buffer_lowres[0]; + pixel *src = ref->buffer_lowres; pixel *dst = h->mb.p_weight_buf[0]; int width = ref->i_width_lowres + PADH*2; int height = ref->i_lines_lowres + PADV*2; @@ -508,10 +503,10 @@ #define NUM_ROWS 3 #define ROW_SATD (NUM_INTS + (h->mb.i_mb_y - h->i_threadslice_start)) -static void x264_slicetype_mb_cost( x264_t *h, x264_mb_analysis_t *a, - x264_frame_t **frames, int p0, int p1, int b, - int dist_scale_factor, int do_search[2], const x264_weight_t *w, - int *output_inter, int 
*output_intra ) +static void slicetype_mb_cost( x264_t *h, x264_mb_analysis_t *a, + x264_frame_t **frames, int p0, int p1, int b, + int dist_scale_factor, int do_search[2], const x264_weight_t *w, + int *output_inter, int *output_intra ) { x264_frame_t *fref0 = frames[p0]; x264_frame_t *fref1 = frames[p1]; @@ -808,7 +803,7 @@ int *output_intra; } x264_slicetype_slice_t; -static void x264_slicetype_slice_cost( x264_slicetype_slice_t *s ) +static void slicetype_slice_cost( x264_slicetype_slice_t *s ) { x264_t *h = s->h; @@ -826,12 +821,12 @@ for( h->mb.i_mb_y = start_y; h->mb.i_mb_y >= end_y; h->mb.i_mb_y-- ) for( h->mb.i_mb_x = start_x; h->mb.i_mb_x >= end_x; h->mb.i_mb_x-- ) - x264_slicetype_mb_cost( h, s->a, s->frames, s->p0, s->p1, s->b, s->dist_scale_factor, - s->do_search, s->w, s->output_inter, s->output_intra ); + slicetype_mb_cost( h, s->a, s->frames, s->p0, s->p1, s->b, s->dist_scale_factor, + s->do_search, s->w, s->output_inter, s->output_intra ); } -static int x264_slicetype_frame_cost( x264_t *h, x264_mb_analysis_t *a, - x264_frame_t **frames, int p0, int p1, int b ) +static int slicetype_frame_cost( x264_t *h, x264_mb_analysis_t *a, + x264_frame_t **frames, int p0, int p1, int b ) { int i_score = 0; int do_search[2]; @@ -923,7 +918,7 @@ output_inter[i+1] = output_inter[i] + thread_output_size + PAD_SIZE; output_intra[i+1] = output_intra[i] + thread_output_size + PAD_SIZE; - x264_threadpool_run( h->lookaheadpool, (void*)x264_slicetype_slice_cost, &s[i] ); + x264_threadpool_run( h->lookaheadpool, (void*)slicetype_slice_cost, &s[i] ); } for( int i = 0; i < h->param.i_lookahead_threads; i++ ) x264_threadpool_wait( h->lookaheadpool, &s[i] ); @@ -937,7 +932,7 @@ output_inter[0][NUM_ROWS] = output_intra[0][NUM_ROWS] = h->mb.i_mb_height; x264_slicetype_slice_t s = (x264_slicetype_slice_t){ h, a, frames, p0, p1, b, dist_scale_factor, do_search, w, output_inter[0], output_intra[0] }; - x264_slicetype_slice_cost( &s ); + slicetype_slice_cost( &s ); } /* Sum up 
accumulators */ @@ -993,7 +988,7 @@ /* If MB-tree changes the quantizers, we need to recalculate the frame cost without * re-running lookahead. */ -static int x264_slicetype_frame_cost_recalculate( x264_t *h, x264_frame_t **frames, int p0, int p1, int b ) +static int slicetype_frame_cost_recalculate( x264_t *h, x264_frame_t **frames, int p0, int p1, int b ) { int i_score = 0; int *row_satd = frames[b]->i_row_satds[b-p0][p1-b]; @@ -1023,7 +1018,7 @@ /* Trade off precision in mbtree for increased range */ #define MBTREE_PRECISION 0.5f -static void x264_macroblock_tree_finish( x264_t *h, x264_frame_t *frame, float average_duration, int ref0_distance ) +static void macroblock_tree_finish( x264_t *h, x264_frame_t *frame, float average_duration, int ref0_distance ) { int fps_factor = round( CLIP_DURATION(average_duration) / CLIP_DURATION(frame->f_duration) * 256 / MBTREE_PRECISION ); float weightdelta = 0.0; @@ -1045,7 +1040,7 @@ } } -static void x264_macroblock_tree_propagate( x264_t *h, x264_frame_t **frames, float average_duration, int p0, int p1, int b, int referenced ) +static void macroblock_tree_propagate( x264_t *h, x264_frame_t **frames, float average_duration, int p0, int p1, int b, int referenced ) { uint16_t *ref_costs[2] = {frames[p0]->i_propagate_cost,frames[p1]->i_propagate_cost}; int dist_scale_factor = ( ((b-p0) << 8) + ((p1-p0) >> 1) ) / (p1-p0); @@ -1082,10 +1077,10 @@ } if( h->param.rc.i_vbv_buffer_size && h->param.rc.i_lookahead && referenced ) - x264_macroblock_tree_finish( h, frames[b], average_duration, b == p1 ? b - p0 : 0 ); + macroblock_tree_finish( h, frames[b], average_duration, b == p1 ? 
b - p0 : 0 ); } -static void x264_macroblock_tree( x264_t *h, x264_mb_analysis_t *a, x264_frame_t **frames, int num_frames, int b_intra ) +static void macroblock_tree( x264_t *h, x264_mb_analysis_t *a, x264_frame_t **frames, int num_frames, int b_intra ) { int idx = !b_intra; int last_nonb, cur_nonb = 1; @@ -1100,7 +1095,7 @@ int i = num_frames; if( b_intra ) - x264_slicetype_frame_cost( h, a, frames, 0, 0, 0 ); + slicetype_frame_cost( h, a, frames, 0, 0, 0 ); while( i > 0 && IS_X264_TYPE_B( frames[i]->i_type ) ) i--; @@ -1134,13 +1129,13 @@ cur_nonb--; if( cur_nonb < idx ) break; - x264_slicetype_frame_cost( h, a, frames, cur_nonb, last_nonb, last_nonb ); + slicetype_frame_cost( h, a, frames, cur_nonb, last_nonb, last_nonb ); memset( frames[cur_nonb]->i_propagate_cost, 0, h->mb.i_mb_count * sizeof(uint16_t) ); bframes = last_nonb - cur_nonb - 1; if( h->param.i_bframe_pyramid && bframes > 1 ) { int middle = (bframes + 1)/2 + cur_nonb; - x264_slicetype_frame_cost( h, a, frames, cur_nonb, last_nonb, middle ); + slicetype_frame_cost( h, a, frames, cur_nonb, last_nonb, middle ); memset( frames[middle]->i_propagate_cost, 0, h->mb.i_mb_count * sizeof(uint16_t) ); while( i > cur_nonb ) { @@ -1148,52 +1143,52 @@ int p1 = i < middle ? 
middle : last_nonb; if( i != middle ) { - x264_slicetype_frame_cost( h, a, frames, p0, p1, i ); - x264_macroblock_tree_propagate( h, frames, average_duration, p0, p1, i, 0 ); + slicetype_frame_cost( h, a, frames, p0, p1, i ); + macroblock_tree_propagate( h, frames, average_duration, p0, p1, i, 0 ); } i--; } - x264_macroblock_tree_propagate( h, frames, average_duration, cur_nonb, last_nonb, middle, 1 ); + macroblock_tree_propagate( h, frames, average_duration, cur_nonb, last_nonb, middle, 1 ); } else { while( i > cur_nonb ) { - x264_slicetype_frame_cost( h, a, frames, cur_nonb, last_nonb, i ); - x264_macroblock_tree_propagate( h, frames, average_duration, cur_nonb, last_nonb, i, 0 ); + slicetype_frame_cost( h, a, frames, cur_nonb, last_nonb, i ); + macroblock_tree_propagate( h, frames, average_duration, cur_nonb, last_nonb, i, 0 ); i--; } } - x264_macroblock_tree_propagate( h, frames, average_duration, cur_nonb, last_nonb, last_nonb, 1 ); + macroblock_tree_propagate( h, frames, average_duration, cur_nonb, last_nonb, last_nonb, 1 ); last_nonb = cur_nonb; } if( !h->param.rc.i_lookahead ) { - x264_slicetype_frame_cost( h, a, frames, 0, last_nonb, last_nonb ); - x264_macroblock_tree_propagate( h, frames, average_duration, 0, last_nonb, last_nonb, 1 ); + slicetype_frame_cost( h, a, frames, 0, last_nonb, last_nonb ); + macroblock_tree_propagate( h, frames, average_duration, 0, last_nonb, last_nonb, 1 ); XCHG( uint16_t*, frames[last_nonb]->i_propagate_cost, frames[0]->i_propagate_cost ); } - x264_macroblock_tree_finish( h, frames[last_nonb], average_duration, last_nonb ); + macroblock_tree_finish( h, frames[last_nonb], average_duration, last_nonb ); if( h->param.i_bframe_pyramid && bframes > 1 && !h->param.rc.i_vbv_buffer_size ) - x264_macroblock_tree_finish( h, frames[last_nonb+(bframes+1)/2], average_duration, 0 ); + macroblock_tree_finish( h, frames[last_nonb+(bframes+1)/2], average_duration, 0 ); } -static int x264_vbv_frame_cost( x264_t *h, x264_mb_analysis_t *a, 
x264_frame_t **frames, int p0, int p1, int b ) +static int vbv_frame_cost( x264_t *h, x264_mb_analysis_t *a, x264_frame_t **frames, int p0, int p1, int b ) { - int cost = x264_slicetype_frame_cost( h, a, frames, p0, p1, b ); + int cost = slicetype_frame_cost( h, a, frames, p0, p1, b ); if( h->param.rc.i_aq_mode ) { if( h->param.rc.b_mb_tree ) - return x264_slicetype_frame_cost_recalculate( h, frames, p0, p1, b ); + return slicetype_frame_cost_recalculate( h, frames, p0, p1, b ); else return frames[b]->i_cost_est_aq[b-p0][p1-b]; } return cost; } -static void x264_calculate_durations( x264_t *h, x264_frame_t *cur_frame, x264_frame_t *prev_frame, int64_t *i_cpb_delay, int64_t *i_coded_fields ) +static void calculate_durations( x264_t *h, x264_frame_t *cur_frame, x264_frame_t *prev_frame, int64_t *i_cpb_delay, int64_t *i_coded_fields ) { cur_frame->i_cpb_delay = *i_cpb_delay; cur_frame->i_dpb_output_delay = cur_frame->i_field_cnt - *i_coded_fields; @@ -1219,7 +1214,7 @@ cur_frame->i_cpb_duration = cur_frame->i_duration; } -static void x264_vbv_lookahead( x264_t *h, x264_mb_analysis_t *a, x264_frame_t **frames, int num_frames, int keyframe ) +static void vbv_lookahead( x264_t *h, x264_mb_analysis_t *a, x264_frame_t **frames, int num_frames, int keyframe ) { int last_nonb = 0, cur_nonb = 1, idx = 0; x264_frame_t *prev_frame = NULL; @@ -1240,11 +1235,11 @@ if( next_nonb != cur_nonb ) { int p0 = IS_X264_TYPE_I( frames[cur_nonb]->i_type ) ? 
cur_nonb : last_nonb; - frames[next_nonb]->i_planned_satd[idx] = x264_vbv_frame_cost( h, a, frames, p0, cur_nonb, cur_nonb ); + frames[next_nonb]->i_planned_satd[idx] = vbv_frame_cost( h, a, frames, p0, cur_nonb, cur_nonb ); frames[next_nonb]->i_planned_type[idx] = frames[cur_nonb]->i_type; frames[cur_nonb]->i_coded_fields_lookahead = h->i_coded_fields_lookahead; frames[cur_nonb]->i_cpb_delay_lookahead = h->i_cpb_delay_lookahead; - x264_calculate_durations( h, frames[cur_nonb], prev_frame, &h->i_cpb_delay_lookahead, &h->i_coded_fields_lookahead ); + calculate_durations( h, frames[cur_nonb], prev_frame, &h->i_cpb_delay_lookahead, &h->i_coded_fields_lookahead ); if( prev_frame ) { frames[next_nonb]->f_planned_cpb_duration[prev_frame_idx] = (double)prev_frame->i_cpb_duration * @@ -1259,11 +1254,11 @@ /* Handle the B-frames: coded order */ for( int i = last_nonb+1; i < cur_nonb; i++, idx++ ) { - frames[next_nonb]->i_planned_satd[idx] = x264_vbv_frame_cost( h, a, frames, last_nonb, cur_nonb, i ); + frames[next_nonb]->i_planned_satd[idx] = vbv_frame_cost( h, a, frames, last_nonb, cur_nonb, i ); frames[next_nonb]->i_planned_type[idx] = X264_TYPE_B; frames[i]->i_coded_fields_lookahead = h->i_coded_fields_lookahead; frames[i]->i_cpb_delay_lookahead = h->i_cpb_delay_lookahead; - x264_calculate_durations( h, frames[i], prev_frame, &h->i_cpb_delay_lookahead, &h->i_coded_fields_lookahead ); + calculate_durations( h, frames[i], prev_frame, &h->i_cpb_delay_lookahead, &h->i_coded_fields_lookahead ); if( prev_frame ) { frames[next_nonb]->f_planned_cpb_duration[prev_frame_idx] = (double)prev_frame->i_cpb_duration * @@ -1282,10 +1277,10 @@ frames[next_nonb]->i_planned_type[idx] = X264_TYPE_AUTO; } -static int x264_slicetype_path_cost( x264_t *h, x264_mb_analysis_t *a, x264_frame_t **frames, char *path, int threshold ) +static uint64_t slicetype_path_cost( x264_t *h, x264_mb_analysis_t *a, x264_frame_t **frames, char *path, uint64_t threshold ) { + uint64_t cost = 0; int loc = 1; - 
int cost = 0; int cur_nonb = 0; path--; /* Since the 1st path element is really the second frame */ while( path[loc] ) @@ -1297,9 +1292,9 @@ /* Add the cost of the non-B-frame found above */ if( path[next_nonb] == 'P' ) - cost += x264_slicetype_frame_cost( h, a, frames, cur_nonb, next_nonb, next_nonb ); + cost += slicetype_frame_cost( h, a, frames, cur_nonb, next_nonb, next_nonb ); else /* I-frame */ - cost += x264_slicetype_frame_cost( h, a, frames, next_nonb, next_nonb, next_nonb ); + cost += slicetype_frame_cost( h, a, frames, next_nonb, next_nonb, next_nonb ); /* Early terminate if the cost we have found is larger than the best path cost so far */ if( cost > threshold ) break; @@ -1307,15 +1302,15 @@ if( h->param.i_bframe_pyramid && next_nonb - cur_nonb > 2 ) { int middle = cur_nonb + (next_nonb - cur_nonb)/2; - cost += x264_slicetype_frame_cost( h, a, frames, cur_nonb, next_nonb, middle ); + cost += slicetype_frame_cost( h, a, frames, cur_nonb, next_nonb, middle ); for( int next_b = loc; next_b < middle && cost < threshold; next_b++ ) - cost += x264_slicetype_frame_cost( h, a, frames, cur_nonb, middle, next_b ); + cost += slicetype_frame_cost( h, a, frames, cur_nonb, middle, next_b ); for( int next_b = middle+1; next_b < next_nonb && cost < threshold; next_b++ ) - cost += x264_slicetype_frame_cost( h, a, frames, middle, next_nonb, next_b ); + cost += slicetype_frame_cost( h, a, frames, middle, next_nonb, next_b ); } else for( int next_b = loc; next_b < next_nonb && cost < threshold; next_b++ ) - cost += x264_slicetype_frame_cost( h, a, frames, cur_nonb, next_nonb, next_b ); + cost += slicetype_frame_cost( h, a, frames, cur_nonb, next_nonb, next_b ); loc = next_nonb + 1; cur_nonb = next_nonb; @@ -1327,11 +1322,11 @@ /* Uses strings due to the fact that the speed of the control functions is negligible compared to the cost of running slicetype_frame_cost, and because it makes debugging easier. 
*/ -static void x264_slicetype_path( x264_t *h, x264_mb_analysis_t *a, x264_frame_t **frames, int length, char (*best_paths)[X264_LOOKAHEAD_MAX+1] ) +static void slicetype_path( x264_t *h, x264_mb_analysis_t *a, x264_frame_t **frames, int length, char (*best_paths)[X264_LOOKAHEAD_MAX+1] ) { char paths[2][X264_LOOKAHEAD_MAX+1]; int num_paths = X264_MIN( h->param.i_bframe+1, length ); - int best_cost = COST_MAX; + uint64_t best_cost = COST_MAX64; int best_possible = 0; int idx = 0; @@ -1362,9 +1357,9 @@ if( possible || !best_possible ) { if( possible && !best_possible ) - best_cost = COST_MAX; + best_cost = COST_MAX64; /* Calculate the actual cost of the current path */ - int cost = x264_slicetype_path_cost( h, a, frames, paths[idx], best_cost ); + uint64_t cost = slicetype_path_cost( h, a, frames, paths[idx], best_cost ); if( cost < best_cost ) { best_cost = cost; @@ -1386,7 +1381,7 @@ if( real_scenecut && h->param.i_frame_packing == 5 && (frame->i_frame&1) ) return 0; - x264_slicetype_frame_cost( h, a, frames, p0, p1, p1 ); + slicetype_frame_cost( h, a, frames, p0, p1, p1 ); int icost = frame->i_cost_est[0][0]; int pcost = frame->i_cost_est[p1-p0][0]; @@ -1473,7 +1468,7 @@ x264_frame_t *frames[X264_LOOKAHEAD_MAX+3] = { NULL, }; int num_frames, orig_num_frames, keyint_limit, framecnt; int i_max_search = X264_MIN( h->lookahead->next.i_size, X264_LOOKAHEAD_MAX ); - int vbv_lookahead = h->param.rc.i_vbv_buffer_size && h->param.rc.i_lookahead; + int b_vbv_lookahead = h->param.rc.i_vbv_buffer_size && h->param.rc.i_lookahead; /* For determinism we should limit the search to the number of frames lookahead has for sure * in h->lookahead->next.list buffer, except at the end of stream. * For normal calls with (intra_minigop == 0) that is h->lookahead->i_slicetype_length + 1 frames. 
@@ -1490,12 +1485,12 @@ for( framecnt = 0; framecnt < i_max_search; framecnt++ ) frames[framecnt+1] = h->lookahead->next.list[framecnt]; - x264_lowres_context_init( h, &a ); + lowres_context_init( h, &a ); if( !framecnt ) { if( h->param.rc.b_mb_tree ) - x264_macroblock_tree( h, &a, frames, 0, keyframe ); + macroblock_tree( h, &a, frames, 0, keyframe ); return; } @@ -1506,7 +1501,7 @@ * there will be significant visual artifacts if the frames just before * go down in quality due to being referenced less, despite it being * more RD-optimal. */ - if( (h->param.analyse.b_psy && h->param.rc.b_mb_tree) || vbv_lookahead ) + if( (h->param.analyse.b_psy && h->param.rc.b_mb_tree) || b_vbv_lookahead ) num_frames = framecnt; else if( h->param.b_open_gop && num_frames < framecnt ) num_frames++; @@ -1556,7 +1551,7 @@ /* Perform the frametype analysis. */ for( int j = 2; j <= num_frames; j++ ) - x264_slicetype_path( h, &a, frames, j, best_paths ); + slicetype_path( h, &a, frames, j, best_paths ); /* Load the results of the analysis into the frame types. */ for( int j = 1; j < num_frames; j++ ) @@ -1607,9 +1602,9 @@ int bframes = j - last_nonb - 1; memset( path, 'B', bframes ); strcpy( path+bframes, "PP" ); - int cost_p = x264_slicetype_path_cost( h, &a, frames+last_nonb, path, COST_MAX ); + uint64_t cost_p = slicetype_path_cost( h, &a, frames+last_nonb, path, COST_MAX64 ); strcpy( path+bframes, "BP" ); - int cost_b = x264_slicetype_path_cost( h, &a, frames+last_nonb, path, cost_p ); + uint64_t cost_b = slicetype_path_cost( h, &a, frames+last_nonb, path, cost_p ); if( cost_b < cost_p ) frames[j]->i_type = X264_TYPE_B; @@ -1672,7 +1667,7 @@ /* Perform the actual macroblock tree analysis. * Don't go farther than the maximum keyframe interval; this helps in short GOPs. 
*/ if( h->param.rc.b_mb_tree ) - x264_macroblock_tree( h, &a, frames, X264_MIN(num_frames, h->param.i_keyint_max), keyframe ); + macroblock_tree( h, &a, frames, X264_MIN(num_frames, h->param.i_keyint_max), keyframe ); /* Enforce keyframe limit. */ if( !h->param.b_intra_refresh ) @@ -1727,8 +1722,8 @@ } } - if( vbv_lookahead ) - x264_vbv_lookahead( h, &a, frames, num_frames, keyframe ); + if( b_vbv_lookahead ) + vbv_lookahead( h, &a, frames, num_frames, keyframe ); /* Restore frametypes for all frames that haven't actually been decided yet. */ for( int j = reset_start; j <= num_frames; j++ ) @@ -1899,7 +1894,7 @@ int p0, p1, b; p1 = b = bframes + 1; - x264_lowres_context_init( h, &a ); + lowres_context_init( h, &a ); frames[0] = h->lookahead->last_nonb; memcpy( &frames[1], h->lookahead->next.list, (bframes+1) * sizeof(x264_frame_t*) ); @@ -1908,12 +1903,12 @@ else // P p0 = 0; - x264_slicetype_frame_cost( h, &a, frames, p0, p1, b ); + slicetype_frame_cost( h, &a, frames, p0, p1, b ); if( (p0 != p1 || bframes) && h->param.rc.i_vbv_buffer_size ) { /* We need the intra costs for row SATDs. */ - x264_slicetype_frame_cost( h, &a, frames, b, b, b ); + slicetype_frame_cost( h, &a, frames, b, b, b ); /* We need B-frame costs for row SATDs. 
*/ p0 = 0; @@ -1924,7 +1919,7 @@ p1++; else p1 = bframes + 1; - x264_slicetype_frame_cost( h, &a, frames, p0, p1, b ); + slicetype_frame_cost( h, &a, frames, p0, p1, b ); if( frames[b]->i_type == X264_TYPE_BREF ) p0 = b; } @@ -1961,12 +1956,12 @@ h->lookahead->next.list[i]->i_coded = i_coded++; if( i ) { - x264_calculate_durations( h, h->lookahead->next.list[i], h->lookahead->next.list[i-1], &h->i_cpb_delay, &h->i_coded_fields ); + calculate_durations( h, h->lookahead->next.list[i], h->lookahead->next.list[i-1], &h->i_cpb_delay, &h->i_coded_fields ); h->lookahead->next.list[0]->f_planned_cpb_duration[i-1] = (double)h->lookahead->next.list[i]->i_cpb_duration * h->sps->vui.i_num_units_in_tick / h->sps->vui.i_time_scale; } else - x264_calculate_durations( h, h->lookahead->next.list[i], NULL, &h->i_cpb_delay, &h->i_coded_fields ); + calculate_durations( h, h->lookahead->next.list[i], NULL, &h->i_cpb_delay, &h->i_coded_fields ); } } @@ -1994,9 +1989,9 @@ if( h->param.rc.b_mb_tree && !h->param.rc.b_stat_read ) { - cost = x264_slicetype_frame_cost_recalculate( h, frames, p0, p1, b ); + cost = slicetype_frame_cost_recalculate( h, frames, p0, p1, b ); if( b && h->param.rc.i_vbv_buffer_size ) - x264_slicetype_frame_cost_recalculate( h, frames, b, b, b ); + slicetype_frame_cost_recalculate( h, frames, b, b, b ); } /* In AQ, use the weighted score instead. 
*/ else if( h->param.rc.i_aq_mode ) diff -Nru x264-0.152.2854+gite9a5903/encoder/slicetype-cl.c x264-0.158.2988+git-20191101.7817004/encoder/slicetype-cl.c --- x264-0.152.2854+gite9a5903/encoder/slicetype-cl.c 2017-12-31 12:50:51.000000000 +0000 +++ x264-0.158.2988+git-20191101.7817004/encoder/slicetype-cl.c 2019-11-09 05:16:29.000000000 +0000 @@ -1,7 +1,7 @@ /***************************************************************************** * slicetype-cl.c: OpenCL slicetype decision code (lowres lookahead) ***************************************************************************** - * Copyright (C) 2012-2017 x264 project + * Copyright (C) 2012-2019 x264 project * * Authors: Steve Borho * @@ -26,12 +26,14 @@ #include "common/common.h" #include "macroblock.h" #include "me.h" +#include "slicetype-cl.h" #if HAVE_OPENCL #ifdef _WIN32 #include #endif +#define x264_weights_analyse x264_template(weights_analyse) void x264_weights_analyse( x264_t *h, x264_frame_t *fenc, x264_frame_t *ref, int b_lookahead ); /* We define CL_QUEUE_THREAD_HANDLE_AMD here because it is not defined @@ -67,7 +69,7 @@ h->opencl.pl_occupancy = 0; } -static void *x264_opencl_alloc_locked( x264_t *h, int bytes ) +static void *opencl_alloc_locked( x264_t *h, int bytes ) { if( h->opencl.pl_occupancy + bytes >= PAGE_LOCKED_BUF_SIZE ) x264_opencl_flush( h ); @@ -161,7 +163,7 @@ /* Copy image to the GPU, downscale to unpadded 8x8, then continue for all scales */ - char *locked = x264_opencl_alloc_locked( h, luma_length ); + char *locked = opencl_alloc_locked( h, luma_length ); memcpy( locked, fenc->plane[0], luma_length ); OCLCHECK( clEnqueueWriteBuffer, h->opencl.queue, h->opencl.luma_16x16_image[h->opencl.last_buf], CL_FALSE, 0, luma_length, locked, 0, NULL, NULL ); @@ -169,7 +171,7 @@ if( h->param.rc.i_aq_mode && fenc->i_inv_qscale_factor ) { int size = h->mb.i_mb_count * sizeof(int16_t); - locked = x264_opencl_alloc_locked( h, size ); + locked = opencl_alloc_locked( h, size ); memcpy( locked, 
fenc->i_inv_qscale_factor, size ); OCLCHECK( clEnqueueWriteBuffer, h->opencl.queue, fenc->opencl.inv_qscale_factor, CL_FALSE, 0, size, locked, 0, NULL, NULL ); } @@ -250,7 +252,7 @@ x264_opencl_flush( h ); int size = h->mb.i_mb_count * sizeof(int16_t); - locked = x264_opencl_alloc_locked( h, size ); + locked = opencl_alloc_locked( h, size ); OCLCHECK( clEnqueueReadBuffer, h->opencl.queue, fenc->opencl.intra_cost, CL_FALSE, 0, size, locked, 0, NULL, NULL ); h->opencl.copies[h->opencl.num_copies].dest = fenc->lowres_costs[0][0]; h->opencl.copies[h->opencl.num_copies].src = locked; @@ -258,7 +260,7 @@ h->opencl.num_copies++; size = h->mb.i_mb_height * sizeof(int); - locked = x264_opencl_alloc_locked( h, size ); + locked = opencl_alloc_locked( h, size ); OCLCHECK( clEnqueueReadBuffer, h->opencl.queue, h->opencl.row_satds[h->opencl.last_buf], CL_FALSE, 0, size, locked, 0, NULL, NULL ); h->opencl.copies[h->opencl.num_copies].dest = fenc->i_row_satds[0][0]; h->opencl.copies[h->opencl.num_copies].src = locked; @@ -266,7 +268,7 @@ h->opencl.num_copies++; size = sizeof(int) * 4; - locked = x264_opencl_alloc_locked( h, size ); + locked = opencl_alloc_locked( h, size ); OCLCHECK( clEnqueueReadBuffer, h->opencl.queue, h->opencl.frame_stats[h->opencl.last_buf], CL_FALSE, 0, size, locked, 0, NULL, NULL ); h->opencl.copies[h->opencl.num_copies].dest = &fenc->i_cost_est[0][0]; h->opencl.copies[h->opencl.num_copies].src = locked; @@ -286,7 +288,7 @@ * applications will have self-tuning code to try many possible variables and * measure the runtime. Here we simply make an educated guess based on what we * know GPUs typically prefer. 
*/ -static void x264_optimal_launch_dims( x264_t *h, size_t *gdims, size_t *ldims, const cl_kernel kernel, const cl_device_id device ) +static void optimal_launch_dims( x264_t *h, size_t *gdims, size_t *ldims, const cl_kernel kernel, const cl_device_id device ) { x264_opencl_function_t *ocl = h->opencl.ocl; size_t max_work_group = 256; /* reasonable defaults for OpenCL 1.0 devices, below APIs may fail */ @@ -425,7 +427,7 @@ if( gdims[0] < 2 || gdims[1] < 2 ) continue; gdims[0] <<= 2; - x264_optimal_launch_dims( h, gdims, ldims, h->opencl.hme_kernel, h->opencl.device ); + optimal_launch_dims( h, gdims, ldims, h->opencl.hme_kernel, h->opencl.device ); mb_per_group = (ldims[0] >> 2) * ldims[1]; cost_local_size = 4 * mb_per_group * sizeof(int16_t); @@ -513,7 +515,7 @@ if( h->opencl.num_copies >= MAX_FINISH_COPIES - 1 ) x264_opencl_flush( h ); - char *locked = x264_opencl_alloc_locked( h, mvlen ); + char *locked = opencl_alloc_locked( h, mvlen ); h->opencl.copies[h->opencl.num_copies].src = locked; h->opencl.copies[h->opencl.num_copies].bytes = mvlen; @@ -560,7 +562,7 @@ /* For B frames, use 4 threads per MB for BIDIR checks */ ldims = ldim_bidir; gdims[0] <<= 2; - x264_optimal_launch_dims( h, gdims, ldims, h->opencl.mode_select_kernel, h->opencl.device ); + optimal_launch_dims( h, gdims, ldims, h->opencl.mode_select_kernel, h->opencl.device ); int mb_per_group = (ldims[0] >> 2) * ldims[1]; cost_local_size = 4 * mb_per_group * sizeof(int16_t); satd_local_size = 16 * mb_per_group * sizeof(uint32_t); @@ -609,7 +611,7 @@ x264_opencl_flush( h ); int size = h->mb.i_mb_count * sizeof(int16_t); - char *locked = x264_opencl_alloc_locked( h, size ); + char *locked = opencl_alloc_locked( h, size ); h->opencl.copies[h->opencl.num_copies].src = locked; h->opencl.copies[h->opencl.num_copies].dest = fenc->lowres_costs[b - p0][p1 - b]; h->opencl.copies[h->opencl.num_copies].bytes = size; @@ -617,7 +619,7 @@ h->opencl.num_copies++; size = h->mb.i_mb_height * sizeof(int); - locked = 
x264_opencl_alloc_locked( h, size ); + locked = opencl_alloc_locked( h, size ); h->opencl.copies[h->opencl.num_copies].src = locked; h->opencl.copies[h->opencl.num_copies].dest = fenc->i_row_satds[b - p0][p1 - b]; h->opencl.copies[h->opencl.num_copies].bytes = size; @@ -625,7 +627,7 @@ h->opencl.num_copies++; size = 4 * sizeof(int); - locked = x264_opencl_alloc_locked( h, size ); + locked = opencl_alloc_locked( h, size ); OCLCHECK( clEnqueueReadBuffer, h->opencl.queue, h->opencl.frame_stats[h->opencl.last_buf], CL_FALSE, 0, size, locked, 0, NULL, NULL ); h->opencl.last_buf = !h->opencl.last_buf; diff -Nru x264-0.152.2854+gite9a5903/encoder/slicetype-cl.h x264-0.158.2988+git-20191101.7817004/encoder/slicetype-cl.h --- x264-0.152.2854+gite9a5903/encoder/slicetype-cl.h 1970-01-01 00:00:00.000000000 +0000 +++ x264-0.158.2988+git-20191101.7817004/encoder/slicetype-cl.h 2019-11-09 05:16:29.000000000 +0000 @@ -0,0 +1,44 @@ +/***************************************************************************** + * slicetype-cl.h: OpenCL slicetype decision code (lowres lookahead) + ***************************************************************************** + * Copyright (C) 2017-2019 x264 project + * + * Authors: Anton Mitrofanov + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02111, USA. 
+ * + * This program is also available under a commercial proprietary license. + * For more information, contact us at licensing@x264.com. + *****************************************************************************/ + +#ifndef X264_ENCODER_SLICETYPE_CL_H +#define X264_ENCODER_SLICETYPE_CL_H + +#define x264_opencl_lowres_init x264_template(opencl_lowres_init) +int x264_opencl_lowres_init( x264_t *h, x264_frame_t *fenc, int lambda ); +#define x264_opencl_motionsearch x264_template(opencl_motionsearch) +int x264_opencl_motionsearch( x264_t *h, x264_frame_t **frames, int b, int ref, int b_islist1, int lambda, const x264_weight_t *w ); +#define x264_opencl_finalize_cost x264_template(opencl_finalize_cost) +int x264_opencl_finalize_cost( x264_t *h, int lambda, x264_frame_t **frames, int p0, int p1, int b, int dist_scale_factor ); +#define x264_opencl_precalculate_frame_cost x264_template(opencl_precalculate_frame_cost) +int x264_opencl_precalculate_frame_cost( x264_t *h, x264_frame_t **frames, int lambda, int p0, int p1, int b ); +#define x264_opencl_flush x264_template(opencl_flush) +void x264_opencl_flush( x264_t *h ); +#define x264_opencl_slicetype_prep x264_template(opencl_slicetype_prep) +void x264_opencl_slicetype_prep( x264_t *h, x264_frame_t **frames, int num_frames, int lambda ); +#define x264_opencl_slicetype_end x264_template(opencl_slicetype_end) +void x264_opencl_slicetype_end( x264_t *h ); + +#endif diff -Nru x264-0.152.2854+gite9a5903/example.c x264-0.158.2988+git-20191101.7817004/example.c --- x264-0.152.2854+gite9a5903/example.c 2017-12-31 12:50:51.000000000 +0000 +++ x264-0.158.2988+git-20191101.7817004/example.c 2019-11-09 05:16:29.000000000 +0000 @@ -1,7 +1,7 @@ /***************************************************************************** * example.c: libx264 API usage example ***************************************************************************** - * Copyright (C) 2014-2017 x264 project + * Copyright (C) 2014-2019 x264 project * * Authors: 
Anton Mitrofanov * @@ -68,6 +68,7 @@ goto fail; /* Configure non-default params */ + param.i_bitdepth = 8; param.i_csp = X264_CSP_I420; param.i_width = width; param.i_height = height; diff -Nru x264-0.152.2854+gite9a5903/extras/intel_dispatcher.h x264-0.158.2988+git-20191101.7817004/extras/intel_dispatcher.h --- x264-0.152.2854+gite9a5903/extras/intel_dispatcher.h 2017-12-31 12:50:51.000000000 +0000 +++ x264-0.158.2988+git-20191101.7817004/extras/intel_dispatcher.h 2019-11-09 05:16:29.000000000 +0000 @@ -1,7 +1,7 @@ /***************************************************************************** * intel_dispatcher.h: intel compiler cpu dispatcher override ***************************************************************************** - * Copyright (C) 2014-2017 x264 project + * Copyright (C) 2014-2019 x264 project * * Authors: Anton Mitrofanov * diff -Nru x264-0.152.2854+gite9a5903/filters/filters.c x264-0.158.2988+git-20191101.7817004/filters/filters.c --- x264-0.152.2854+gite9a5903/filters/filters.c 2017-12-31 12:50:51.000000000 +0000 +++ x264-0.158.2988+git-20191101.7817004/filters/filters.c 2019-11-09 05:16:29.000000000 +0000 @@ -1,7 +1,7 @@ /***************************************************************************** * filters.c: common filter functions ***************************************************************************** - * Copyright (C) 2010-2017 x264 project + * Copyright (C) 2010-2019 x264 project * * Authors: Diogo Franco * Steven Walters @@ -26,6 +26,7 @@ *****************************************************************************/ #include "filters.h" + #define RETURN_IF_ERROR( cond, ... 
) RETURN_IF_ERR( cond, "options", NULL, __VA_ARGS__ ) char **x264_split_options( const char *opt_str, const char * const *options ) diff -Nru x264-0.152.2854+gite9a5903/filters/filters.h x264-0.158.2988+git-20191101.7817004/filters/filters.h --- x264-0.152.2854+gite9a5903/filters/filters.h 2017-12-31 12:50:51.000000000 +0000 +++ x264-0.158.2988+git-20191101.7817004/filters/filters.h 2019-11-09 05:16:29.000000000 +0000 @@ -1,7 +1,7 @@ /***************************************************************************** * filters.h: common filter functions ***************************************************************************** - * Copyright (C) 2010-2017 x264 project + * Copyright (C) 2010-2019 x264 project * * Authors: Diogo Franco * Steven Walters diff -Nru x264-0.152.2854+gite9a5903/filters/video/cache.c x264-0.158.2988+git-20191101.7817004/filters/video/cache.c --- x264-0.152.2854+gite9a5903/filters/video/cache.c 2017-12-31 12:50:51.000000000 +0000 +++ x264-0.158.2988+git-20191101.7817004/filters/video/cache.c 2019-11-09 05:16:29.000000000 +0000 @@ -1,7 +1,7 @@ /***************************************************************************** * cache.c: cache video filter ***************************************************************************** - * Copyright (C) 2010-2017 x264 project + * Copyright (C) 2010-2019 x264 project * * Authors: Steven Walters * @@ -25,7 +25,15 @@ #include "video.h" #include "internal.h" -#define NAME "cache" +#include "common/common.h" + +#define cache_filter x264_glue3(cache, BIT_DEPTH, filter) +#if BIT_DEPTH == 8 +#define NAME "cache_8" +#else +#define NAME "cache_10" +#endif + #define LAST_FRAME (h->first_frame + h->cur_size - 1) typedef struct diff -Nru x264-0.152.2854+gite9a5903/filters/video/crop.c x264-0.158.2988+git-20191101.7817004/filters/video/crop.c --- x264-0.152.2854+gite9a5903/filters/video/crop.c 2017-12-31 12:50:51.000000000 +0000 +++ x264-0.158.2988+git-20191101.7817004/filters/video/crop.c 2019-11-09 05:16:29.000000000 
+0000 @@ -1,7 +1,7 @@ /***************************************************************************** * crop.c: crop video filter ***************************************************************************** - * Copyright (C) 2010-2017 x264 project + * Copyright (C) 2010-2019 x264 project * * Authors: Steven Walters * James Darnley @@ -25,6 +25,7 @@ *****************************************************************************/ #include "video.h" + #define NAME "crop" #define FAIL_IF_ERROR( cond, ... ) FAIL_IF_ERR( cond, NAME, __VA_ARGS__ ) diff -Nru x264-0.152.2854+gite9a5903/filters/video/depth.c x264-0.158.2988+git-20191101.7817004/filters/video/depth.c --- x264-0.152.2854+gite9a5903/filters/video/depth.c 2017-12-31 12:50:51.000000000 +0000 +++ x264-0.158.2988+git-20191101.7817004/filters/video/depth.c 2019-11-09 05:16:29.000000000 +0000 @@ -1,7 +1,7 @@ /***************************************************************************** * depth.c: bit-depth conversion video filter ***************************************************************************** - * Copyright (C) 2010-2017 x264 project + * Copyright (C) 2010-2019 x264 project * * Authors: Oskar Arvidsson * @@ -24,7 +24,15 @@ *****************************************************************************/ #include "video.h" -#define NAME "depth" +#include "common/common.h" + +#define depth_filter x264_glue3(depth, BIT_DEPTH, filter) +#if BIT_DEPTH == 8 +#define NAME "depth_8" +#else +#define NAME "depth_10" +#endif + #define FAIL_IF_ERROR( cond, ... 
) FAIL_IF_ERR( cond, NAME, __VA_ARGS__ ) cli_vid_filter_t depth_filter; @@ -43,7 +51,8 @@ static int depth_filter_csp_is_supported( int csp ) { int csp_mask = csp & X264_CSP_MASK; - return csp_mask == X264_CSP_I420 || + return csp_mask == X264_CSP_I400 || + csp_mask == X264_CSP_I420 || csp_mask == X264_CSP_I422 || csp_mask == X264_CSP_I444 || csp_mask == X264_CSP_YV12 || @@ -74,10 +83,10 @@ static void dither_plane_##pitch( pixel *dst, int dst_stride, uint16_t *src, int src_stride, \ int width, int height, int16_t *errors ) \ { \ - const int lshift = 16-X264_BIT_DEPTH; \ - const int rshift = 16-X264_BIT_DEPTH+2; \ - const int half = 1 << (16-X264_BIT_DEPTH+1); \ - const int pixel_max = (1 << X264_BIT_DEPTH)-1; \ + const int lshift = 16-BIT_DEPTH; \ + const int rshift = 16-BIT_DEPTH+2; \ + const int half = 1 << (16-BIT_DEPTH+1); \ + const int pixel_max = (1 << BIT_DEPTH)-1; \ memset( errors, 0, (width+1) * sizeof(int16_t) ); \ for( int y = 0; y < height; y++, src += src_stride, dst += dst_stride ) \ { \ @@ -137,7 +146,7 @@ static void scale_image( cli_image_t *output, cli_image_t *img ) { int csp_mask = img->csp & X264_CSP_MASK; - const int shift = X264_BIT_DEPTH - 8; + const int shift = BIT_DEPTH - 8; for( int i = 0; i < img->planes; i++ ) { uint8_t *src = img->plane[i]; @@ -217,7 +226,7 @@ ret = 1; } - FAIL_IF_ERROR( bit_depth != X264_BIT_DEPTH, "this build supports only bit depth %d\n", X264_BIT_DEPTH ); + FAIL_IF_ERROR( bit_depth != BIT_DEPTH, "this filter supports only bit depth %d\n", BIT_DEPTH ); FAIL_IF_ERROR( ret, "unsupported bit depth conversion.\n" ); /* only add the filter to the chain if it's needed */ diff -Nru x264-0.152.2854+gite9a5903/filters/video/fix_vfr_pts.c x264-0.158.2988+git-20191101.7817004/filters/video/fix_vfr_pts.c --- x264-0.152.2854+gite9a5903/filters/video/fix_vfr_pts.c 2017-12-31 12:50:51.000000000 +0000 +++ x264-0.158.2988+git-20191101.7817004/filters/video/fix_vfr_pts.c 2019-11-09 05:16:29.000000000 +0000 @@ -1,7 +1,7 @@ 
/***************************************************************************** * fix_vfr_pts.c: vfr pts fixing video filter ***************************************************************************** - * Copyright (C) 2010-2017 x264 project + * Copyright (C) 2010-2019 x264 project * * Authors: Steven Walters * diff -Nru x264-0.152.2854+gite9a5903/filters/video/internal.c x264-0.158.2988+git-20191101.7817004/filters/video/internal.c --- x264-0.152.2854+gite9a5903/filters/video/internal.c 2017-12-31 12:50:51.000000000 +0000 +++ x264-0.158.2988+git-20191101.7817004/filters/video/internal.c 2019-11-09 05:16:29.000000000 +0000 @@ -1,7 +1,7 @@ /***************************************************************************** * internal.c: video filter utilities ***************************************************************************** - * Copyright (C) 2010-2017 x264 project + * Copyright (C) 2010-2019 x264 project * * Authors: Steven Walters * @@ -24,6 +24,7 @@ *****************************************************************************/ #include "internal.h" + #define FAIL_IF_ERROR( cond, ... 
) FAIL_IF_ERR( cond, "x264", __VA_ARGS__ ) void x264_cli_plane_copy( uint8_t *dst, int i_dst, uint8_t *src, int i_src, int w, int h ) diff -Nru x264-0.152.2854+gite9a5903/filters/video/internal.h x264-0.158.2988+git-20191101.7817004/filters/video/internal.h --- x264-0.152.2854+gite9a5903/filters/video/internal.h 2017-12-31 12:50:51.000000000 +0000 +++ x264-0.158.2988+git-20191101.7817004/filters/video/internal.h 2019-11-09 05:16:29.000000000 +0000 @@ -1,7 +1,7 @@ /***************************************************************************** * internal.h: video filter utilities ***************************************************************************** - * Copyright (C) 2010-2017 x264 project + * Copyright (C) 2010-2019 x264 project * * Authors: Steven Walters * @@ -25,6 +25,7 @@ #ifndef X264_FILTER_VIDEO_INTERNAL_H #define X264_FILTER_VIDEO_INTERNAL_H + #include "video.h" void x264_cli_plane_copy( uint8_t *dst, int i_dst, uint8_t *src, int i_src, int w, int h ); diff -Nru x264-0.152.2854+gite9a5903/filters/video/resize.c x264-0.158.2988+git-20191101.7817004/filters/video/resize.c --- x264-0.152.2854+gite9a5903/filters/video/resize.c 2017-12-31 12:50:51.000000000 +0000 +++ x264-0.158.2988+git-20191101.7817004/filters/video/resize.c 2019-11-09 05:16:29.000000000 +0000 @@ -1,7 +1,7 @@ /***************************************************************************** * resize.c: resize video filter ***************************************************************************** - * Copyright (C) 2010-2017 x264 project + * Copyright (C) 2010-2019 x264 project * * Authors: Steven Walters * @@ -24,6 +24,7 @@ *****************************************************************************/ #include "video.h" + #define NAME "resize" #define FAIL_IF_ERROR( cond, ... 
) FAIL_IF_ERR( cond, NAME, __VA_ARGS__ ) @@ -71,6 +72,7 @@ /* state of swapping chroma planes pre and post resize */ int pre_swap_chroma; int post_swap_chroma; + int fast_mono; /* yuv with planar luma can be "converted" to monochrome by simply ignoring chroma */ int variable_input; /* input is capable of changing properties */ int working; /* we have already started working with frames */ frame_prop_t dst; /* desired output properties */ @@ -145,6 +147,7 @@ return csp&X264_CSP_MASK; switch( csp&X264_CSP_MASK ) { + case X264_CSP_I400: return csp&X264_CSP_HIGH_DEPTH ? AV_PIX_FMT_GRAY16 : AV_PIX_FMT_GRAY8; case X264_CSP_YV12: /* specially handled via swapping chroma */ case X264_CSP_I420: return csp&X264_CSP_HIGH_DEPTH ? AV_PIX_FMT_YUV420P16 : AV_PIX_FMT_YUV420P; case X264_CSP_YV16: /* specially handled via swapping chroma */ @@ -201,7 +204,7 @@ { // yuv-based if( pix_desc->nb_components == 1 || pix_desc->nb_components == 2 ) // no chroma - ret = X264_CSP_I420; + ret = X264_CSP_I400; else if( pix_desc->log2_chroma_w && pix_desc->log2_chroma_h ) // reduced chroma width & height ret = (pix_number_of_planes( pix_desc ) == 2) ? 
X264_CSP_NV12 : X264_CSP_I420; else if( pix_desc->log2_chroma_w ) // reduced chroma width only @@ -211,7 +214,7 @@ } // now determine high depth for( int i = 0; i < pix_desc->nb_components; i++ ) - if( pix_desc->comp[i].depth_minus1 >= 8 ) + if( pix_desc->comp[i].depth > 8 ) ret |= X264_CSP_HIGH_DEPTH; return ret; } @@ -362,7 +365,7 @@ return 0; } -static int x264_init_sws_context( resizer_hnd_t *h ) +static int init_sws_context( resizer_hnd_t *h ) { if( h->ctx ) sws_freeContext( h->ctx ); @@ -397,15 +400,18 @@ return 0; /* also warn if the resizer was initialized after the first frame */ if( h->ctx || h->working ) + { x264_cli_log( NAME, X264_LOG_WARNING, "stream properties changed at pts %"PRId64"\n", in->pts ); + h->fast_mono = 0; + } h->scale = input_prop; - if( !h->buffer_allocated ) + if( !h->buffer_allocated && !h->fast_mono ) { if( x264_cli_pic_alloc_aligned( &h->buffer, h->dst_csp, h->dst.width, h->dst.height ) ) return -1; h->buffer_allocated = 1; } - FAIL_IF_ERROR( x264_init_sws_context( h ), "swscale init failed\n" ); + FAIL_IF_ERROR( init_sws_context( h ), "swscale init failed\n" ); return 0; } @@ -503,6 +509,11 @@ h->input_range ? "PC" : "TV", h->dst.range ? 
"PC" : "TV" ); h->dst_csp |= info->csp & X264_CSP_VFLIP; // preserve vflip + if( dst_csp == X264_CSP_I400 && + ((src_csp >= X264_CSP_I420 && src_csp <= X264_CSP_NV16) || src_csp == X264_CSP_I444 || src_csp == X264_CSP_YV24) && + h->dst.width == info->width && h->dst.height == info->height && h->dst.range == h->input_range ) + h->fast_mono = 1; /* use the input luma plane as is */ + /* if the input is not variable, initialize the context */ if( !h->variable_input ) { @@ -535,7 +546,7 @@ h->working = 1; if( h->pre_swap_chroma ) XCHG( uint8_t*, output->img.plane[1], output->img.plane[2] ); - if( h->ctx ) + if( h->ctx && !h->fast_mono ) { sws_scale( h->ctx, (const uint8_t* const*)output->img.plane, output->img.stride, 0, output->img.height, h->buffer.img.plane, h->buffer.img.stride ); diff -Nru x264-0.152.2854+gite9a5903/filters/video/select_every.c x264-0.158.2988+git-20191101.7817004/filters/video/select_every.c --- x264-0.152.2854+gite9a5903/filters/video/select_every.c 2017-12-31 12:50:51.000000000 +0000 +++ x264-0.158.2988+git-20191101.7817004/filters/video/select_every.c 2019-11-09 05:16:29.000000000 +0000 @@ -1,7 +1,7 @@ /***************************************************************************** * select_every.c: select-every video filter ***************************************************************************** - * Copyright (C) 2010-2017 x264 project + * Copyright (C) 2010-2019 x264 project * * Authors: Steven Walters * @@ -24,6 +24,7 @@ *****************************************************************************/ #include "video.h" + #define NAME "select_every" #define FAIL_IF_ERROR( cond, ... 
) FAIL_IF_ERR( cond, NAME, __VA_ARGS__ ) @@ -62,7 +63,7 @@ h->pattern_len = 0; h->step_size = 0; int offsets[MAX_PATTERN_SIZE]; - for( char *tok, *p = opt_string; (tok = strtok( p, "," )); p = NULL ) + for( char *tok, *p = opt_string, UNUSED *saveptr = NULL; (tok = strtok_r( p, ",", &saveptr )); p = NULL ) { int val = x264_otoi( tok, -1 ); if( p ) @@ -95,7 +96,9 @@ if( max_rewind == h->step_size ) break; } - if( x264_init_vid_filter( "cache", handle, filter, info, param, (void*)max_rewind ) ) + char name[20]; + sprintf( name, "cache_%d", param->i_bitdepth ); + if( x264_init_vid_filter( name, handle, filter, info, param, (void*)max_rewind ) ) return -1; /* done initing, overwrite properties */ diff -Nru x264-0.152.2854+gite9a5903/filters/video/source.c x264-0.158.2988+git-20191101.7817004/filters/video/source.c --- x264-0.152.2854+gite9a5903/filters/video/source.c 2017-12-31 12:50:51.000000000 +0000 +++ x264-0.158.2988+git-20191101.7817004/filters/video/source.c 2019-11-09 05:16:29.000000000 +0000 @@ -1,7 +1,7 @@ /***************************************************************************** * source.c: source video filter ***************************************************************************** - * Copyright (C) 2010-2017 x264 project + * Copyright (C) 2010-2019 x264 project * * Authors: Steven Walters * diff -Nru x264-0.152.2854+gite9a5903/filters/video/video.c x264-0.158.2988+git-20191101.7817004/filters/video/video.c --- x264-0.152.2854+gite9a5903/filters/video/video.c 2017-12-31 12:50:51.000000000 +0000 +++ x264-0.158.2988+git-20191101.7817004/filters/video/video.c 2019-11-09 05:16:29.000000000 +0000 @@ -1,7 +1,7 @@ /***************************************************************************** * video.c: video filters ***************************************************************************** - * Copyright (C) 2010-2017 x264 project + * Copyright (C) 2010-2019 x264 project * * Authors: Steven Walters * @@ -46,12 +46,18 @@ { extern cli_vid_filter_t 
source_filter; first_filter = &source_filter; - REGISTER_VFILTER( cache ); +#if HAVE_BITDEPTH8 + REGISTER_VFILTER( cache_8 ); + REGISTER_VFILTER( depth_8 ); +#endif +#if HAVE_BITDEPTH10 + REGISTER_VFILTER( cache_10 ); + REGISTER_VFILTER( depth_10 ); +#endif REGISTER_VFILTER( crop ); REGISTER_VFILTER( fix_vfr_pts ); REGISTER_VFILTER( resize ); REGISTER_VFILTER( select_every ); - REGISTER_VFILTER( depth ); #if HAVE_GPL #endif } diff -Nru x264-0.152.2854+gite9a5903/filters/video/video.h x264-0.158.2988+git-20191101.7817004/filters/video/video.h --- x264-0.152.2854+gite9a5903/filters/video/video.h 2017-12-31 12:50:51.000000000 +0000 +++ x264-0.158.2988+git-20191101.7817004/filters/video/video.h 2019-11-09 05:16:29.000000000 +0000 @@ -1,7 +1,7 @@ /***************************************************************************** * video.h: video filters ***************************************************************************** - * Copyright (C) 2010-2017 x264 project + * Copyright (C) 2010-2019 x264 project * * Authors: Steven Walters * diff -Nru x264-0.152.2854+gite9a5903/.gitignore x264-0.158.2988+git-20191101.7817004/.gitignore --- x264-0.152.2854+gite9a5903/.gitignore 1970-01-01 00:00:00.000000000 +0000 +++ x264-0.158.2988+git-20191101.7817004/.gitignore 2019-11-09 05:16:29.000000000 +0000 @@ -0,0 +1,50 @@ +*~ +*.a +*.diff +*.orig +*.rej +*.dll* +*.exe +*.def +*.lib +*.pdb +*.mo +*.o +*.patch +*.pc +*.pot +*.so* +*.dylib +.*.swp +.depend +.DS_Store +TAGS +config.h +config.mak +config.log +x264_config.h +x264 +checkasm + +*.264 +*.h264 +*.2pass +*.ffindex +*.avs +*.mkv +*.flv +*.mp4 +*.y4m +*.yuv +*.log +*.mbtree +*.temp +*.pyc +*.pgd +*.pgc + +.digress_x264 +dataDec.txt +log.dec +common/oclobj.h +x264_lookahead.clbin diff -Nru x264-0.152.2854+gite9a5903/.gitlab-ci.yml x264-0.158.2988+git-20191101.7817004/.gitlab-ci.yml --- x264-0.152.2854+gite9a5903/.gitlab-ci.yml 1970-01-01 00:00:00.000000000 +0000 +++ x264-0.158.2988+git-20191101.7817004/.gitlab-ci.yml 2019-11-09 
05:16:29.000000000 +0000 @@ -0,0 +1,200 @@ +stages: + - build + - test + - release + +.variables-debian-amd64: &variables-debian-amd64 + _TRIPLET: "" + _PLATFORMSUFFIX: "" + _PATH: "debian-x86_64" + _WRAPPER: "" + +.variables-debian-aarch64: &variables-debian-aarch64 + _TRIPLET: "" + _PLATFORMSUFFIX: "" + _PATH: "debian-aarch64" + _WRAPPER: "" + +.variables-win32: &variables-win32 + _TRIPLET: "i686-w64-mingw32" + _PLATFORMSUFFIX: ".exe" + _PATH: "win32" + _WRAPPER: "wine" + +.variables-win64: &variables-win64 + _TRIPLET: "x86_64-w64-mingw32" + _PLATFORMSUFFIX: ".exe" + _PATH: "win64" + _WRAPPER: "wine64" + +.variables-macos: &variables-macos + _TRIPLET: "x86_64-apple-darwin18" + _PLATFORMSUFFIX: "" + _PATH: "macos-x86_64" + _WRAPPER: "" + +.build: + stage: build + script: | + set -x + curl -L -- https://download.videolan.org/videolan/x264/av.tar.gz > av.tar.gz && tar xfzv av.tar.gz + cd av + ./bootstrap + make -j$(getconf _NPROCESSORS_ONLN) + cd .. + export PKG_CONFIG_PATH=`/bin/ls -d $PWD/av/*/lib/pkgconfig` + ./configure --enable-pic --enable-strip --extra-ldflags="-static" + make -j$(getconf _NPROCESSORS_ONLN) x264 checkasm + artifacts: + name: "$CI_PROJECT_PATH_SLUG-$CI_JOB_NAME-$CI_COMMIT_SHORT_SHA" + paths: + - x264${_PLATFORMSUFFIX} + - checkasm8${_PLATFORMSUFFIX} + - checkasm10${_PLATFORMSUFFIX} + expire_in: 1 week + +build-debian-amd64: + extends: .build + image: registry.videolan.org/x264-debian-unstable:20190404162201 + tags: + - docker + - amd64 + variables: *variables-debian-amd64 + +build-debian-aarch64: + extends: .build + image: registry.videolan.org/x264-debian-unstable-aarch64:20190716192245 + tags: + - docker + - aarch64 + variables: *variables-debian-aarch64 + +.build-win: + extends: build-debian-amd64 + script: | + set -x + curl -f -o vlc-contrib-${_TRIPLET}-latest.tar.bz2 https://nightlies.videolan.org/build/${_PATH}/last/vlc-contrib-${_TRIPLET}-`date +%Y%m%d`.tar.bz2 || curl -f -o vlc-contrib-${_TRIPLET}-latest.tar.bz2 
https://nightlies.videolan.org/build/${_PATH}/last/vlc-contrib-${_TRIPLET}-`date --date=yesterday +%Y%m%d`.tar.bz2 + bunzip2 vlc-contrib-${_TRIPLET}-latest.tar.bz2 + tar xvf vlc-contrib-${_TRIPLET}-latest.tar + sed -i "s#@@CONTRIB_PREFIX@@#`pwd`/${_TRIPLET}#g" ${_TRIPLET}/lib/pkgconfig/*.pc + export PKG_CONFIG_LIBDIR=`pwd`/${_TRIPLET}/lib/pkgconfig + ./configure --host=${_TRIPLET} --cross-prefix=${_TRIPLET}- --enable-pic --enable-strip + make -j$(getconf _NPROCESSORS_ONLN) x264 checkasm + +build-win32: + extends: .build-win + variables: *variables-win32 + +build-win64: + extends: .build-win + variables: *variables-win64 + +build-macos: + extends: .build + tags: + - macos + script: | + set -x + curl -O https://nightlies.videolan.org/build/contribs/vlc-contrib-${_TRIPLET}-latest.tar.bz2 + bunzip2 vlc-contrib-${_TRIPLET}-latest.tar.bz2 + tar xvf vlc-contrib-${_TRIPLET}-latest.tar + sed -i.bak "s#@@CONTRIB_PREFIX@@#`pwd`/${_TRIPLET}#g" ${_TRIPLET}/lib/pkgconfig/*.pc + export PKG_CONFIG_LIBDIR=`pwd`/${_TRIPLET}/lib/pkgconfig + ./configure --enable-strip + make -j$(getconf _NPROCESSORS_ONLN) x264 checkasm + variables: *variables-macos + +.test: &test + stage: test + script: | + set -x + ${_WRAPPER} ./checkasm8${_PLATFORMSUFFIX} + ${_WRAPPER} ./checkasm10${_PLATFORMSUFFIX} + artifacts: + expire_in: 10 minutes + +test-debian-amd64: + <<: *test + extends: build-debian-amd64 + dependencies: + - build-debian-amd64 + variables: *variables-debian-amd64 + +test-debian-aarch64: + <<: *test + extends: build-debian-aarch64 + dependencies: + - build-debian-aarch64 + variables: *variables-debian-aarch64 + +test-win32: + <<: *test + extends: build-win32 + dependencies: + - build-win32 + variables: *variables-win32 + +test-win64: + <<: *test + extends: build-win64 + dependencies: + - build-win64 + variables: *variables-win64 + +test-macos: + <<: *test + extends: build-macos + dependencies: + - build-macos + variables: *variables-macos + +.release: &release + stage: release + script: | 
+ set -x + _VERSION=$(./version.sh | grep _VERSION -| cut -d\ -f4-| sed 's, ,-,g' | sed 's,",,') + mv x264${_PLATFORMSUFFIX} x264-${_VERSION}${_PLATFORMSUFFIX} + when: manual + only: + - master@videolan/x264 + - stable@videolan/x264 + artifacts: + name: "$CI_PROJECT_PATH_SLUG-$CI_JOB_NAME-$CI_COMMIT_SHORT_SHA" + paths: + - x264-*${_PLATFORMSUFFIX} + expire_in: '10 minutes' + +release-debian-amd64: + <<: *release + extends: build-debian-amd64 + dependencies: + - build-debian-amd64 + variables: *variables-debian-amd64 + +release-debian-aarch64: + <<: *release + extends: build-debian-aarch64 + dependencies: + - build-debian-aarch64 + variables: *variables-debian-aarch64 + +release-win32: + <<: *release + extends: build-win32 + dependencies: + - build-win32 + variables: *variables-win32 + +release-win64: + <<: *release + extends: build-win64 + dependencies: + - build-win64 + variables: *variables-win64 + +release-macos: + <<: *release + extends: build-macos + dependencies: + - build-macos + variables: *variables-macos diff -Nru x264-0.152.2854+gite9a5903/input/avs.c x264-0.158.2988+git-20191101.7817004/input/avs.c --- x264-0.152.2854+gite9a5903/input/avs.c 2017-12-31 12:50:51.000000000 +0000 +++ x264-0.158.2988+git-20191101.7817004/input/avs.c 2019-11-09 05:16:29.000000000 +0000 @@ -1,7 +1,7 @@ /***************************************************************************** * avs.c: avisynth input ***************************************************************************** - * Copyright (C) 2009-2017 x264 project + * Copyright (C) 2009-2019 x264 project * * Authors: Steven Walters * Anton Mitrofanov @@ -25,6 +25,7 @@ *****************************************************************************/ #include "input.h" + #if USE_AVXSYNTH #include #if SYS_MACOSX @@ -39,7 +40,6 @@ #define avs_close FreeLibrary #define avs_address GetProcAddress #endif -#define FAIL_IF_ERROR( cond, ... 
) FAIL_IF_ERR( cond, "avs", __VA_ARGS__ ) #define AVSC_NO_DECLSPEC #undef EXTERN_C @@ -50,6 +50,8 @@ #endif #define AVSC_DECLARE_FUNC(name) name##_func name +#define FAIL_IF_ERROR( cond, ... ) FAIL_IF_ERR( cond, "avs", __VA_ARGS__ ) + /* AVS uses a versioned interface to control backwards compatibility */ /* YV12 support is required, which was added in 2.5 */ #define AVS_INTERFACE_25 2 @@ -123,7 +125,7 @@ } avs_hnd_t; /* load the library and functions we require from it */ -static int x264_avs_load_library( avs_hnd_t *h ) +static int custom_avs_load_library( avs_hnd_t *h ) { h->library = avs_open(); if( !h->library ) @@ -188,6 +190,8 @@ #define AVS_IS_YUV420P16( vi ) (0) #define AVS_IS_YUV422P16( vi ) (0) #define AVS_IS_YUV444P16( vi ) (0) +#define AVS_IS_Y( vi ) (0) +#define AVS_IS_Y16( vi ) (0) #else #define AVS_IS_AVISYNTHPLUS (h->func.avs_is_420 && h->func.avs_is_422 && h->func.avs_is_444) #define AVS_IS_420( vi ) (h->func.avs_is_420 ? h->func.avs_is_420( vi ) : avs_is_yv12( vi )) @@ -198,6 +202,8 @@ #define AVS_IS_YUV420P16( vi ) (h->func.avs_is_yuv420p16 && h->func.avs_is_yuv420p16( vi )) #define AVS_IS_YUV422P16( vi ) (h->func.avs_is_yuv422p16 && h->func.avs_is_yuv422p16( vi )) #define AVS_IS_YUV444P16( vi ) (h->func.avs_is_yuv444p16 && h->func.avs_is_yuv444p16( vi )) +#define AVS_IS_Y( vi ) (h->func.avs_is_y ? 
h->func.avs_is_y( vi ) : avs_is_y8( vi )) +#define AVS_IS_Y16( vi ) (h->func.avs_is_y16 && h->func.avs_is_y16( vi )) #endif /* generate a filter sequence to try based on the filename extension */ @@ -259,7 +265,7 @@ avs_hnd_t *h = calloc( 1, sizeof(avs_hnd_t) ); if( !h ) return -1; - FAIL_IF_ERROR( x264_avs_load_library( h ), "failed to load avisynth\n" ); + FAIL_IF_ERROR( custom_avs_load_library( h ), "failed to load avisynth\n" ); h->env = h->func.avs_create_script_environment( AVS_INTERFACE_25 ); if( h->func.avs_get_error ) { @@ -286,7 +292,7 @@ if( !strcasecmp( filename_ext, "avs" ) ) { res = h->func.avs_invoke( h->env, "Import", arg, NULL ); - FAIL_IF_ERROR( avs_is_error( res ), "%s\n", avs_as_string( res ) ); + FAIL_IF_ERROR( avs_is_error( res ), "%s\n", avs_as_error( res ) ); /* check if the user is using a multi-threaded script and apply distributor if necessary. adapted from avisynth's vfw interface */ AVS_Value mt_test = h->func.avs_invoke( h->env, "GetMTMode", avs_new_value_bool( 0 ), NULL ); @@ -337,16 +343,17 @@ { x264_cli_log( "avs", X264_LOG_WARNING, "detected fieldbased (separated) input, weaving to frames\n" ); AVS_Value tmp = h->func.avs_invoke( h->env, "Weave", res, NULL ); - FAIL_IF_ERROR( avs_is_error( tmp ), "couldn't weave fields into frames\n" ); + FAIL_IF_ERROR( avs_is_error( tmp ), "couldn't weave fields into frames: %s\n", avs_as_error( tmp ) ); res = update_clip( h, &vi, tmp, res ); info->interlaced = 1; info->tff = avs_is_tff( vi ); } #if !HAVE_SWSCALE /* if swscale is not available, convert the CSP if necessary */ - FAIL_IF_ERROR( avs_version < 2.6f && (opt->output_csp == X264_CSP_I422 || opt->output_csp == X264_CSP_I444), - "avisynth >= 2.6 is required for i422/i444 output\n" ); - if( (opt->output_csp == X264_CSP_I420 && !AVS_IS_420( vi )) || + FAIL_IF_ERROR( avs_version < 2.6f && (opt->output_csp == X264_CSP_I400 || opt->output_csp == X264_CSP_I422 || opt->output_csp == X264_CSP_I444), + "avisynth >= 2.6 is required for 
i400/i422/i444 output\n" ); + if( (opt->output_csp == X264_CSP_I400 && !AVS_IS_Y( vi )) || + (opt->output_csp == X264_CSP_I420 && !AVS_IS_420( vi )) || (opt->output_csp == X264_CSP_I422 && !AVS_IS_422( vi )) || (opt->output_csp == X264_CSP_I444 && !AVS_IS_444( vi )) || (opt->output_csp == X264_CSP_RGB && !avs_is_rgb( vi )) ) @@ -354,46 +361,58 @@ const char *csp; if( AVS_IS_AVISYNTHPLUS ) { - csp = opt->output_csp == X264_CSP_I420 ? "YUV420" : + csp = opt->output_csp == X264_CSP_I400 ? "Y" : + opt->output_csp == X264_CSP_I420 ? "YUV420" : opt->output_csp == X264_CSP_I422 ? "YUV422" : opt->output_csp == X264_CSP_I444 ? "YUV444" : "RGB"; } else { - csp = opt->output_csp == X264_CSP_I420 ? "YV12" : + csp = opt->output_csp == X264_CSP_I400 ? "Y8" : + opt->output_csp == X264_CSP_I420 ? "YV12" : opt->output_csp == X264_CSP_I422 ? "YV16" : opt->output_csp == X264_CSP_I444 ? "YV24" : "RGB"; } x264_cli_log( "avs", X264_LOG_WARNING, "converting input clip to %s\n", csp ); - FAIL_IF_ERROR( opt->output_csp < X264_CSP_I444 && (vi->width&1), - "input clip width not divisible by 2 (%dx%d)\n", vi->width, vi->height ); - FAIL_IF_ERROR( opt->output_csp == X264_CSP_I420 && info->interlaced && (vi->height&3), - "input clip height not divisible by 4 (%dx%d)\n", vi->width, vi->height ); - FAIL_IF_ERROR( (opt->output_csp == X264_CSP_I420 || info->interlaced) && (vi->height&1), - "input clip height not divisible by 2 (%dx%d)\n", vi->width, vi->height ); + if( opt->output_csp != X264_CSP_I400 ) + { + FAIL_IF_ERROR( opt->output_csp < X264_CSP_I444 && (vi->width&1), + "input clip width not divisible by 2 (%dx%d)\n", vi->width, vi->height ); + FAIL_IF_ERROR( opt->output_csp == X264_CSP_I420 && info->interlaced && (vi->height&3), + "input clip height not divisible by 4 (%dx%d)\n", vi->width, vi->height ); + FAIL_IF_ERROR( (opt->output_csp == X264_CSP_I420 || info->interlaced) && (vi->height&1), + "input clip height not divisible by 2 (%dx%d)\n", vi->width, vi->height ); + } char conv_func[16]; 
snprintf( conv_func, sizeof(conv_func), "ConvertTo%s", csp ); - char matrix[7] = ""; - int arg_count = 2; + AVS_Value arg_arr[3]; + const char *arg_name[3]; + int arg_count = 1; + arg_arr[0] = res; + arg_name[0] = NULL; + if( opt->output_csp != X264_CSP_I400 ) + { + arg_arr[arg_count] = avs_new_value_bool( info->interlaced ); + arg_name[arg_count] = "interlaced"; + arg_count++; + } /* if doing a rgb <-> yuv conversion then range is handled via 'matrix'. though it's only supported in 2.56+ */ + char matrix[7]; if( avs_version >= 2.56f && ((opt->output_csp == X264_CSP_RGB && avs_is_yuv( vi )) || (opt->output_csp != X264_CSP_RGB && avs_is_rgb( vi ))) ) { // if converting from yuv, then we specify the matrix for the input, otherwise use the output's. int use_pc_matrix = avs_is_yuv( vi ) ? opt->input_range == RANGE_PC : opt->output_range == RANGE_PC; snprintf( matrix, sizeof(matrix), "%s601", use_pc_matrix ? "PC." : "Rec" ); /* FIXME: use correct coefficients */ + arg_arr[arg_count] = avs_new_value_string( matrix ); + arg_name[arg_count] = "matrix"; arg_count++; // notification that the input range has changed to the desired one opt->input_range = opt->output_range; } - const char *arg_name[] = { NULL, "interlaced", "matrix" }; - AVS_Value arg_arr[3]; - arg_arr[0] = res; - arg_arr[1] = avs_new_value_bool( info->interlaced ); - arg_arr[2] = avs_new_value_string( matrix ); AVS_Value res2 = h->func.avs_invoke( h->env, conv_func, avs_new_value_array( arg_arr, arg_count ), arg_name ); - FAIL_IF_ERROR( avs_is_error( res2 ), "couldn't convert input clip to %s\n", csp ); + FAIL_IF_ERROR( avs_is_error( res2 ), "couldn't convert input clip to %s: %s\n", csp, avs_as_error( res2 ) ); res = update_clip( h, &vi, res2, res ); } /* if swscale is not available, change the range if necessary. 
This only applies to YUV-based CSPs however */ @@ -441,13 +460,15 @@ info->csp = X264_CSP_I420 | X264_CSP_HIGH_DEPTH; else if( avs_is_yv12( vi ) ) info->csp = X264_CSP_I420; -#if HAVE_SWSCALE + else if( AVS_IS_Y16( vi ) ) + info->csp = X264_CSP_I400 | X264_CSP_HIGH_DEPTH; + else if( avs_is_y8( vi ) ) + info->csp = X264_CSP_I400; else if( avs_is_yuy2( vi ) ) - info->csp = AV_PIX_FMT_YUYV422 | X264_CSP_OTHER; + info->csp = X264_CSP_YUYV; +#if HAVE_SWSCALE else if( avs_is_yv411( vi ) ) info->csp = AV_PIX_FMT_YUV411P | X264_CSP_OTHER; - else if( avs_is_y8( vi ) ) - info->csp = AV_PIX_FMT_GRAY8 | X264_CSP_OTHER; #endif else { diff -Nru x264-0.152.2854+gite9a5903/input/ffms.c x264-0.158.2988+git-20191101.7817004/input/ffms.c --- x264-0.152.2854+gite9a5903/input/ffms.c 2017-12-31 12:50:51.000000000 +0000 +++ x264-0.158.2988+git-20191101.7817004/input/ffms.c 2019-11-09 05:16:29.000000000 +0000 @@ -1,7 +1,7 @@ /***************************************************************************** * ffms.c: ffmpegsource input ***************************************************************************** - * Copyright (C) 2009-2017 x264 project + * Copyright (C) 2009-2019 x264 project * * Authors: Mike Gurlitz * Steven Walters @@ -27,12 +27,13 @@ #include "input.h" #include -#define FAIL_IF_ERROR( cond, ... ) FAIL_IF_ERR( cond, "ffms", __VA_ARGS__ ) #undef DECLARE_ALIGNED #include #include +#define FAIL_IF_ERROR( cond, ... 
) FAIL_IF_ERR( cond, "ffms", __VA_ARGS__ ) + #define PROGRESS_LENGTH 36 typedef struct diff -Nru x264-0.152.2854+gite9a5903/input/input.c x264-0.158.2988+git-20191101.7817004/input/input.c --- x264-0.152.2854+gite9a5903/input/input.c 2017-12-31 12:50:51.000000000 +0000 +++ x264-0.158.2988+git-20191101.7817004/input/input.c 2019-11-09 05:16:29.000000000 +0000 @@ -1,7 +1,7 @@ /***************************************************************************** * input.c: common input functions ***************************************************************************** - * Copyright (C) 2010-2017 x264 project + * Copyright (C) 2010-2019 x264 project * * Authors: Steven Walters * Henrik Gramner @@ -34,6 +34,7 @@ #endif const x264_cli_csp_t x264_cli_csps[] = { + [X264_CSP_I400] = { "i400", 1, { 1 }, { 1 }, 1, 1 }, [X264_CSP_I420] = { "i420", 3, { 1, .5, .5 }, { 1, .5, .5 }, 2, 2 }, [X264_CSP_I422] = { "i422", 3, { 1, .5, .5 }, { 1, 1, 1 }, 2, 1 }, [X264_CSP_I444] = { "i444", 3, { 1, 1, 1 }, { 1, 1, 1 }, 1, 1 }, @@ -86,7 +87,7 @@ return size; } -static int x264_cli_pic_init_internal( cli_pic_t *pic, int csp, int width, int height, int align, int alloc ) +static int cli_pic_init_internal( cli_pic_t *pic, int csp, int width, int height, int align, int alloc ) { memset( pic, 0, sizeof(cli_pic_t) ); int csp_mask = csp & X264_CSP_MASK; @@ -118,17 +119,17 @@ int x264_cli_pic_alloc( cli_pic_t *pic, int csp, int width, int height ) { - return x264_cli_pic_init_internal( pic, csp, width, height, 1, 1 ); + return cli_pic_init_internal( pic, csp, width, height, 1, 1 ); } int x264_cli_pic_alloc_aligned( cli_pic_t *pic, int csp, int width, int height ) { - return x264_cli_pic_init_internal( pic, csp, width, height, NATIVE_ALIGN, 1 ); + return cli_pic_init_internal( pic, csp, width, height, NATIVE_ALIGN, 1 ); } int x264_cli_pic_init_noalloc( cli_pic_t *pic, int csp, int width, int height ) { - return x264_cli_pic_init_internal( pic, csp, width, height, 1, 0 ); + return 
cli_pic_init_internal( pic, csp, width, height, 1, 0 ); } void x264_cli_pic_clean( cli_pic_t *pic ) @@ -148,35 +149,71 @@ /* Functions for handling memory-mapped input frames */ int x264_cli_mmap_init( cli_mmap_t *h, FILE *fh ) { -#ifdef _WIN32 - HANDLE osfhandle = (HANDLE)_get_osfhandle( _fileno( fh ) ); - if( osfhandle != INVALID_HANDLE_VALUE ) +#if defined(_WIN32) || HAVE_MMAP + int fd = fileno( fh ); + x264_struct_stat file_stat; + if( !x264_fstat( fd, &file_stat ) ) { - SYSTEM_INFO si; - GetSystemInfo( &si ); - h->align_mask = si.dwAllocationGranularity - 1; - h->prefetch_virtual_memory = (void*)GetProcAddress( GetModuleHandleW( L"kernel32.dll" ), "PrefetchVirtualMemory" ); - h->process_handle = GetCurrentProcess(); - h->map_handle = CreateFileMappingW( osfhandle, NULL, PAGE_READONLY, 0, 0, NULL ); - return !h->map_handle; - } + h->file_size = file_stat.st_size; +#ifdef _WIN32 + HANDLE osfhandle = (HANDLE)_get_osfhandle( fd ); + if( osfhandle != INVALID_HANDLE_VALUE ) + { + SYSTEM_INFO si; + GetSystemInfo( &si ); + h->page_mask = si.dwPageSize - 1; + h->align_mask = si.dwAllocationGranularity - 1; + h->prefetch_virtual_memory = (void*)GetProcAddress( GetModuleHandleW( L"kernel32.dll" ), "PrefetchVirtualMemory" ); + h->process_handle = GetCurrentProcess(); + h->map_handle = CreateFileMappingW( osfhandle, NULL, PAGE_READONLY, 0, 0, NULL ); + return !h->map_handle; + } #elif HAVE_MMAP && defined(_SC_PAGESIZE) - h->align_mask = sysconf( _SC_PAGESIZE ) - 1; - h->fd = fileno( fh ); - return h->align_mask < 0 || h->fd < 0; + h->align_mask = sysconf( _SC_PAGESIZE ) - 1; + h->fd = fd; + return h->align_mask < 0 || fd < 0; +#endif + } #endif return -1; } +/* Third-party filters such as swscale can overread the input buffer which may result + * in segfaults. We have to pad the buffer size as a workaround to avoid that. 
*/ +#define MMAP_PADDING 64 + void *x264_cli_mmap( cli_mmap_t *h, int64_t offset, size_t size ) { #if defined(_WIN32) || HAVE_MMAP + uint8_t *base; int align = offset & h->align_mask; offset -= align; size += align; #ifdef _WIN32 - uint8_t *base = MapViewOfFile( h->map_handle, FILE_MAP_READ, offset >> 32, offset, size ); - if( base ) + /* If the padding crosses a page boundary we need to increase the mapping size. */ + size_t padded_size = (-size & h->page_mask) < MMAP_PADDING ? size + MMAP_PADDING : size; + if( offset + padded_size > h->file_size ) + { + /* It's not possible to do the POSIX mmap() remapping trick on Windows, so if the padding crosses a + * page boundary past the end of the file we have to copy the entire frame into a padded buffer. */ + if( (base = MapViewOfFile( h->map_handle, FILE_MAP_READ, offset >> 32, offset, size )) ) + { + uint8_t *buf = NULL; + HANDLE anon_map = CreateFileMappingW( INVALID_HANDLE_VALUE, NULL, PAGE_READWRITE, 0, padded_size, NULL ); + if( anon_map ) + { + if( (buf = MapViewOfFile( anon_map, FILE_MAP_WRITE, 0, 0, 0 )) ) + { + buf += align; + memcpy( buf, base + align, size - align ); + } + CloseHandle( anon_map ); + } + UnmapViewOfFile( base ); + return buf; + } + } + else if( (base = MapViewOfFile( h->map_handle, FILE_MAP_READ, offset >> 32, offset, padded_size )) ) { /* PrefetchVirtualMemory() is only available on Windows 8 and newer. */ if( h->prefetch_virtual_memory ) @@ -187,8 +224,8 @@ return base + align; } #else - uint8_t *base = mmap( NULL, size, PROT_READ, MAP_PRIVATE, h->fd, offset ); - if( base != MAP_FAILED ) + size_t padded_size = size + MMAP_PADDING; + if( (base = mmap( NULL, padded_size, PROT_READ, MAP_PRIVATE, h->fd, offset )) != MAP_FAILED ) { /* Ask the OS to readahead pages. This improves performance whereas * forcing page faults by manually accessing every page does not. 
@@ -199,6 +236,12 @@ #elif defined(POSIX_MADV_WILLNEED) posix_madvise( base, size, POSIX_MADV_WILLNEED ); #endif + /* Remap the file mapping of any padding that crosses a page boundary past the end of + * the file into a copy of the last valid page to prevent reads from invalid memory. */ + size_t aligned_size = (padded_size - 1) & ~h->align_mask; + if( offset + aligned_size >= h->file_size ) + mmap( base + aligned_size, padded_size - aligned_size, PROT_READ, MAP_PRIVATE|MAP_FIXED, h->fd, (offset + size - 1) & ~h->align_mask ); + return base + align; } #endif @@ -213,7 +256,7 @@ #ifdef _WIN32 return !UnmapViewOfFile( base ); #else - return munmap( base, size + (intptr_t)addr - (intptr_t)base ); + return munmap( base, size + MMAP_PADDING + (intptr_t)addr - (intptr_t)base ); #endif #endif return -1; diff -Nru x264-0.152.2854+gite9a5903/input/input.h x264-0.158.2988+git-20191101.7817004/input/input.h --- x264-0.152.2854+gite9a5903/input/input.h 2017-12-31 12:50:51.000000000 +0000 +++ x264-0.158.2988+git-20191101.7817004/input/input.h 2019-11-09 05:16:29.000000000 +0000 @@ -1,7 +1,7 @@ /***************************************************************************** * input.h: file input ***************************************************************************** - * Copyright (C) 2003-2017 x264 project + * Copyright (C) 2003-2019 x264 project * * Authors: Laurent Aimar * Loren Merritt @@ -102,7 +102,8 @@ extern const cli_input_t raw_input; extern const cli_input_t y4m_input; extern const cli_input_t avs_input; -extern const cli_input_t thread_input; +extern const cli_input_t thread_8_input; +extern const cli_input_t thread_10_input; extern const cli_input_t lavf_input; extern const cli_input_t ffms_input; extern const cli_input_t timecode_input; @@ -137,8 +138,10 @@ typedef struct { + int64_t file_size; int align_mask; #ifdef _WIN32 + int page_mask; BOOL (WINAPI *prefetch_virtual_memory)( HANDLE, ULONG_PTR, PVOID, ULONG ); HANDLE process_handle; HANDLE map_handle; diff 
-Nru x264-0.152.2854+gite9a5903/input/lavf.c x264-0.158.2988+git-20191101.7817004/input/lavf.c --- x264-0.152.2854+gite9a5903/input/lavf.c 2017-12-31 12:50:51.000000000 +0000 +++ x264-0.158.2988+git-20191101.7817004/input/lavf.c 2019-11-09 05:16:29.000000000 +0000 @@ -1,7 +1,7 @@ /***************************************************************************** * lavf.c: libavformat input ***************************************************************************** - * Copyright (C) 2009-2017 x264 project + * Copyright (C) 2009-2019 x264 project * * Authors: Mike Gurlitz * Steven Walters @@ -25,16 +25,20 @@ *****************************************************************************/ #include "input.h" -#define FAIL_IF_ERROR( cond, ... ) FAIL_IF_ERR( cond, "lavf", __VA_ARGS__ ) + #undef DECLARE_ALIGNED #include +#include +#include #include #include -#include + +#define FAIL_IF_ERROR( cond, ... ) FAIL_IF_ERR( cond, "lavf", __VA_ARGS__ ) typedef struct { AVFormatContext *lavf; + AVCodecContext *lavc; AVFrame *frame; int stream_id; int next_frame; @@ -54,6 +58,25 @@ } } +static AVCodecContext *codec_from_stream( AVStream *stream ) +{ + AVCodec *codec = avcodec_find_decoder( stream->codecpar->codec_id ); + if( !codec ) + return NULL; + + AVCodecContext *c = avcodec_alloc_context3( codec ); + if( !c ) + return NULL; + + if( avcodec_parameters_to_context( c, stream->codecpar ) < 0 ) + { + avcodec_free_context( &c ); + return NULL; + } + + return c; +} + static int read_frame_internal( cli_pic_t *p_pic, lavf_hnd_t *h, int i_frame, video_info_t *info ) { if( h->first_pic && !info ) @@ -72,8 +95,6 @@ return 0; } - AVCodecContext *c = h->lavf->streams[h->stream_id]->codec; - AVPacket pkt; av_init_packet( &pkt ); pkt.data = NULL; @@ -81,31 +102,32 @@ while( i_frame >= h->next_frame ) { - int finished = 0; - int ret = 0; - do - { - ret = av_read_frame( h->lavf, &pkt ); + int ret; - if( ret < 0 ) + while( (ret = avcodec_receive_frame( h->lavc, h->frame )) ) + { + if( ret == 
AVERROR(EAGAIN) ) { - av_init_packet( &pkt ); - pkt.data = NULL; - pkt.size = 0; + while( !(ret = av_read_frame( h->lavf, &pkt )) && pkt.stream_index != h->stream_id ) + av_packet_unref( &pkt ); + + if( ret ) + ret = avcodec_send_packet( h->lavc, NULL ); + else + { + ret = avcodec_send_packet( h->lavc, &pkt ); + av_packet_unref( &pkt ); + } } + else if( ret == AVERROR_EOF ) + return -1; - if( ret < 0 || pkt.stream_index == h->stream_id ) + if( ret ) { - if( avcodec_decode_video2( c, h->frame, &finished, &pkt ) < 0 ) - x264_cli_log( "lavf", X264_LOG_WARNING, "video decoding failed on frame %d\n", h->next_frame ); + x264_cli_log( "lavf", X264_LOG_WARNING, "video decoding failed on frame %d\n", h->next_frame ); + return -1; } - - if( ret >= 0 ) - av_free_packet( &pkt ); - } while( !finished && ret >= 0 ); - - if( !finished ) - return -1; + } h->next_frame++; } @@ -113,9 +135,9 @@ memcpy( p_pic->img.stride, h->frame->linesize, sizeof(p_pic->img.stride) ); memcpy( p_pic->img.plane, h->frame->data, sizeof(p_pic->img.plane) ); int is_fullrange = 0; - p_pic->img.width = c->width; - p_pic->img.height = c->height; - p_pic->img.csp = handle_jpeg( c->pix_fmt, &is_fullrange ) | X264_CSP_OTHER; + p_pic->img.width = h->lavc->width; + p_pic->img.height = h->lavc->height; + p_pic->img.csp = handle_jpeg( h->lavc->pix_fmt, &is_fullrange ) | X264_CSP_OTHER; if( info ) { @@ -127,8 +149,8 @@ if( h->vfr_input ) { p_pic->pts = p_pic->duration = 0; - if( h->frame->pkt_pts != AV_NOPTS_VALUE ) - p_pic->pts = h->frame->pkt_pts; + if( h->frame->pts != AV_NOPTS_VALUE ) + p_pic->pts = h->frame->pts; else if( h->frame->pkt_dts != AV_NOPTS_VALUE ) p_pic->pts = h->frame->pkt_dts; // for AVI files else if( info ) @@ -174,12 +196,15 @@ FAIL_IF_ERROR( avformat_find_stream_info( h->lavf, NULL ) < 0, "could not find input stream info\n" ); int i = 0; - while( i < h->lavf->nb_streams && h->lavf->streams[i]->codec->codec_type != AVMEDIA_TYPE_VIDEO ) + while( i < h->lavf->nb_streams && 
h->lavf->streams[i]->codecpar->codec_type != AVMEDIA_TYPE_VIDEO ) i++; FAIL_IF_ERROR( i == h->lavf->nb_streams, "could not find video stream\n" ); h->stream_id = i; h->next_frame = 0; - AVCodecContext *c = h->lavf->streams[i]->codec; + h->lavc = codec_from_stream( h->lavf->streams[i] ); + if( !h->lavc ) + return -1; + info->fps_num = h->lavf->streams[i]->avg_frame_rate.num; info->fps_den = h->lavf->streams[i]->avg_frame_rate.den; info->timebase_num = h->lavf->streams[i]->time_base.num; @@ -187,7 +212,7 @@ /* lavf is thread unsafe as calling av_read_frame invalidates previously read AVPackets */ info->thread_safe = 0; h->vfr_input = info->vfr; - FAIL_IF_ERROR( avcodec_open2( c, avcodec_find_decoder( c->codec_id ), NULL ), + FAIL_IF_ERROR( avcodec_open2( h->lavc, avcodec_find_decoder( h->lavc->codec_id ), NULL ), "could not find decoder for video stream\n" ); /* prefetch the first frame and set/confirm flags */ @@ -197,17 +222,17 @@ if( read_frame_internal( h->first_pic, h, 0, info ) ) return -1; - info->width = c->width; - info->height = c->height; + info->width = h->lavc->width; + info->height = h->lavc->height; info->csp = h->first_pic->img.csp; info->num_frames = h->lavf->streams[i]->nb_frames; - info->sar_height = c->sample_aspect_ratio.den; - info->sar_width = c->sample_aspect_ratio.num; - info->fullrange |= c->color_range == AVCOL_RANGE_JPEG; + info->sar_height = h->lavc->sample_aspect_ratio.den; + info->sar_width = h->lavc->sample_aspect_ratio.num; + info->fullrange |= h->lavc->color_range == AVCOL_RANGE_JPEG; /* avisynth stores rgb data vertically flipped. 
*/ if( !strcasecmp( get_filename_extension( psz_filename ), "avs" ) && - (c->pix_fmt == AV_PIX_FMT_BGRA || c->pix_fmt == AV_PIX_FMT_BGR24) ) + (h->lavc->pix_fmt == AV_PIX_FMT_BGRA || h->lavc->pix_fmt == AV_PIX_FMT_BGR24) ) info->csp |= X264_CSP_VFLIP; *p_handle = h; @@ -237,7 +262,7 @@ static int close_file( hnd_t handle ) { lavf_hnd_t *h = handle; - avcodec_close( h->lavf->streams[h->stream_id]->codec ); + avcodec_free_context( &h->lavc ); avformat_close_input( &h->lavf ); av_frame_free( &h->frame ); free( h ); diff -Nru x264-0.152.2854+gite9a5903/input/raw.c x264-0.158.2988+git-20191101.7817004/input/raw.c --- x264-0.152.2854+gite9a5903/input/raw.c 2017-12-31 12:50:51.000000000 +0000 +++ x264-0.158.2988+git-20191101.7817004/input/raw.c 2019-11-09 05:16:29.000000000 +0000 @@ -1,7 +1,7 @@ /***************************************************************************** * raw.c: raw input ***************************************************************************** - * Copyright (C) 2003-2017 x264 project + * Copyright (C) 2003-2019 x264 project * * Authors: Laurent Aimar * Loren Merritt @@ -26,6 +26,7 @@ *****************************************************************************/ #include "input.h" + #define FAIL_IF_ERROR( cond, ... 
) FAIL_IF_ERR( cond, "raw", __VA_ARGS__ ) typedef struct diff -Nru x264-0.152.2854+gite9a5903/input/thread.c x264-0.158.2988+git-20191101.7817004/input/thread.c --- x264-0.152.2854+gite9a5903/input/thread.c 2017-12-31 12:50:51.000000000 +0000 +++ x264-0.158.2988+git-20191101.7817004/input/thread.c 2019-11-09 05:16:29.000000000 +0000 @@ -1,7 +1,7 @@ /***************************************************************************** * thread.c: threaded input ***************************************************************************** - * Copyright (C) 2003-2017 x264 project + * Copyright (C) 2003-2019 x264 project * * Authors: Laurent Aimar * Loren Merritt @@ -25,6 +25,9 @@ *****************************************************************************/ #include "input.h" +#include "common/common.h" + +#define thread_input x264_glue3(thread, BIT_DEPTH, input) typedef struct { diff -Nru x264-0.152.2854+gite9a5903/input/timecode.c x264-0.158.2988+git-20191101.7817004/input/timecode.c --- x264-0.152.2854+gite9a5903/input/timecode.c 2017-12-31 12:50:51.000000000 +0000 +++ x264-0.158.2988+git-20191101.7817004/input/timecode.c 2019-11-09 05:16:29.000000000 +0000 @@ -1,7 +1,7 @@ /***************************************************************************** * timecode.c: timecode file input ***************************************************************************** - * Copyright (C) 2010-2017 x264 project + * Copyright (C) 2010-2019 x264 project * * Authors: Yusuke Nakamura * @@ -24,6 +24,7 @@ *****************************************************************************/ #include "input.h" + #define FAIL_IF_ERROR( cond, ... 
) FAIL_IF_ERR( cond, "timecode", __VA_ARGS__ ) typedef struct diff -Nru x264-0.152.2854+gite9a5903/input/y4m.c x264-0.158.2988+git-20191101.7817004/input/y4m.c --- x264-0.152.2854+gite9a5903/input/y4m.c 2017-12-31 12:50:51.000000000 +0000 +++ x264-0.158.2988+git-20191101.7817004/input/y4m.c 2019-11-09 05:16:29.000000000 +0000 @@ -1,7 +1,7 @@ /***************************************************************************** * y4m.c: y4m input ***************************************************************************** - * Copyright (C) 2003-2017 x264 project + * Copyright (C) 2003-2019 x264 project * * Authors: Laurent Aimar * Loren Merritt @@ -25,6 +25,7 @@ *****************************************************************************/ #include "input.h" + #define FAIL_IF_ERROR( cond, ... ) FAIL_IF_ERR( cond, "y4m", __VA_ARGS__ ) typedef struct @@ -47,10 +48,12 @@ static int parse_csp_and_depth( char *csp_name, int *bit_depth ) { - int csp = X264_CSP_MAX; + int csp = X264_CSP_MAX; /* Set colorspace from known variants */ - if( !strncmp( "420", csp_name, 3 ) ) + if( !strncmp( "mono", csp_name, 4 ) ) + csp = X264_CSP_I400; + else if( !strncmp( "420", csp_name, 3 ) ) csp = X264_CSP_I420; else if( !strncmp( "422", csp_name, 3 ) ) csp = X264_CSP_I422; @@ -58,7 +61,8 @@ csp = X264_CSP_I444; /* Set high bit depth from known extensions */ - if( sscanf( csp_name, "%*d%*[pP]%d", bit_depth ) != 1 ) + if( sscanf( csp_name, "mono%d", bit_depth ) != 1 && + sscanf( csp_name, "%*d%*[pP]%d", bit_depth ) != 1 ) *bit_depth = 8; return csp; diff -Nru x264-0.152.2854+gite9a5903/Makefile x264-0.158.2988+git-20191101.7817004/Makefile --- x264-0.152.2854+gite9a5903/Makefile 2017-12-31 12:50:51.000000000 +0000 +++ x264-0.158.2988+git-20191101.7817004/Makefile 2019-11-09 05:16:29.000000000 +0000 @@ -8,51 +8,62 @@ vpath %.asm $(SRCPATH) vpath %.rc $(SRCPATH) +CFLAGS += $(CFLAGSPROF) +LDFLAGS += $(LDFLAGSPROF) + GENERATED = all: default default: -SRCS = common/mc.c common/predict.c common/pixel.c 
common/macroblock.c \ - common/frame.c common/dct.c common/cpu.c common/cabac.c \ - common/common.c common/osdep.c common/rectangle.c \ - common/set.c common/quant.c common/deblock.c common/vlc.c \ - common/mvpred.c common/bitstream.c \ - encoder/analyse.c encoder/me.c encoder/ratecontrol.c \ - encoder/set.c encoder/macroblock.c encoder/cabac.c \ - encoder/cavlc.c encoder/encoder.c encoder/lookahead.c +SRCS = common/osdep.c common/base.c common/cpu.c common/tables.c \ + encoder/api.c + +SRCS_X = common/mc.c common/predict.c common/pixel.c common/macroblock.c \ + common/frame.c common/dct.c common/cabac.c \ + common/common.c common/rectangle.c \ + common/set.c common/quant.c common/deblock.c common/vlc.c \ + common/mvpred.c common/bitstream.c \ + encoder/analyse.c encoder/me.c encoder/ratecontrol.c \ + encoder/set.c encoder/macroblock.c encoder/cabac.c \ + encoder/cavlc.c encoder/encoder.c encoder/lookahead.c -SRCCLI = x264.c input/input.c input/timecode.c input/raw.c input/y4m.c \ - output/raw.c output/matroska.c output/matroska_ebml.c \ +SRCS_8 = + +SRCCLI = x264.c autocomplete.c input/input.c input/timecode.c input/raw.c \ + input/y4m.c output/raw.c output/matroska.c output/matroska_ebml.c \ output/flv.c output/flv_bytestream.c filters/filters.c \ filters/video/video.c filters/video/source.c filters/video/internal.c \ - filters/video/resize.c filters/video/cache.c filters/video/fix_vfr_pts.c \ - filters/video/select_every.c filters/video/crop.c filters/video/depth.c + filters/video/resize.c filters/video/fix_vfr_pts.c \ + filters/video/select_every.c filters/video/crop.c + +SRCCLI_X = filters/video/cache.c filters/video/depth.c SRCSO = + +SRCCHK_X = tools/checkasm.c + +SRCEXAMPLE = example.c + OBJS = +OBJASM = OBJSO = OBJCLI = - -OBJCHK = tools/checkasm.o - -OBJEXAMPLE = example.o +OBJCHK = +OBJCHK_8 = +OBJCHK_10 = +OBJEXAMPLE = CONFIG := $(shell cat config.h) -# GPL-only files -ifneq ($(findstring HAVE_GPL 1, $(CONFIG)),) -SRCCLI += -endif - # Optional module 
sources ifneq ($(findstring HAVE_AVS 1, $(CONFIG)),) SRCCLI += input/avs.c endif ifneq ($(findstring HAVE_THREAD 1, $(CONFIG)),) -SRCCLI += input/thread.c -SRCS += common/threadpool.c +SRCS_X += common/threadpool.c +SRCCLI_X += input/thread.c endif ifneq ($(findstring HAVE_WIN32THREAD 1, $(CONFIG)),) @@ -75,83 +86,116 @@ SRCCLI += output/mp4_lsmash.c endif -# MMX/SSE optims ifneq ($(AS),) -X86SRC0 = const-a.asm cabac-a.asm dct-a.asm deblock-a.asm mc-a.asm \ - mc-a2.asm pixel-a.asm predict-a.asm quant-a.asm \ - cpu-a.asm dct-32.asm bitstream-a.asm -ifneq ($(findstring HIGH_BIT_DEPTH, $(CONFIG)),) -X86SRC0 += sad16-a.asm -else -X86SRC0 += sad-a.asm -endif -X86SRC = $(X86SRC0:%=common/x86/%) +# MMX/SSE optims +SRCASM_X = ifeq ($(SYS_ARCH),X86) ARCH_X86 = yes -ASMSRC = $(X86SRC) common/x86/pixel-32.asm +SRCASM_X += common/x86/dct-32.asm \ + common/x86/pixel-32.asm endif - ifeq ($(SYS_ARCH),X86_64) ARCH_X86 = yes -ASMSRC = $(X86SRC:-32.asm=-64.asm) common/x86/trellis-64.asm +SRCASM_X += common/x86/dct-64.asm \ + common/x86/trellis-64.asm endif ifdef ARCH_X86 -SRCS += common/x86/mc-c.c common/x86/predict-c.c -OBJASM = $(ASMSRC:%.asm=%.o) -$(OBJASM): common/x86/x86inc.asm common/x86/x86util.asm -OBJCHK += tools/checkasm-a.o +SRCASM_X += common/x86/bitstream-a.asm \ + common/x86/const-a.asm \ + common/x86/cabac-a.asm \ + common/x86/dct-a.asm \ + common/x86/deblock-a.asm \ + common/x86/mc-a.asm \ + common/x86/mc-a2.asm \ + common/x86/pixel-a.asm \ + common/x86/predict-a.asm \ + common/x86/quant-a.asm +SRCS_X += common/x86/mc-c.c \ + common/x86/predict-c.c + +OBJASM += common/x86/cpu-a.o +ifneq ($(findstring HAVE_BITDEPTH8 1, $(CONFIG)),) +OBJASM += $(SRCASM_X:%.asm=%-8.o) common/x86/sad-a-8.o endif +ifneq ($(findstring HAVE_BITDEPTH10 1, $(CONFIG)),) +OBJASM += $(SRCASM_X:%.asm=%-10.o) common/x86/sad16-a-10.o +endif + +OBJCHK += tools/checkasm-a.o endif # AltiVec optims ifeq ($(SYS_ARCH),PPC) -ifneq ($(AS),) -SRCS += common/ppc/mc.c common/ppc/pixel.c common/ppc/dct.c \ - 
common/ppc/quant.c common/ppc/deblock.c \ - common/ppc/predict.c -endif +SRCS_X += common/ppc/dct.c \ + common/ppc/deblock.c \ + common/ppc/mc.c \ + common/ppc/pixel.c \ + common/ppc/predict.c \ + common/ppc/quant.c endif # NEON optims ifeq ($(SYS_ARCH),ARM) -ifneq ($(AS),) -ASMSRC += common/arm/cpu-a.S common/arm/pixel-a.S common/arm/mc-a.S \ - common/arm/dct-a.S common/arm/quant-a.S common/arm/deblock-a.S \ - common/arm/predict-a.S common/arm/bitstream-a.S -SRCS += common/arm/mc-c.c common/arm/predict-c.c -OBJASM = $(ASMSRC:%.S=%.o) -OBJCHK += tools/checkasm-arm.o +SRCASM_X = common/arm/bitstream-a.S \ + common/arm/dct-a.S \ + common/arm/deblock-a.S \ + common/arm/mc-a.S \ + common/arm/pixel-a.S \ + common/arm/predict-a.S \ + common/arm/quant-a.S +SRCS_X += common/arm/mc-c.c \ + common/arm/predict-c.c + +OBJASM += common/arm/cpu-a.o +ifneq ($(findstring HAVE_BITDEPTH8 1, $(CONFIG)),) +OBJASM += $(SRCASM_X:%.S=%-8.o) endif +ifneq ($(findstring HAVE_BITDEPTH10 1, $(CONFIG)),) +OBJASM += $(SRCASM_X:%.S=%-10.o) +endif + +OBJCHK += tools/checkasm-arm.o endif # AArch64 NEON optims ifeq ($(SYS_ARCH),AARCH64) -ifneq ($(AS),) -ASMSRC += common/aarch64/bitstream-a.S \ - common/aarch64/cabac-a.S \ - common/aarch64/dct-a.S \ - common/aarch64/deblock-a.S \ - common/aarch64/mc-a.S \ - common/aarch64/pixel-a.S \ - common/aarch64/predict-a.S \ - common/aarch64/quant-a.S -SRCS += common/aarch64/asm-offsets.c \ - common/aarch64/mc-c.c \ - common/aarch64/predict-c.c -OBJASM = $(ASMSRC:%.S=%.o) -OBJCHK += tools/checkasm-aarch64.o +SRCASM_X = common/aarch64/bitstream-a.S \ + common/aarch64/cabac-a.S \ + common/aarch64/dct-a.S \ + common/aarch64/deblock-a.S \ + common/aarch64/mc-a.S \ + common/aarch64/pixel-a.S \ + common/aarch64/predict-a.S \ + common/aarch64/quant-a.S +SRCS_X += common/aarch64/asm-offsets.c \ + common/aarch64/mc-c.c \ + common/aarch64/predict-c.c + +OBJASM += +ifneq ($(findstring HAVE_BITDEPTH8 1, $(CONFIG)),) +OBJASM += $(SRCASM_X:%.S=%-8.o) endif +ifneq 
($(findstring HAVE_BITDEPTH10 1, $(CONFIG)),) +OBJASM += $(SRCASM_X:%.S=%-10.o) +endif + +OBJCHK += tools/checkasm-aarch64.o endif # MSA optims ifeq ($(SYS_ARCH),MIPS) ifneq ($(findstring HAVE_MSA 1, $(CONFIG)),) -SRCS += common/mips/mc-c.c common/mips/dct-c.c \ - common/mips/deblock-c.c common/mips/pixel-c.c \ - common/mips/predict-c.c common/mips/quant-c.c +SRCS_X += common/mips/dct-c.c \ + common/mips/deblock-c.c \ + common/mips/mc-c.c \ + common/mips/pixel-c.c \ + common/mips/predict-c.c \ + common/mips/quant-c.c +endif endif + endif ifneq ($(HAVE_GETOPT_LONG),1) @@ -170,14 +214,28 @@ common/oclobj.h: common/opencl/x264-cl.h $(wildcard $(SRCPATH)/common/opencl/*.cl) cat $^ | $(SRCPATH)/tools/cltostr.sh $@ GENERATED += common/oclobj.h -SRCS += common/opencl.c encoder/slicetype-cl.c +SRCS_8 += common/opencl.c encoder/slicetype-cl.c endif OBJS += $(SRCS:%.c=%.o) OBJCLI += $(SRCCLI:%.c=%.o) OBJSO += $(SRCSO:%.c=%.o) +OBJEXAMPLE += $(SRCEXAMPLE:%.c=%.o) -.PHONY: all default fprofiled clean distclean install install-* uninstall cli lib-* etags +ifneq ($(findstring HAVE_BITDEPTH8 1, $(CONFIG)),) +OBJS += $(SRCS_X:%.c=%-8.o) $(SRCS_8:%.c=%-8.o) +OBJCLI += $(SRCCLI_X:%.c=%-8.o) +OBJCHK_8 += $(SRCCHK_X:%.c=%-8.o) +checkasm: checkasm8$(EXE) +endif +ifneq ($(findstring HAVE_BITDEPTH10 1, $(CONFIG)),) +OBJS += $(SRCS_X:%.c=%-10.o) +OBJCLI += $(SRCCLI_X:%.c=%-10.o) +OBJCHK_10 += $(SRCCHK_X:%.c=%-10.o) +checkasm: checkasm10$(EXE) +endif + +.PHONY: all default fprofiled clean distclean install install-* uninstall cli lib-* checkasm etags cli: x264$(EXE) lib-static: $(LIBX264) @@ -191,32 +249,66 @@ $(SONAME): $(GENERATED) .depend $(OBJS) $(OBJASM) $(OBJSO) $(LD)$@ $(OBJS) $(OBJASM) $(OBJSO) $(SOFLAGS) $(LDFLAGS) +$(IMPLIBNAME): $(SONAME) + ifneq ($(EXE),) -.PHONY: x264 checkasm example +.PHONY: x264 checkasm8 checkasm10 example x264: x264$(EXE) -checkasm: checkasm$(EXE) +checkasm8: checkasm8$(EXE) +checkasm10: checkasm10$(EXE) example: example$(EXE) endif x264$(EXE): 
$(GENERATED) .depend $(OBJCLI) $(CLI_LIBX264) $(LD)$@ $(OBJCLI) $(CLI_LIBX264) $(LDFLAGSCLI) $(LDFLAGS) -checkasm$(EXE): $(GENERATED) .depend $(OBJCHK) $(LIBX264) - $(LD)$@ $(OBJCHK) $(LIBX264) $(LDFLAGS) +checkasm8$(EXE): $(GENERATED) .depend $(OBJCHK) $(OBJCHK_8) $(LIBX264) + $(LD)$@ $(OBJCHK) $(OBJCHK_8) $(LIBX264) $(LDFLAGS) + +checkasm10$(EXE): $(GENERATED) .depend $(OBJCHK) $(OBJCHK_10) $(LIBX264) + $(LD)$@ $(OBJCHK) $(OBJCHK_10) $(LIBX264) $(LDFLAGS) example$(EXE): $(GENERATED) .depend $(OBJEXAMPLE) $(LIBX264) $(LD)$@ $(OBJEXAMPLE) $(LIBX264) $(LDFLAGS) -$(OBJS) $(OBJASM) $(OBJSO) $(OBJCLI) $(OBJCHK) $(OBJEXAMPLE): .depend +$(OBJS) $(OBJSO): CFLAGS += $(CFLAGSSO) +$(OBJCLI): CFLAGS += $(CFLAGSCLI) + +$(OBJS) $(OBJASM) $(OBJSO) $(OBJCLI) $(OBJCHK) $(OBJCHK_8) $(OBJCHK_10) $(OBJEXAMPLE): .depend + +%.o: %.c + $(CC) $(CFLAGS) -c $< $(CC_O) + +%-8.o: %.c + $(CC) $(CFLAGS) -c $< $(CC_O) -DHIGH_BIT_DEPTH=0 -DBIT_DEPTH=8 + +%-10.o: %.c + $(CC) $(CFLAGS) -c $< $(CC_O) -DHIGH_BIT_DEPTH=1 -DBIT_DEPTH=10 %.o: %.asm common/x86/x86inc.asm common/x86/x86util.asm $(AS) $(ASFLAGS) -o $@ $< -@ $(if $(STRIP), $(STRIP) -x $@) # delete local/anonymous symbols, so they don't show up in oprofile +%-8.o: %.asm common/x86/x86inc.asm common/x86/x86util.asm + $(AS) $(ASFLAGS) -o $@ $< -DBIT_DEPTH=8 -Dprivate_prefix=x264_8 + -@ $(if $(STRIP), $(STRIP) -x $@) + +%-10.o: %.asm common/x86/x86inc.asm common/x86/x86util.asm + $(AS) $(ASFLAGS) -o $@ $< -DBIT_DEPTH=10 -Dprivate_prefix=x264_10 + -@ $(if $(STRIP), $(STRIP) -x $@) + %.o: %.S $(AS) $(ASFLAGS) -o $@ $< -@ $(if $(STRIP), $(STRIP) -x $@) # delete local/anonymous symbols, so they don't show up in oprofile +%-8.o: %.S + $(AS) $(ASFLAGS) -o $@ $< -DHIGH_BIT_DEPTH=0 -DBIT_DEPTH=8 + -@ $(if $(STRIP), $(STRIP) -x $@) + +%-10.o: %.S + $(AS) $(ASFLAGS) -o $@ $< -DHIGH_BIT_DEPTH=1 -DBIT_DEPTH=10 + -@ $(if $(STRIP), $(STRIP) -x $@) + %.dll.o: %.rc x264.h $(RC) $(RCFLAGS)$@ -DDLL $< @@ -227,9 +319,21 @@ @rm -f .depend @echo 'dependency file 
generation...' ifeq ($(COMPILER),CL) - @$(foreach SRC, $(addprefix $(SRCPATH)/, $(SRCS) $(SRCCLI) $(SRCSO)), $(SRCPATH)/tools/msvsdepend.sh "$(CC)" "$(CFLAGS)" "$(SRC)" "$(SRC:$(SRCPATH)/%.c=%.o)" 1>> .depend;) + @$(foreach SRC, $(addprefix $(SRCPATH)/, $(SRCS) $(SRCCLI) $(SRCSO) $(SRCEXAMPLE)), $(SRCPATH)/tools/msvsdepend.sh "$(CC)" "$(CFLAGS)" "$(SRC)" "$(SRC:$(SRCPATH)/%.c=%.o)" 1>> .depend;) +ifneq ($(findstring HAVE_BITDEPTH8 1, $(CONFIG)),) + @$(foreach SRC, $(addprefix $(SRCPATH)/, $(SRCS_X) $(SRCS_8) $(SRCCLI_X) $(SRCCHK_X)), $(SRCPATH)/tools/msvsdepend.sh "$(CC)" "$(CFLAGS)" "$(SRC)" "$(SRC:$(SRCPATH)/%.c=%-8.o)" 1>> .depend;) +endif +ifneq ($(findstring HAVE_BITDEPTH10 1, $(CONFIG)),) + @$(foreach SRC, $(addprefix $(SRCPATH)/, $(SRCS_X) $(SRCCLI_X) $(SRCCHK_X)), $(SRCPATH)/tools/msvsdepend.sh "$(CC)" "$(CFLAGS)" "$(SRC)" "$(SRC:$(SRCPATH)/%.c=%-10.o)" 1>> .depend;) +endif else - @$(foreach SRC, $(addprefix $(SRCPATH)/, $(SRCS) $(SRCCLI) $(SRCSO)), $(CC) $(CFLAGS) $(SRC) $(DEPMT) $(SRC:$(SRCPATH)/%.c=%.o) $(DEPMM) 1>> .depend;) + @$(foreach SRC, $(addprefix $(SRCPATH)/, $(SRCS) $(SRCCLI) $(SRCSO) $(SRCEXAMPLE)), $(CC) $(CFLAGS) $(SRC) $(DEPMT) $(SRC:$(SRCPATH)/%.c=%.o) $(DEPMM) 1>> .depend;) +ifneq ($(findstring HAVE_BITDEPTH8 1, $(CONFIG)),) + @$(foreach SRC, $(addprefix $(SRCPATH)/, $(SRCS_X) $(SRCS_8) $(SRCCLI_X) $(SRCCHK_X)), $(CC) $(CFLAGS) $(SRC) $(DEPMT) $(SRC:$(SRCPATH)/%.c=%-8.o) $(DEPMM) 1>> .depend;) +endif +ifneq ($(findstring HAVE_BITDEPTH10 1, $(CONFIG)),) + @$(foreach SRC, $(addprefix $(SRCPATH)/, $(SRCS_X) $(SRCCLI_X) $(SRCCHK_X)), $(CC) $(CFLAGS) $(SRC) $(DEPMT) $(SRC:$(SRCPATH)/%.c=%-10.o) $(DEPMM) 1>> .depend;) +endif endif config.mak: @@ -240,7 +344,7 @@ include .depend endif -SRC2 = $(SRCS) $(SRCCLI) +OBJPROF = $(OBJS) $(OBJSO) $(OBJCLI) # These should cover most of the important codepaths OPT0 = --crf 30 -b1 -m1 -r1 --me dia --no-cabac --direct temporal --ssim --no-weightb OPT1 = --crf 16 -b2 -m3 -r3 --me hex --no-8x8dct --direct 
spatial --no-dct-decimate -t0 --slice-max-mbs 50 @@ -257,25 +361,25 @@ @echo 'where infiles are anything that x264 understands,' @echo 'i.e. YUV with resolution in the filename, y4m, or avisynth.' else -fprofiled: - $(MAKE) clean - $(MAKE) x264$(EXE) CFLAGS="$(CFLAGS) $(PROF_GEN_CC)" LDFLAGS="$(LDFLAGS) $(PROF_GEN_LD)" +fprofiled: clean + $(MAKE) x264$(EXE) CFLAGSPROF="$(PROF_GEN_CC)" LDFLAGSPROF="$(PROF_GEN_LD)" $(foreach V, $(VIDS), $(foreach I, 0 1 2 3 4 5 6 7, ./x264$(EXE) $(OPT$I) --threads 1 $(V) -o $(DEVNULL) ;)) ifeq ($(COMPILER),CL) # Because Visual Studio timestamps the object files within the PGD, it fails to build if they change - only the executable should be deleted rm -f x264$(EXE) else - rm -f $(SRC2:%.c=%.o) + rm -f $(OBJPROF) endif - $(MAKE) CFLAGS="$(CFLAGS) $(PROF_USE_CC)" LDFLAGS="$(LDFLAGS) $(PROF_USE_LD)" - rm -f $(SRC2:%.c=%.gcda) $(SRC2:%.c=%.gcno) *.dyn pgopti.dpi pgopti.dpi.lock *.pgd *.pgc + $(MAKE) CFLAGSPROF="$(PROF_USE_CC)" LDFLAGSPROF="$(PROF_USE_LD)" + rm -f $(OBJPROF:%.o=%.gcda) $(OBJPROF:%.o=%.gcno) *.dyn pgopti.dpi pgopti.dpi.lock *.pgd *.pgc endif clean: - rm -f $(OBJS) $(OBJASM) $(OBJCLI) $(OBJSO) $(SONAME) *.a *.lib *.exp *.pdb x264 x264.exe .depend TAGS - rm -f checkasm checkasm.exe $(OBJCHK) $(GENERATED) x264_lookahead.clbin - rm -f example example.exe $(OBJEXAMPLE) - rm -f $(SRC2:%.c=%.gcda) $(SRC2:%.c=%.gcno) *.dyn pgopti.dpi pgopti.dpi.lock *.pgd *.pgc + rm -f $(OBJS) $(OBJASM) $(OBJCLI) $(OBJSO) $(GENERATED) .depend TAGS + rm -f $(SONAME) *.a *.lib *.exp *.pdb x264$(EXE) x264_lookahead.clbin + rm -f checkasm8$(EXE) checkasm10$(EXE) $(OBJCHK) $(OBJCHK_8) $(OBJCHK_10) + rm -f example$(EXE) $(OBJEXAMPLE) + rm -f $(OBJPROF:%.o=%.gcda) $(OBJPROF:%.o=%.gcno) *.dyn pgopti.dpi pgopti.dpi.lock *.pgd *.pgc distclean: clean rm -f config.mak x264_config.h config.h config.log x264.pc x264.def @@ -286,18 +390,17 @@ $(INSTALL) x264$(EXE) $(DESTDIR)$(bindir) install-lib-dev: - $(INSTALL) -d $(DESTDIR)$(includedir) - $(INSTALL) -d 
$(DESTDIR)$(libdir) - $(INSTALL) -d $(DESTDIR)$(libdir)/pkgconfig - $(INSTALL) -m 644 $(SRCPATH)/x264.h $(DESTDIR)$(includedir) - $(INSTALL) -m 644 x264_config.h $(DESTDIR)$(includedir) + $(INSTALL) -d $(DESTDIR)$(includedir) $(DESTDIR)$(libdir)/pkgconfig + $(INSTALL) -m 644 $(SRCPATH)/x264.h x264_config.h $(DESTDIR)$(includedir) $(INSTALL) -m 644 x264.pc $(DESTDIR)$(libdir)/pkgconfig install-lib-static: lib-static install-lib-dev + $(INSTALL) -d $(DESTDIR)$(libdir) $(INSTALL) -m 644 $(LIBX264) $(DESTDIR)$(libdir) $(if $(RANLIB), $(RANLIB) $(DESTDIR)$(libdir)/$(LIBX264)) install-lib-shared: lib-shared install-lib-dev + $(INSTALL) -d $(DESTDIR)$(libdir) ifneq ($(IMPLIBNAME),) $(INSTALL) -d $(DESTDIR)$(bindir) $(INSTALL) -m 755 $(SONAME) $(DESTDIR)$(bindir) @@ -316,7 +419,5 @@ rm -f $(DESTDIR)$(libdir)/$(SONAME) $(DESTDIR)$(libdir)/libx264.$(SOSUFFIX) endif -etags: TAGS - -TAGS: - etags $(SRCS) +etags TAGS: + etags $(SRCS) $(SRCS_X) $(SRCS_8) diff -Nru x264-0.152.2854+gite9a5903/output/flv_bytestream.c x264-0.158.2988+git-20191101.7817004/output/flv_bytestream.c --- x264-0.152.2854+gite9a5903/output/flv_bytestream.c 2017-12-31 12:50:51.000000000 +0000 +++ x264-0.158.2988+git-20191101.7817004/output/flv_bytestream.c 2019-11-09 05:16:29.000000000 +0000 @@ -1,7 +1,7 @@ /***************************************************************************** * flv_bytestream.c: flv muxer utilities ***************************************************************************** - * Copyright (C) 2009-2017 x264 project + * Copyright (C) 2009-2019 x264 project * * Authors: Kieran Kunhya * diff -Nru x264-0.152.2854+gite9a5903/output/flv_bytestream.h x264-0.158.2988+git-20191101.7817004/output/flv_bytestream.h --- x264-0.152.2854+gite9a5903/output/flv_bytestream.h 2017-12-31 12:50:51.000000000 +0000 +++ x264-0.158.2988+git-20191101.7817004/output/flv_bytestream.h 2019-11-09 05:16:29.000000000 +0000 @@ -1,7 +1,7 @@ 
/***************************************************************************** * flv_bytestream.h: flv muxer utilities ***************************************************************************** - * Copyright (C) 2009-2017 x264 project + * Copyright (C) 2009-2019 x264 project * * Authors: Kieran Kunhya * @@ -90,8 +90,8 @@ enum { - FLV_FRAME_KEY = 1 << FLV_VIDEO_FRAMETYPE_OFFSET | 7, - FLV_FRAME_INTER = 2 << FLV_VIDEO_FRAMETYPE_OFFSET | 7, + FLV_FRAME_KEY = 1 << FLV_VIDEO_FRAMETYPE_OFFSET, + FLV_FRAME_INTER = 2 << FLV_VIDEO_FRAMETYPE_OFFSET, }; typedef enum diff -Nru x264-0.152.2854+gite9a5903/output/flv.c x264-0.158.2988+git-20191101.7817004/output/flv.c --- x264-0.152.2854+gite9a5903/output/flv.c 2017-12-31 12:50:51.000000000 +0000 +++ x264-0.158.2988+git-20191101.7817004/output/flv.c 2019-11-09 05:16:29.000000000 +0000 @@ -1,7 +1,7 @@ /***************************************************************************** * flv.c: flv muxer ***************************************************************************** - * Copyright (C) 2009-2017 x264 project + * Copyright (C) 2009-2019 x264 project * * Authors: Kieran Kunhya * @@ -196,7 +196,7 @@ flv_put_be24( c, 0 ); // StreamID - Always 0 p_flv->start = c->d_cur; // needed for overwriting length - flv_put_byte( c, 7 | FLV_FRAME_KEY ); // Frametype and CodecID + flv_put_byte( c, FLV_FRAME_KEY | FLV_CODECID_H264 ); // FrameType and CodecID flv_put_byte( c, 0 ); // AVC sequence header flv_put_be24( c, 0 ); // composition time @@ -279,7 +279,7 @@ flv_put_be24( c, 0 ); p_flv->start = c->d_cur; - flv_put_byte( c, p_picture->b_keyframe ? FLV_FRAME_KEY : FLV_FRAME_INTER ); + flv_put_byte( c, (p_picture->b_keyframe ? 
FLV_FRAME_KEY : FLV_FRAME_INTER) | FLV_CODECID_H264 ); flv_put_byte( c, 1 ); // AVC NALU flv_put_be24( c, offset ); @@ -322,7 +322,12 @@ CHECK( flv_flush_data( c ) ); - double total_duration = (2 * largest_pts - second_largest_pts) * p_flv->d_timebase; + double total_duration; + /* duration algorithm fails with one frame */ + if( p_flv->i_framenum == 1 ) + total_duration = p_flv->i_fps_num ? (double)p_flv->i_fps_den / p_flv->i_fps_num : 0; + else + total_duration = (2 * largest_pts - second_largest_pts) * p_flv->d_timebase; if( x264_is_regular_file( c->fp ) && total_duration > 0 ) { diff -Nru x264-0.152.2854+gite9a5903/output/matroska.c x264-0.158.2988+git-20191101.7817004/output/matroska.c --- x264-0.152.2854+gite9a5903/output/matroska.c 2017-12-31 12:50:51.000000000 +0000 +++ x264-0.158.2988+git-20191101.7817004/output/matroska.c 2019-11-09 05:16:29.000000000 +0000 @@ -1,7 +1,7 @@ /***************************************************************************** * matroska.c: matroska muxer ***************************************************************************** - * Copyright (C) 2005-2017 x264 project + * Copyright (C) 2005-2019 x264 project * * Authors: Mike Matsnev * diff -Nru x264-0.152.2854+gite9a5903/output/matroska_ebml.c x264-0.158.2988+git-20191101.7817004/output/matroska_ebml.c --- x264-0.152.2854+gite9a5903/output/matroska_ebml.c 2017-12-31 12:50:51.000000000 +0000 +++ x264-0.158.2988+git-20191101.7817004/output/matroska_ebml.c 2019-11-09 05:16:29.000000000 +0000 @@ -1,7 +1,7 @@ /***************************************************************************** * matroska_ebml.c: matroska muxer utilities ***************************************************************************** - * Copyright (C) 2005-2017 x264 project + * Copyright (C) 2005-2019 x264 project * * Authors: Mike Matsnev * diff -Nru x264-0.152.2854+gite9a5903/output/matroska_ebml.h x264-0.158.2988+git-20191101.7817004/output/matroska_ebml.h --- 
x264-0.152.2854+gite9a5903/output/matroska_ebml.h 2017-12-31 12:50:51.000000000 +0000 +++ x264-0.158.2988+git-20191101.7817004/output/matroska_ebml.h 2019-11-09 05:16:29.000000000 +0000 @@ -1,7 +1,7 @@ /***************************************************************************** * matroska_ebml.h: matroska muxer utilities ***************************************************************************** - * Copyright (C) 2005-2017 x264 project + * Copyright (C) 2005-2019 x264 project * * Authors: Mike Matsnev * diff -Nru x264-0.152.2854+gite9a5903/output/mp4.c x264-0.158.2988+git-20191101.7817004/output/mp4.c --- x264-0.152.2854+gite9a5903/output/mp4.c 2017-12-31 12:50:51.000000000 +0000 +++ x264-0.158.2988+git-20191101.7817004/output/mp4.c 2019-11-09 05:16:29.000000000 +0000 @@ -1,7 +1,7 @@ /***************************************************************************** * mp4.c: mp4 muxer ***************************************************************************** - * Copyright (C) 2003-2017 x264 project + * Copyright (C) 2003-2019 x264 project * * Authors: Laurent Aimar * Loren Merritt diff -Nru x264-0.152.2854+gite9a5903/output/mp4_lsmash.c x264-0.158.2988+git-20191101.7817004/output/mp4_lsmash.c --- x264-0.152.2854+gite9a5903/output/mp4_lsmash.c 2017-12-31 12:50:51.000000000 +0000 +++ x264-0.158.2988+git-20191101.7817004/output/mp4_lsmash.c 2019-11-09 05:16:29.000000000 +0000 @@ -1,7 +1,7 @@ /***************************************************************************** * mp4_lsmash.c: mp4 muxer using L-SMASH ***************************************************************************** - * Copyright (C) 2003-2017 x264 project + * Copyright (C) 2003-2019 x264 project * * Authors: Laurent Aimar * Loren Merritt diff -Nru x264-0.152.2854+gite9a5903/output/output.h x264-0.158.2988+git-20191101.7817004/output/output.h --- x264-0.152.2854+gite9a5903/output/output.h 2017-12-31 12:50:51.000000000 +0000 +++ x264-0.158.2988+git-20191101.7817004/output/output.h 2019-11-09 
05:16:29.000000000 +0000 @@ -1,7 +1,7 @@ /***************************************************************************** * output.h: x264 file output modules ***************************************************************************** - * Copyright (C) 2003-2017 x264 project + * Copyright (C) 2003-2019 x264 project * * Authors: Laurent Aimar * Loren Merritt diff -Nru x264-0.152.2854+gite9a5903/output/raw.c x264-0.158.2988+git-20191101.7817004/output/raw.c --- x264-0.152.2854+gite9a5903/output/raw.c 2017-12-31 12:50:51.000000000 +0000 +++ x264-0.158.2988+git-20191101.7817004/output/raw.c 2019-11-09 05:16:29.000000000 +0000 @@ -1,7 +1,7 @@ /***************************************************************************** * raw.c: raw muxer ***************************************************************************** - * Copyright (C) 2003-2017 x264 project + * Copyright (C) 2003-2019 x264 project * * Authors: Laurent Aimar * Loren Merritt diff -Nru x264-0.152.2854+gite9a5903/tools/bash-autocomplete.sh x264-0.158.2988+git-20191101.7817004/tools/bash-autocomplete.sh --- x264-0.152.2854+gite9a5903/tools/bash-autocomplete.sh 1970-01-01 00:00:00.000000000 +0000 +++ x264-0.158.2988+git-20191101.7817004/tools/bash-autocomplete.sh 2019-11-09 05:16:29.000000000 +0000 @@ -0,0 +1,15 @@ +_x264() +{ + local path args cur prev + + path="${COMP_LINE%%[[:blank:]]*}" + args="${COMP_LINE:${#path}:$((COMP_POINT-${#path}))}" + cur="${args##*[[:blank:]=]}" + prev="$(sed 's/[[:blank:]=]*$//; s/^.*[[:blank:]]//' <<< "${args%%"$cur"}")" + + # Expand ~ + printf -v path '%q' "$path" && eval path="${path/#'\~'/'~'}" + + COMPREPLY=($("$path" --autocomplete "$prev" "$cur")) && compopt +o default +} 2>/dev/null +complete -o default -F _x264 x264 diff -Nru x264-0.152.2854+gite9a5903/tools/checkasm-aarch64.S x264-0.158.2988+git-20191101.7817004/tools/checkasm-aarch64.S --- x264-0.152.2854+gite9a5903/tools/checkasm-aarch64.S 2017-12-31 12:50:51.000000000 +0000 +++ 
x264-0.158.2988+git-20191101.7817004/tools/checkasm-aarch64.S 2019-11-09 05:16:29.000000000 +0000 @@ -1,7 +1,7 @@ /**************************************************************************** * checkasm-aarch64.S: assembly check tool ***************************************************************************** - * Copyright (C) 2015-2017 x264 project + * Copyright (C) 2015-2019 x264 project * * Authors: Martin Storsjo * @@ -58,7 +58,7 @@ #define CLOBBER_STACK ((8*MAX_ARGS + 15) & ~15) -function x264_checkasm_stack_clobber, export=1 +function checkasm_stack_clobber, export=1 mov x3, sp mov x2, #CLOBBER_STACK 1: @@ -71,7 +71,7 @@ #define ARG_STACK ((8*(MAX_ARGS - 8) + 15) & ~15) -function x264_checkasm_call, export=1 +function checkasm_call, export=1 stp x29, x30, [sp, #-16]! mov x29, sp stp x19, x20, [sp, #-16]! @@ -149,7 +149,7 @@ mov w9, #0 str w9, [x2] movrel x0, error_message - bl X(puts) + bl EXT(puts) 0: ldp x0, x1, [sp], #16 ldp d14, d15, [sp], #16 diff -Nru x264-0.152.2854+gite9a5903/tools/checkasm-a.asm x264-0.158.2988+git-20191101.7817004/tools/checkasm-a.asm --- x264-0.152.2854+gite9a5903/tools/checkasm-a.asm 2017-12-31 12:50:51.000000000 +0000 +++ x264-0.158.2988+git-20191101.7817004/tools/checkasm-a.asm 2019-11-09 05:16:29.000000000 +0000 @@ -1,7 +1,7 @@ ;***************************************************************************** ;* checkasm-a.asm: assembly check tool ;***************************************************************************** -;* Copyright (C) 2008-2017 x264 project +;* Copyright (C) 2008-2019 x264 project ;* ;* Authors: Loren Merritt ;* Henrik Gramner diff -Nru x264-0.152.2854+gite9a5903/tools/checkasm-arm.S x264-0.158.2988+git-20191101.7817004/tools/checkasm-arm.S --- x264-0.152.2854+gite9a5903/tools/checkasm-arm.S 2017-12-31 12:50:51.000000000 +0000 +++ x264-0.158.2988+git-20191101.7817004/tools/checkasm-arm.S 2019-11-09 05:16:29.000000000 +0000 @@ -1,7 +1,7 @@ 
/**************************************************************************** * checkasm-arm.S: assembly check tool ***************************************************************************** - * Copyright (C) 2015-2017 x264 project + * Copyright (C) 2015-2019 x264 project * * Authors: Martin Storsjo * @@ -52,7 +52,7 @@ .macro clobbercheck variant .equ pushed, 4*10 -function x264_checkasm_call_\variant +function checkasm_call_\variant push {r4-r11, lr} .ifc \variant, neon vpush {q4-q7} @@ -128,7 +128,7 @@ mov r12, #0 str r12, [r2] movrel r0, error_message - blx X(puts) + blx EXT(puts) 0: pop {r0, r1} .ifc \variant, neon diff -Nru x264-0.152.2854+gite9a5903/tools/checkasm.c x264-0.158.2988+git-20191101.7817004/tools/checkasm.c --- x264-0.152.2854+gite9a5903/tools/checkasm.c 2017-12-31 12:50:51.000000000 +0000 +++ x264-0.158.2988+git-20191101.7817004/tools/checkasm.c 2019-11-09 05:16:29.000000000 +0000 @@ -1,7 +1,7 @@ /***************************************************************************** * checkasm.c: assembly check tool ***************************************************************************** - * Copyright (C) 2003-2017 x264 project + * Copyright (C) 2003-2019 x264 project * * Authors: Loren Merritt * Laurent Aimar @@ -27,7 +27,6 @@ #include #include "common/common.h" -#include "common/cpu.h" #include "encoder/macroblock.h" #ifdef _WIN32 @@ -41,15 +40,15 @@ #endif /* buf1, buf2: initialised to random data and shouldn't write into them */ -uint8_t *buf1, *buf2; +static uint8_t *buf1, *buf2; /* buf3, buf4: used to store output */ -uint8_t *buf3, *buf4; +static uint8_t *buf3, *buf4; /* pbuf1, pbuf2: initialised to random pixel data and shouldn't write into them. 
*/ -pixel *pbuf1, *pbuf2; +static pixel *pbuf1, *pbuf2; /* pbuf3, pbuf4: point to buf3, buf4, just for type convenience */ -pixel *pbuf3, *pbuf4; +static pixel *pbuf3, *pbuf4; -int quiet = 0; +static int quiet = 0; #define report( name ) { \ if( used_asm && !quiet ) \ @@ -75,10 +74,10 @@ bench_t vers[MAX_CPUS]; } bench_func_t; -int do_bench = 0; -int bench_pattern_len = 0; -const char *bench_pattern = ""; -char func_name[100]; +static int do_bench = 0; +static int bench_pattern_len = 0; +static const char *bench_pattern = ""; +static char func_name[100]; static bench_func_t benchs[MAX_FUNCS]; static const char *pixel_names[12] = { "16x16", "16x8", "8x16", "8x8", "8x4", "4x8", "4x4", "4x16", "4x2", "2x8", "2x4", "2x2" }; @@ -176,7 +175,7 @@ if( k < j ) continue; printf( "%s_%s%s: %"PRId64"\n", benchs[i].name, -#if HAVE_MMX +#if ARCH_X86 || ARCH_X86_64 b->cpu&X264_CPU_AVX512 ? "avx512" : b->cpu&X264_CPU_AVX2 ? "avx2" : b->cpu&X264_CPU_BMI2 ? "bmi2" : @@ -207,7 +206,7 @@ b->cpu&X264_CPU_MSA ? "msa" : #endif "c", -#if HAVE_MMX +#if ARCH_X86 || ARCH_X86_64 b->cpu&X264_CPU_CACHELINE_32 ? "_c32" : b->cpu&X264_CPU_SLOW_ATOM && b->cpu&X264_CPU_CACHELINE_64 ? "_c64_atom" : b->cpu&X264_CPU_CACHELINE_64 ? "_c64" : @@ -230,7 +229,7 @@ static void (*simd_warmup_func)( void ) = NULL; #define simd_warmup() do { if( simd_warmup_func ) simd_warmup_func(); } while( 0 ) -#if ARCH_X86 || ARCH_X86_64 +#if HAVE_MMX int x264_stack_pagealign( int (*func)(), int align ); void x264_checkasm_warmup_avx( void ); void x264_checkasm_warmup_avx512( void ); @@ -242,11 +241,11 @@ #define x264_stack_pagealign( func, align ) func() #endif -#if ARCH_AARCH64 +#if HAVE_AARCH64 intptr_t x264_checkasm_call( intptr_t (*func)(), int *ok, ... ); #endif -#if ARCH_ARM +#if HAVE_ARMV6 intptr_t x264_checkasm_call_neon( intptr_t (*func)(), int *ok, ... ); intptr_t x264_checkasm_call_noneon( intptr_t (*func)(), int *ok, ... ); intptr_t (*x264_checkasm_call)( intptr_t (*func)(), int *ok, ... 
) = x264_checkasm_call_noneon; @@ -254,7 +253,7 @@ #define call_c1(func,...) func(__VA_ARGS__) -#if ARCH_X86_64 +#if HAVE_MMX && ARCH_X86_64 /* Evil hack: detect incorrect assumptions that 32-bit ints are zero-extended to 64-bit. * This is done by clobbering the stack with junk around the stack pointer and calling the * assembly function through x264_checkasm_call with added dummy arguments which forces all @@ -270,19 +269,19 @@ x264_checkasm_stack_clobber( r,r,r,r,r,r,r,r,r,r,r,r,r,r,r,r,r,r,r,r,r ); /* max_args+6 */ \ simd_warmup(); \ x264_checkasm_call(( intptr_t(*)())func, &ok, 0, 0, 0, 0, __VA_ARGS__ ); }) -#elif ARCH_AARCH64 && !defined(__APPLE__) +#elif HAVE_AARCH64 && !defined(__APPLE__) void x264_checkasm_stack_clobber( uint64_t clobber, ... ); #define call_a1(func,...) ({ \ uint64_t r = (rand() & 0xffff) * 0x0001000100010001ULL; \ x264_checkasm_stack_clobber( r,r,r,r,r,r,r,r,r,r,r,r,r,r,r,r,r,r,r,r,r,r,r ); /* max_args+8 */ \ x264_checkasm_call(( intptr_t(*)())func, &ok, 0, 0, 0, 0, 0, 0, __VA_ARGS__ ); }) -#elif ARCH_X86 || ARCH_ARM +#elif HAVE_MMX || HAVE_ARMV6 #define call_a1(func,...) x264_checkasm_call( (intptr_t(*)())func, &ok, __VA_ARGS__ ) #else #define call_a1 call_c1 #endif -#if ARCH_ARM +#if HAVE_ARMV6 #define call_a1_64(func,...) ((uint64_t (*)(intptr_t(*)(), int*, ...))x264_checkasm_call)( (intptr_t(*)())func, &ok, __VA_ARGS__ ) #else #define call_a1_64 call_a1 @@ -597,13 +596,13 @@ #define TEST_INTRA_X3( name, i8x8, ... ) \ if( pixel_asm.name && pixel_asm.name != pixel_ref.name ) \ { \ - ALIGNED_16( int res_c[3] ); \ - ALIGNED_16( int res_asm[3] ); \ + ALIGNED_16( int res_c[4] ); \ + ALIGNED_16( int res_asm[4] ); \ set_func_name( #name ); \ used_asm = 1; \ call_c( pixel_c.name, pbuf1+48, i8x8 ? edge : pbuf3+48, res_c ); \ call_a( pixel_asm.name, pbuf1+48, i8x8 ? 
edge : pbuf3+48, res_asm ); \ - if( memcmp(res_c, res_asm, sizeof(res_c)) ) \ + if( memcmp(res_c, res_asm, 3 * sizeof(*res_c)) ) \ { \ ok = 0; \ fprintf( stderr, #name": %d,%d,%d != %d,%d,%d [FAILED]\n", \ @@ -862,7 +861,7 @@ h->param.analyse.i_luma_deadzone[1] = 0; h->param.analyse.b_transform_8x8 = 1; for( int i = 0; i < 6; i++ ) - h->pps->scaling_list[i] = x264_cqm_flat16; + h->sps->scaling_list[i] = x264_cqm_flat16; x264_cqm_init( h ); x264_quant_init( h, 0, &qf ); @@ -1579,13 +1578,14 @@ intptr_t src_stride = plane_specs[i].src_stride; intptr_t dst_stride = ALIGN( w, 16 ); intptr_t offv = dst_stride*h + 16; + pixel *src1 = pbuf1 + X264_MAX(0, -src_stride) * (h-1); for( int pw = 3; pw <= 4; pw++ ) { memset( pbuf3, 0, 0x1000 ); memset( pbuf4, 0, 0x1000 ); - call_c( mc_c.plane_copy_deinterleave_rgb, pbuf3, dst_stride, pbuf3+offv, dst_stride, pbuf3+2*offv, dst_stride, pbuf1, src_stride, pw, w, h ); - call_a( mc_a.plane_copy_deinterleave_rgb, pbuf4, dst_stride, pbuf4+offv, dst_stride, pbuf4+2*offv, dst_stride, pbuf1, src_stride, pw, w, h ); + call_c( mc_c.plane_copy_deinterleave_rgb, pbuf3, dst_stride, pbuf3+offv, dst_stride, pbuf3+2*offv, dst_stride, src1, src_stride, pw, w, h ); + call_a( mc_a.plane_copy_deinterleave_rgb, pbuf4, dst_stride, pbuf4+offv, dst_stride, pbuf4+2*offv, dst_stride, src1, src_stride, pw, w, h ); for( int y = 0; y < h; y++ ) if( memcmp( pbuf3+y*dst_stride+0*offv, pbuf4+y*dst_stride+0*offv, w ) || memcmp( pbuf3+y*dst_stride+1*offv, pbuf4+y*dst_stride+1*offv, w ) || @@ -1800,6 +1800,8 @@ } } + static const uint16_t mbtree_fix8_counts[] = { 5, 384, 392, 400, 415 }; + if( mc_a.mbtree_fix8_pack != mc_ref.mbtree_fix8_pack ) { set_func_name( "mbtree_fix8_pack" ); @@ -1807,9 +1809,9 @@ float *fix8_src = (float*)(buf3 + 0x800); uint16_t *dstc = (uint16_t*)buf3; uint16_t *dsta = (uint16_t*)buf4; - for( int i = 0; i < 5; i++ ) + for( int i = 0; i < ARRAY_ELEMS(mbtree_fix8_counts); i++ ) { - int count = 256 + i; + int count = mbtree_fix8_counts[i]; 
for( int j = 0; j < count; j++ ) fix8_src[j] = (int16_t)(rand()) / 256.0f; @@ -1834,9 +1836,9 @@ uint16_t *fix8_src = (uint16_t*)(buf3 + 0x800); float *dstc = (float*)buf3; float *dsta = (float*)buf4; - for( int i = 0; i < 5; i++ ) + for( int i = 0; i < ARRAY_ELEMS(mbtree_fix8_counts); i++ ) { - int count = 256 + i; + int count = mbtree_fix8_counts[i]; for( int j = 0; j < count; j++ ) fix8_src[j] = rand(); @@ -2036,14 +2038,14 @@ if( i_cqm == 0 ) { for( int i = 0; i < 6; i++ ) - h->pps->scaling_list[i] = x264_cqm_flat16; - h->param.i_cqm_preset = h->pps->i_cqm_preset = X264_CQM_FLAT; + h->sps->scaling_list[i] = x264_cqm_flat16; + h->param.i_cqm_preset = h->sps->i_cqm_preset = X264_CQM_FLAT; } else if( i_cqm == 1 ) { for( int i = 0; i < 6; i++ ) - h->pps->scaling_list[i] = x264_cqm_jvt[i]; - h->param.i_cqm_preset = h->pps->i_cqm_preset = X264_CQM_JVT; + h->sps->scaling_list[i] = x264_cqm_jvt[i]; + h->param.i_cqm_preset = h->sps->i_cqm_preset = X264_CQM_JVT; } else { @@ -2055,8 +2057,8 @@ for( int i = 0; i < 64; i++ ) cqm_buf[i] = 1; for( int i = 0; i < 6; i++ ) - h->pps->scaling_list[i] = cqm_buf; - h->param.i_cqm_preset = h->pps->i_cqm_preset = X264_CQM_CUSTOM; + h->sps->scaling_list[i] = cqm_buf; + h->param.i_cqm_preset = h->sps->i_cqm_preset = X264_CQM_CUSTOM; } h->param.rc.i_qp_min = 0; @@ -2599,7 +2601,7 @@ DECL_CABAC(c) #if HAVE_MMX DECL_CABAC(asm) -#elif defined(ARCH_AARCH64) +#elif HAVE_AARCH64 DECL_CABAC(asm) #else #define run_cabac_decision_asm run_cabac_decision_c @@ -2805,7 +2807,7 @@ int ret = 0; int cpu0 = 0, cpu1 = 0; uint32_t cpu_detect = x264_cpu_detect(); -#if ARCH_X86 || ARCH_X86_64 +#if HAVE_MMX if( cpu_detect & X264_CPU_AVX512 ) simd_warmup_func = x264_checkasm_warmup_avx512; else if( cpu_detect & X264_CPU_AVX ) @@ -2813,7 +2815,7 @@ #endif simd_warmup(); -#if HAVE_MMX +#if ARCH_X86 || ARCH_X86_64 if( cpu_detect & X264_CPU_MMX2 ) { ret |= add_flags( &cpu0, &cpu1, X264_CPU_MMX | X264_CPU_MMX2, "MMX" ); @@ -2912,7 +2914,7 @@ return ret; } -int 
main(int argc, char *argv[]) +REALIGN_STACK int main( int argc, char **argv ) { #ifdef _WIN32 /* Disable the Windows Error Reporting dialog */ @@ -2971,4 +2973,3 @@ print_bench(); return 0; } - diff -Nru x264-0.152.2854+gite9a5903/tools/gas-preprocessor.pl x264-0.158.2988+git-20191101.7817004/tools/gas-preprocessor.pl --- x264-0.152.2854+gite9a5903/tools/gas-preprocessor.pl 2017-12-31 12:50:51.000000000 +0000 +++ x264-0.158.2988+git-20191101.7817004/tools/gas-preprocessor.pl 2019-11-09 05:16:29.000000000 +0000 @@ -97,8 +97,12 @@ if ($as_type eq "armasm") { $preprocess_c_cmd[0] = "cpp"; - push(@preprocess_c_cmd, "-U__ELF__"); - push(@preprocess_c_cmd, "-U__MACH__"); + push(@preprocess_c_cmd, "-undef"); + # Normally a preprocessor for windows would predefine _WIN32, + # but we're using any generic system-agnostic preprocessor "cpp" + # with -undef (to avoid getting predefined variables from the host + # system in cross compilation cases), so manually define it here. + push(@preprocess_c_cmd, "-D_WIN32"); @preprocess_c_cmd = grep ! /^-nologo$/, @preprocess_c_cmd; # Remove -ignore XX parameter pairs from preprocess_c_cmd @@ -245,7 +249,7 @@ my %thumb_labels; my %call_targets; -my %mov32_targets; +my %import_symbols; my %neon_alias_reg; my %neon_alias_type; @@ -270,7 +274,7 @@ # the line indicates a comment for all supported archs (aarch64, arm, ppc # and x86). Also strips line number comments but since they are off anyway # it is no loss. - s/^#.*$//; + s/^\s*#.*$//; # remove all comments (to avoid interfering with evaluating directives) s/(? 
with ic as conditional code - if ($cond =~ /|$arm_cond_codes/) { + if ($cond =~ /^(|$arm_cond_codes)$/) { if (exists $thumb_labels{$label}) { print ASMFILE ".thumb_func $label\n"; } else { @@ -785,24 +797,25 @@ if ($arch eq "aarch64") { # fix missing aarch64 instructions in Xcode 5.1 (beta3) # mov with vector arguments is not supported, use alias orr instead - if ($line =~ /^\s*mov\s+(v\d[\.{}\[\]\w]+),\s*(v\d[\.{}\[\]\w]+)\b\s*$/) { - $line = " orr $1, $2, $2\n"; + if ($line =~ /^(\d+:)?\s*mov\s+(v\d[\.{}\[\]\w]+),\s*(v\d[\.{}\[\]\w]+)\b\s*$/) { + $line = "$1 orr $2, $3, $3\n"; } # movi 16, 32 bit shifted variant, shift is optional - if ($line =~ /^\s*movi\s+(v[0-3]?\d\.(?:2|4|8)[hsHS])\s*,\s*(#\w+)\b\s*$/) { - $line = " movi $1, $2, lsl #0\n"; + if ($line =~ /^(\d+:)?\s*movi\s+(v[0-3]?\d\.(?:2|4|8)[hsHS])\s*,\s*(#\w+)\b\s*$/) { + $line = "$1 movi $2, $3, lsl #0\n"; } # Xcode 5 misses the alias uxtl. Replace it with the more general ushll. # Clang 3.4 misses the alias sxtl too. Replace it with the more general sshll. - if ($line =~ /^\s*(s|u)xtl(2)?\s+(v[0-3]?\d\.[248][hsdHSD])\s*,\s*(v[0-3]?\d\.(?:2|4|8|16)[bhsBHS])\b\s*$/) { - $line = " $1shll$2 $3, $4, #0\n"; - } - # clang 3.4 does not automatically use shifted immediates in add/sub - if ($as_type eq "clang" and - $line =~ /^(\s*(?:add|sub)s?) ([^#l]+)#([\d\+\-\*\/ <>]+)\s*$/) { - my $imm = eval $3; + # armasm64 also misses these instructions. + if ($line =~ /^(\d+:)?\s*(s|u)xtl(2)?\s+(v[0-3]?\d\.[248][hsdHSD])\s*,\s*(v[0-3]?\d\.(?:2|4|8|16)[bhsBHS])\b\s*$/) { + $line = "$1 $2shll$3 $4, $5, #0\n"; + } + # clang 3.4 and armasm64 do not automatically use shifted immediates in add/sub + if (($as_type eq "clang" or $as_type eq "armasm") and + $line =~ /^(\d+:)?(\s*(?:add|sub)s?) ([^#l]+)#([\d\+\-\*\/ <>]+)\s*$/) { + my $imm = eval $4; if ($imm > 4095 and not ($imm & 4095)) { - $line = "$1 $2#" . ($imm >> 12) . ", lsl #12\n"; + $line = "$1 $2 $3#" . ($imm >> 12) . 
", lsl #12\n"; } } if ($ENV{GASPP_FIX_XCODE5}) { @@ -853,7 +866,7 @@ $last_temp_labels{$num} = $name; } - if ($line =~ s/^(\w+):/$1/) { + if ($line =~ s/^\s*(\w+):/$1/) { # Skip labels that have already been declared with a PROC, # labels must not be declared multiple times. return if (defined $labels_seen{$1}); @@ -866,21 +879,40 @@ # Check branch instructions - if ($line =~ /(?:^|\n)\s*(\w+\s*:\s*)?(bl?x?(..)?(\.w)?)\s+(\w+)/) { + if ($line =~ /(?:^|\n)\s*(\w+\s*:\s*)?(bl?x?\.?(..)?(\.w)?)\s+(\w+)/) { my $instr = $2; my $cond = $3; my $width = $4; my $target = $5; # Don't interpret e.g. bic as b with ic as conditional code - if ($cond !~ /|$arm_cond_codes/) { + if ($cond !~ /^(|$arm_cond_codes)$/) { # Not actually a branch } elsif ($target =~ /^(\d+)([bf])$/) { # The target is a local label $line = handle_local_label($line, $1, $2); - $line =~ s/\b$instr\b/$&.w/ if $width eq ""; - } elsif (!is_arm_register($target)) { + $line =~ s/\b$instr\b/$&.w/ if $width eq "" and $arch eq "arm"; + } elsif (($arch eq "arm" and !is_arm_register($target)) or + ($arch eq "aarch64" and !is_aarch64_register($target))) { + $call_targets{$target}++; + } + } elsif ($line =~ /(?:^|\n)\s*(\w+\s*:\s*)?(cbn?z|adr|tbz)\s+(\w+)\s*,(\s*#\d+\s*,)?\s*(\w+)/) { + my $instr = $2; + my $reg = $3; + my $bit = $4; + my $target = $5; + if ($target =~ /^(\d+)([bf])$/) { + # The target is a local label + $line = handle_local_label($line, $1, $2); + } else { $call_targets{$target}++; } + # Convert tbz with a wX register into an xX register, + # due to armasm64 bugs/limitations. + if ($instr eq "tbz" and $reg =~ /w\d+/) { + my $xreg = $reg; + $xreg =~ s/w/x/; + $line =~ s/\b$reg\b/$xreg/; + } } elsif ($line =~ /^\s*.h?word.*\b\d+[bf]\b/) { while ($line =~ /\b(\d+)([bf])\b/g) { $line = handle_local_label($line, $1, $2); @@ -918,19 +950,106 @@ $line =~ s/\(\s*(\d+)\s*([<>])\s*(\d+)\s*\)/$val/; } - # Change a movw... 
#:lower16: into a mov32 pseudoinstruction - $line =~ s/^(\s*)movw(\s+\w+\s*,\s*)\#:lower16:(.*)$/$1mov32$2$3/; - # and remove the following, matching movt completely - $line =~ s/^\s*movt\s+\w+\s*,\s*\#:upper16:.*$//; - - if ($line =~ /^\s*mov32\s+\w+,\s*([a-zA-Z]\w*)/) { - $mov32_targets{$1}++; - } + if ($arch eq "arm") { + # Change a movw... #:lower16: into a mov32 pseudoinstruction + $line =~ s/^(\s*)movw(\s+\w+\s*,\s*)\#:lower16:(.*)$/$1mov32$2$3/; + # and remove the following, matching movt completely + $line =~ s/^\s*movt\s+\w+\s*,\s*\#:upper16:.*$//; + + if ($line =~ /^\s*mov32\s+\w+,\s*([a-zA-Z]\w*)/) { + $import_symbols{$1}++; + } + + # Misc bugs/deficiencies: + # armasm seems unable to parse e.g. "vmov s0, s1" without a type + # qualifier, thus add .f32. + $line =~ s/^(\s+(?:vmov|vadd))(\s+s\d+\s*,\s*s\d+)/$1.f32$2/; + } elsif ($arch eq "aarch64") { + # Convert ext into ext8; armasm64 seems to require it named as ext8. + $line =~ s/^(\s+)ext(\s+)/$1ext8$2/; + + # Pick up targets from ldr x0, =sym+offset + if ($line =~ /^\s*ldr\s+(\w+)\s*,\s*=([a-zA-Z]\w*)(.*)$/) { + my $reg = $1; + my $sym = $2; + my $offset = eval_expr($3); + if ($offset < 0) { + # armasm64 is buggy with ldr x0, =sym+offset where the + # offset is a negative value; it does write a negative + # offset into the literal pool as it should, but the + # negative offset only covers the lower 32 bit of the 64 + # bit literal/relocation. + # Thus remove the offset and apply it manually with a sub + # afterwards. + $offset = -$offset; + $line = "\tldr $reg, =$sym\n\tsub $reg, $reg, #$offset\n"; + } + $import_symbols{$sym}++; + } + + # armasm64 (currently) doesn't support offsets on adrp targets, + # even though the COFF format relocations (and the linker) + # supports it. Therefore strip out the offsets from adrp and + # add :lo12: (in case future armasm64 would start handling it) + # and add an extra explicit add instruction for the offset. 
+ if ($line =~ s/(adrp\s+\w+\s*,\s*(\w+))([\d\+\-\*\/\(\) <>]+)?/\1/) { + $import_symbols{$2}++; + } + if ($line =~ s/(add\s+(\w+)\s*,\s*\w+\s*,\s*):lo12:(\w+)([\d\+\-\*\/\(\) <>]+)?/\1\3/) { + my $reg = $2; + my $sym = $3; + my $offset = eval_expr($4); + $line .= "\tadd $reg, $reg, #$offset\n" if $offset > 0; + $import_symbols{$sym}++; + } + + # Convert e.g. "add x0, x0, w0, uxtw" into "add x0, x0, w0, uxtw #0", + # or "ldr x0, [x0, w0, uxtw]" into "ldr x0, [x0, w0, uxtw #0]". + $line =~ s/(uxtw|sxtw)(\s*\]?\s*)$/\1 #0\2/i; + + # Convert "mov x0, v0.d[0]" into "umov x0, v0.d[0]" + $line =~ s/\bmov\s+[xw]\d+\s*,\s*v\d+\.[ds]/u$&/i; + + # Convert "ccmp w0, #0, #0, ne" into "ccmpne w0, #0, #0", + # and "csel w0, w0, w0, ne" into "cselne w0, w0, w0". + $line =~ s/(ccmp|csel)\s+([xw]\w+)\s*,\s*([xw#]\w+)\s*,\s*([xw#]\w+)\s*,\s*($arm_cond_codes)/\1\5 \2, \3, \4/; + + # Convert "cinc w0, w0, ne" into "cincne w0, w0". + $line =~ s/(cinc)\s+([xw]\w+)\s*,\s*([xw]\w+)\s*,\s*($arm_cond_codes)/\1\4 \2, \3/; + + # Convert "cset w0, lo" into "csetlo w0" + $line =~ s/(cset)\s+([xw]\w+)\s*,\s*($arm_cond_codes)/\1\3 \2/; + + # Strip out prfum; armasm64 fails to assemble any + # variant/combination of prfum tested so far, but it can be + # left out without any + $line =~ s/prfum.*\]//; + + # Convert "ldrb w0, [x0, #-1]" into "ldurb w0, [x0, #-1]". + # Don't do this for forms with writeback though. + if ($line =~ /(ld|st)(r[bh]?)\s+(\w+)\s*,\s*\[\s*(\w+)\s*,\s*#([^\]]+)\s*\][^!]/) { + my $instr = $1; + my $suffix = $2; + my $target = $3; + my $base = $4; + my $offset = eval_expr($5); + if ($offset < 0) { + $line =~ s/$instr$suffix/${instr}u$suffix/; + } + } - # Misc bugs/deficiencies: - # armasm seems unable to parse e.g. "vmov s0, s1" without a type - # qualifier, thus add .f32. 
- $line =~ s/^(\s+(?:vmov|vadd))(\s+s\d+\s*,\s*s\d+)/$1.f32$2/; + if ($ENV{GASPP_ARMASM64_INVERT_SCALE}) { + # Instructions like fcvtzs and scvtf store the scale value + # inverted in the opcode (stored as 64 - scale), but armasm64 + # in early versions stores it as-is. Thus convert from + # "fcvtzs w0, s0, #8" into "fcvtzs w0, s0, #56". + if ($line =~ /(?:fcvtzs|scvtf)\s+(\w+)\s*,\s*(\w+)\s*,\s*#(\d+)/) { + my $scale = $3; + my $inverted_scale = 64 - $3; + $line =~ s/#$scale/#$inverted_scale/; + } + } + } # armasm is unable to parse &0x - add spacing $line =~ s/&0x/& 0x/g; } @@ -944,7 +1063,7 @@ # Convert "mov pc, lr" into "bx lr", since the former only works # for switching from arm to thumb (and only in armv7), but not # from thumb to arm. - s/mov\s*pc\s*,\s*lr/bx lr/g; + $line =~ s/mov\s*pc\s*,\s*lr/bx lr/g; # Convert stmdb/ldmia/stmfd/ldmfd/ldm with only one register into a plain str/ldr with post-increment/decrement. # Wide thumb2 encoding requires at least two registers in register list while all other encodings support one register too. @@ -1013,11 +1132,16 @@ $line =~ s/\.text/AREA |.text|, CODE, READONLY, ALIGN=4, CODEALIGN/; $line =~ s/(\s*)(.*)\.rodata/$1AREA |.rodata|, DATA, READONLY, ALIGN=5/; $line =~ s/\.data/AREA |.data|, DATA, ALIGN=5/; - + } + if ($as_type eq "armasm" and $arch eq "arm") { $line =~ s/fmxr/vmsr/; $line =~ s/fmrx/vmrs/; $line =~ s/fadds/vadd.f32/; } + if ($as_type eq "armasm" and $arch eq "aarch64") { + # Convert "b.eq" into "beq" + $line =~ s/\bb\.($arm_cond_codes)\b/b\1/; + } # catch unknown section names that aren't mach-o style (with a comma) if ($as_type =~ /apple-/ and $line =~ /.section ([^,]*)$/) { @@ -1038,7 +1162,7 @@ grep exists $thumb_labels{$_}, keys %call_targets; } else { map print(ASMFILE "\tIMPORT $_\n"), - grep ! exists $labels_seen{$_}, (keys %call_targets, keys %mov32_targets); + grep ! 
exists $labels_seen{$_}, (keys %call_targets, keys %import_symbols); print ASMFILE "\tEND\n"; } diff -Nru x264-0.152.2854+gite9a5903/version.sh x264-0.158.2988+git-20191101.7817004/version.sh --- x264-0.152.2854+gite9a5903/version.sh 2017-12-31 12:50:51.000000000 +0000 +++ x264-0.158.2988+git-20191101.7817004/version.sh 2019-11-09 05:16:29.000000000 +0000 @@ -1,5 +1,28 @@ #!/bin/sh -# Script modified from upstream source for Debian packaging since packaging -# won't include .git repository. -echo '#define X264_VERSION " r2854 e9a5903"' -echo '#define X264_POINTVER "0.152.2854 e9a5903"' + +cd "$(dirname "$0")" >/dev/null && [ -f x264.h ] || exit 1 + +api="$(grep '#define X264_BUILD' < x264.h | sed 's/^.* \([1-9][0-9]*\).*$/\1/')" +ver="x" +version="" + +if [ -d .git ] && command -v git >/dev/null 2>&1 ; then + localver="$(($(git rev-list HEAD | wc -l)))" + if [ "$localver" -gt 1 ] ; then + ver_diff="$(($(git rev-list origin/master..HEAD | wc -l)))" + ver="$((localver-ver_diff))" + echo "#define X264_REV $ver" + echo "#define X264_REV_DIFF $ver_diff" + if [ "$ver_diff" -ne 0 ] ; then + ver="$ver+$ver_diff" + fi + if git status | grep -q "modified:" ; then + ver="${ver}M" + fi + ver="$ver $(git rev-list -n 1 HEAD | cut -c 1-7)" + version=" r$ver" + fi +fi + +echo "#define X264_VERSION \"$version\"" +echo "#define X264_POINTVER \"0.$api.$ver\"" diff -Nru x264-0.152.2854+gite9a5903/x264.c x264-0.158.2988+git-20191101.7817004/x264.c --- x264-0.152.2854+gite9a5903/x264.c 2017-12-31 12:50:51.000000000 +0000 +++ x264-0.158.2988+git-20191101.7817004/x264.c 2019-11-09 05:16:29.000000000 +0000 @@ -1,7 +1,7 @@ /***************************************************************************** * x264: top-level x264cli functions ***************************************************************************** - * Copyright (C) 2003-2017 x264 project + * Copyright (C) 2003-2019 x264 project * * Authors: Loren Merritt * Laurent Aimar @@ -39,12 +39,14 @@ #include #include -#include 
"common/common.h" #include "x264cli.h" #include "input/input.h" #include "output/output.h" #include "filters/filters.h" +#define QP_MAX_SPEC (51+6*2) +#define QP_MAX (QP_MAX_SPEC+18) + #define FAIL_IF_ERROR( cond, ... ) FAIL_IF_ERR( cond, "x264", __VA_ARGS__ ) #if HAVE_LAVF @@ -164,11 +166,52 @@ /* video filter operation struct */ static cli_vid_filter_t filter; -static const char * const demuxer_names[] = +const char * const x264_avcintra_class_names[] = { "50", "100", "200", 0 }; +const char * const x264_cqm_names[] = { "flat", "jvt", 0 }; +const char * const x264_log_level_names[] = { "none", "error", "warning", "info", "debug", 0 }; +const char * const x264_partition_names[] = { "p8x8", "p4x4", "b8x8", "i8x8", "i4x4", "none", "all", 0 }; +const char * const x264_pulldown_names[] = { "none", "22", "32", "64", "double", "triple", "euro", 0 }; +const char * const x264_range_names[] = { "auto", "tv", "pc", 0 }; + +const char * const x264_output_csp_names[] = +{ +#if !X264_CHROMA_FORMAT || X264_CHROMA_FORMAT == X264_CSP_I400 + "i400", +#endif +#if !X264_CHROMA_FORMAT || X264_CHROMA_FORMAT == X264_CSP_I420 + "i420", +#endif +#if !X264_CHROMA_FORMAT || X264_CHROMA_FORMAT == X264_CSP_I422 + "i422", +#endif +#if !X264_CHROMA_FORMAT || X264_CHROMA_FORMAT == X264_CSP_I444 + "i444", "rgb", +#endif + 0 +}; + +const char * const x264_valid_profile_names[] = +{ +#if !X264_CHROMA_FORMAT || X264_CHROMA_FORMAT <= X264_CSP_I420 +#if HAVE_BITDEPTH8 +#if !X264_CHROMA_FORMAT || X264_CHROMA_FORMAT == X264_CSP_I420 + "baseline", "main", +#endif + "high", +#endif +#if HAVE_BITDEPTH10 + "high10", +#endif +#endif +#if !X264_CHROMA_FORMAT || X264_CHROMA_FORMAT == X264_CSP_I422 + "high422", +#endif + "high444", 0 +}; + +const char * const x264_demuxer_names[] = { - "auto", - "raw", - "y4m", + "auto", "raw", "y4m", #if HAVE_AVS "avs", #endif @@ -181,43 +224,24 @@ 0 }; -static const char * const muxer_names[] = +const char * const x264_muxer_names[] = { - "auto", - "raw", - "mkv", - "flv", + 
"auto", "raw", "mkv", "flv", #if HAVE_GPAC || HAVE_LSMASH "mp4", #endif 0 }; -static const char * const pulldown_names[] = { "none", "22", "32", "64", "double", "triple", "euro", 0 }; -static const char * const log_level_names[] = { "none", "error", "warning", "info", "debug", 0 }; -static const char * const output_csp_names[] = -{ -#if !X264_CHROMA_FORMAT || X264_CHROMA_FORMAT == X264_CSP_I420 - "i420", -#endif -#if !X264_CHROMA_FORMAT || X264_CHROMA_FORMAT == X264_CSP_I422 - "i422", -#endif -#if !X264_CHROMA_FORMAT || X264_CHROMA_FORMAT == X264_CSP_I444 - "i444", "rgb", -#endif - 0 -}; static const char * const chroma_format_names[] = { [0] = "all", + [X264_CSP_I400] = "i400", [X264_CSP_I420] = "i420", [X264_CSP_I422] = "i422", [X264_CSP_I444] = "i444" }; -static const char * const range_names[] = { "auto", "tv", "pc", 0 }; - typedef struct { int mod; @@ -331,8 +355,8 @@ #else printf( "using an unknown compiler\n" ); #endif - printf( "x264 configuration: --bit-depth=%d --chroma-format=%s\n", X264_BIT_DEPTH, chroma_format_names[X264_CHROMA_FORMAT] ); - printf( "libx264 configuration: --bit-depth=%d --chroma-format=%s\n", x264_bit_depth, chroma_format_names[x264_chroma_format] ); + printf( "x264 configuration: --chroma-format=%s\n", chroma_format_names[X264_CHROMA_FORMAT] ); + printf( "libx264 configuration: --chroma-format=%s\n", chroma_format_names[x264_chroma_format] ); printf( "x264 license: " ); #if HAVE_GPL printf( "GPL version 2 or later\n" ); @@ -349,8 +373,11 @@ #endif } -int main( int argc, char **argv ) +REALIGN_STACK int main( int argc, char **argv ) { + if( argc == 4 && !strcmp( argv[1], "--autocomplete" ) ) + return x264_cli_autocomplete( argv[2], argv[3] ); + x264_param_t param; cli_opt_t opt = {0}; int ret = 0; @@ -483,7 +510,7 @@ " .mkv -> Matroska\n" " .flv -> Flash Video\n" " .mp4 -> MP4 if compiled with GPAC or L-SMASH support (%s)\n" - "Output bit depth: %d (configured at compile time)\n" + "Output bit depth: %s\n." 
"\n" "Options:\n" "\n" @@ -514,7 +541,15 @@ #else "no", #endif - x264_bit_depth +#if HAVE_BITDEPTH8 && HAVE_BITDEPTH10 + "8/10" +#elif HAVE_BITDEPTH8 + "8" +#elif HAVE_BITDEPTH10 + "10" +#else + "none" +#endif ); H0( "Example usage:\n" ); H0( "\n" ); @@ -539,8 +574,9 @@ H0( " --profile Force the limits of an H.264 profile\n" " Overrides all settings.\n" ); H2( -#if X264_CHROMA_FORMAT <= X264_CSP_I420 -#if X264_BIT_DEPTH==8 +#if !X264_CHROMA_FORMAT || X264_CHROMA_FORMAT <= X264_CSP_I420 +#if HAVE_BITDEPTH8 +#if !X264_CHROMA_FORMAT || X264_CHROMA_FORMAT == X264_CSP_I420 " - baseline:\n" " --no-8x8dct --bframes 0 --no-cabac\n" " --cqm flat --weightp 0\n" @@ -549,14 +585,17 @@ " - main:\n" " --no-8x8dct --cqm flat\n" " No lossless.\n" +#endif " - high:\n" " No lossless.\n" #endif +#if HAVE_BITDEPTH10 " - high10:\n" " No lossless.\n" " Support for bit depth 8-10.\n" #endif -#if X264_CHROMA_FORMAT <= X264_CSP_I422 +#endif +#if !X264_CHROMA_FORMAT || X264_CHROMA_FORMAT == X264_CSP_I422 " - high422:\n" " No lossless.\n" " Support for bit depth 8-10.\n" @@ -565,19 +604,7 @@ " - high444:\n" " Support for bit depth 8-10.\n" " Support for 4:2:0/4:2:2/4:4:4 chroma subsampling.\n" ); - else H0( - " - " -#if X264_CHROMA_FORMAT <= X264_CSP_I420 -#if X264_BIT_DEPTH==8 - "baseline,main,high," -#endif - "high10," -#endif -#if X264_CHROMA_FORMAT <= X264_CSP_I422 - "high422," -#endif - "high444\n" - ); + else H0( " - %s\n", stringify_names( buf, x264_valid_profile_names ) ); H0( " --preset Use a preset to select encoding settings [medium]\n" " Overridden by user settings.\n" ); H2( " - ultrafast:\n" @@ -726,7 +753,7 @@ H2( " --crf-max With CRF+VBV, limit RF to this value\n" " May cause VBV underflows!\n" ); H2( " --qpmin Set min QP [%d]\n", defaults->rc.i_qp_min ); - H2( " --qpmax Set max QP [%d]\n", defaults->rc.i_qp_max ); + H2( " --qpmax Set max QP [%d]\n", X264_MIN( defaults->rc.i_qp_max, QP_MAX ) ); H2( " --qpstep Set max QP step [%d]\n", defaults->rc.i_qp_step ); H2( " --ratetol 
Tolerance of ABR ratecontrol and VBV [%.1f]\n", defaults->rc.f_rate_tolerance ); H2( " --ipratio QP factor between I and P [%.2f]\n", defaults->rc.f_ip_factor ); @@ -764,9 +791,8 @@ H1( "Analysis:\n" ); H1( "\n" ); H1( " -A, --partitions Partitions to consider [\"p8x8,b8x8,i8x8,i4x4\"]\n" - " - p8x8, p4x4, b8x8, i8x8, i4x4\n" - " - none, all\n" - " (p4x4 requires p8x8. i8x8 requires --8x8dct.)\n" ); + " - %s\n" + " (p4x4 requires p8x8. i8x8 requires --8x8dct.)\n", stringify_names( buf, x264_partition_names ) ); H1( " --direct Direct MV prediction mode [\"%s\"]\n" " - none, spatial, temporal, auto\n", strtable_lookup( x264_direct_pred_names, defaults->analyse.i_direct_mv_pred ) ); @@ -818,8 +844,8 @@ H2( " --deadzone-inter Set the size of the inter luma quantization deadzone [%d]\n", defaults->analyse.i_luma_deadzone[0] ); H2( " --deadzone-intra Set the size of the intra luma quantization deadzone [%d]\n", defaults->analyse.i_luma_deadzone[1] ); H2( " Deadzones should be in the range 0 - 32.\n" ); - H2( " --cqm Preset quant matrices [\"flat\"]\n" - " - jvt, flat\n" ); + H2( " --cqm Preset quant matrices [\"%s\"]\n" + " - %s\n", x264_cqm_names[0], stringify_names( buf, x264_cqm_names ) ); H1( " --cqmfile Read custom quant matrices from a JM-compatible file\n" ); H2( " Overrides any other --cqm* options.\n" ); H2( " --cqm4 Set all 4x4 quant matrices\n" @@ -842,7 +868,7 @@ " - component, pal, ntsc, secam, mac, undef\n", strtable_lookup( x264_vidformat_names, defaults->vui.i_vidformat ) ); H2( " --range Specify color range [\"%s\"]\n" - " - %s\n", range_names[0], stringify_names( buf, range_names ) ); + " - %s\n", x264_range_names[0], stringify_names( buf, x264_range_names ) ); H2( " --colorprim Specify color primaries [\"%s\"]\n" " - undef, bt709, bt470m, bt470bg, smpte170m,\n" " smpte240m, film, bt2020, smpte428,\n" @@ -852,16 +878,21 @@ " - undef, bt709, bt470m, bt470bg, smpte170m,\n" " smpte240m, linear, log100, log316,\n" " iec61966-2-4, bt1361e, iec61966-2-1,\n" - 
" bt2020-10, bt2020-12, smpte2084, smpte428\n", + " bt2020-10, bt2020-12, smpte2084, smpte428,\n" + " arib-std-b67\n", strtable_lookup( x264_transfer_names, defaults->vui.i_transfer ) ); H2( " --colormatrix Specify color matrix setting [\"%s\"]\n" " - undef, bt709, fcc, bt470bg, smpte170m,\n" " smpte240m, GBR, YCgCo, bt2020nc, bt2020c,\n" - " smpte2085\n", + " smpte2085, chroma-derived-nc,\n" + " chroma-derived-c, ICtCp\n", strtable_lookup( x264_colmatrix_names, defaults->vui.i_colmatrix ) ); H2( " --chromaloc Specify chroma sample location (0 to 5) [%d]\n", defaults->vui.i_chroma_loc ); - + H2( " --alternative-transfer Specify an alternative transfer\n" + " characteristics [\"%s\"]\n" + " - same values as --transfer\n", + strtable_lookup( x264_transfer_names, defaults->i_alternative_transfer ) ); H2( " --nal-hrd Signal HRD information (requires vbv-bufsize)\n" " - none, vbr, cbr (cbr not allowed in .mp4)\n" ); H2( " --filler Force hard-CBR and generate filler (implied by\n" @@ -875,17 +906,24 @@ H0( "\n" ); H0( " -o, --output Specify output file\n" ); H1( " --muxer Specify output container format [\"%s\"]\n" - " - %s\n", muxer_names[0], stringify_names( buf, muxer_names ) ); + " - %s\n", x264_muxer_names[0], stringify_names( buf, x264_muxer_names ) ); H1( " --demuxer Specify input container format [\"%s\"]\n" - " - %s\n", demuxer_names[0], stringify_names( buf, demuxer_names ) ); + " - %s\n", x264_demuxer_names[0], stringify_names( buf, x264_demuxer_names ) ); H1( " --input-fmt Specify input file format (requires lavf support)\n" ); H1( " --input-csp Specify input colorspace format for raw input\n" ); print_csp_names( longhelp ); H1( " --output-csp Specify output colorspace [\"%s\"]\n" - " - %s\n", output_csp_names[0], stringify_names( buf, output_csp_names ) ); + " - %s\n", +#if X264_CHROMA_FORMAT + x264_output_csp_names[0], +#else + "i420", +#endif + stringify_names( buf, x264_output_csp_names ) ); H1( " --input-depth Specify input bit depth for raw input\n" ); 
+ H1( " --output-depth Specify output bit depth\n" ); H1( " --input-range Specify input color range [\"%s\"]\n" - " - %s\n", range_names[0], stringify_names( buf, range_names ) ); + " - %s\n", x264_range_names[0], stringify_names( buf, x264_range_names ) ); H1( " --input-res Specify input resolution (width x height)\n" ); H1( " --index Filename for input index file\n" ); H0( " --sar width:height Specify Sample Aspect Ratio\n" ); @@ -895,7 +933,9 @@ H0( " --level Specify level (as defined by Annex A)\n" ); H1( " --bluray-compat Enable compatibility hacks for Blu-ray support\n" ); H1( " --avcintra-class Use compatibility hacks for AVC-Intra class\n" - " - 50, 100, 200\n" ); + " - %s\n", stringify_names( buf, x264_avcintra_class_names ) ); + H1( " --avcintra-flavor AVC-Intra flavor [\"%s\"]\n" + " - %s\n", x264_avcintra_flavor_names[0], stringify_names( buf, x264_avcintra_flavor_names ) ); H1( " --stitchable Don't optimize headers based on video content\n" " Ensures ability to recombine a segmented encode\n" ); H1( "\n" ); @@ -903,8 +943,8 @@ H1( " --no-progress Don't show the progress indicator while encoding\n" ); H0( " --quiet Quiet Mode\n" ); H1( " --log-level Specify the maximum level of logging [\"%s\"]\n" - " - %s\n", strtable_lookup( log_level_names, cli_log_level - X264_LOG_NONE ), - stringify_names( buf, log_level_names ) ); + " - %s\n", strtable_lookup( x264_log_level_names, cli_log_level - X264_LOG_NONE ), + stringify_names( buf, x264_log_level_names ) ); H1( " --psnr Enable PSNR computation\n" ); H1( " --ssim Enable SSIM computation\n" ); H1( " --threads Force a specific number of threads\n" ); @@ -972,6 +1012,7 @@ OPT_INPUT_RES, OPT_INPUT_CSP, OPT_INPUT_DEPTH, + OPT_OUTPUT_DEPTH, OPT_DTS_COMPRESSION, OPT_OUTPUT_CSP, OPT_INPUT_RANGE, @@ -998,6 +1039,7 @@ { "open-gop", no_argument, NULL, 0 }, { "bluray-compat", no_argument, NULL, 0 }, { "avcintra-class", required_argument, NULL, 0 }, + { "avcintra-flavor", required_argument, NULL, 0 }, { "min-keyint", 
required_argument, NULL, 'i' }, { "keyint", required_argument, NULL, 'I' }, { "intra-refresh", no_argument, NULL, 0 }, @@ -1133,12 +1175,14 @@ { "pulldown", required_argument, NULL, OPT_PULLDOWN }, { "fake-interlaced", no_argument, NULL, 0 }, { "frame-packing", required_argument, NULL, 0 }, + { "alternative-transfer", required_argument, NULL, 0 }, { "vf", required_argument, NULL, OPT_VIDEO_FILTER }, { "video-filter", required_argument, NULL, OPT_VIDEO_FILTER }, { "input-fmt", required_argument, NULL, OPT_INPUT_FMT }, { "input-res", required_argument, NULL, OPT_INPUT_RES }, { "input-csp", required_argument, NULL, OPT_INPUT_CSP }, { "input-depth", required_argument, NULL, OPT_INPUT_DEPTH }, + { "output-depth", required_argument, NULL, OPT_OUTPUT_DEPTH }, { "dts-compress", no_argument, NULL, OPT_DTS_COMPRESSION }, { "output-csp", required_argument, NULL, OPT_OUTPUT_CSP }, { "input-range", required_argument, NULL, OPT_INPUT_RANGE }, @@ -1298,7 +1342,9 @@ /* force the output csp to what the user specified (or the default) */ param->i_csp = info->csp; int csp = info->csp & X264_CSP_MASK; - if( output_csp == X264_CSP_I420 && (csp < X264_CSP_I420 || csp >= X264_CSP_I422) ) + if( output_csp == X264_CSP_I400 && csp != X264_CSP_I400 ) + param->i_csp = X264_CSP_I400; + else if( output_csp == X264_CSP_I420 && (csp < X264_CSP_I420 || csp >= X264_CSP_I422) ) param->i_csp = X264_CSP_I420; else if( output_csp == X264_CSP_I422 && (csp < X264_CSP_I422 || csp >= X264_CSP_I444) ) param->i_csp = X264_CSP_I422; @@ -1314,10 +1360,11 @@ if( x264_init_vid_filter( "resize", handle, &filter, info, param, NULL ) ) return -1; - char args[20]; - sprintf( args, "bit_depth=%d", x264_bit_depth ); + char args[20], name[20]; + sprintf( args, "bit_depth=%d", param->i_bitdepth ); + sprintf( name, "depth_%d", param->i_bitdepth ); - if( x264_init_vid_filter( "depth", handle, &filter, info, param, args ) ) + if( x264_init_vid_filter( name, handle, &filter, info, param, args ) ) return -1; return 0; @@ 
-1348,9 +1395,9 @@ static int parse( int argc, char **argv, x264_param_t *param, cli_opt_t *opt ) { char *input_filename = NULL; - const char *demuxer = demuxer_names[0]; + const char *demuxer = x264_demuxer_names[0]; char *output_filename = NULL; - const char *muxer = muxer_names[0]; + const char *muxer = x264_muxer_names[0]; char *tcfile_name = NULL; x264_param_t defaults; char *profile = NULL; @@ -1432,10 +1479,10 @@ output_filename = optarg; break; case OPT_MUXER: - FAIL_IF_ERROR( parse_enum_name( optarg, muxer_names, &muxer ), "Unknown muxer `%s'\n", optarg ); + FAIL_IF_ERROR( parse_enum_name( optarg, x264_muxer_names, &muxer ), "Unknown muxer `%s'\n", optarg ); break; case OPT_DEMUXER: - FAIL_IF_ERROR( parse_enum_name( optarg, demuxer_names, &demuxer ), "Unknown demuxer `%s'\n", optarg ); + FAIL_IF_ERROR( parse_enum_name( optarg, x264_demuxer_names, &demuxer ), "Unknown demuxer `%s'\n", optarg ); break; case OPT_INDEX: input_opt.index_file = optarg; @@ -1460,7 +1507,7 @@ cli_log_level = param->i_log_level = X264_LOG_DEBUG; break; case OPT_LOG_LEVEL: - if( !parse_enum_value( optarg, log_level_names, &cli_log_level ) ) + if( !parse_enum_value( optarg, x264_log_level_names, &cli_log_level ) ) cli_log_level += X264_LOG_NONE; else cli_log_level = atoi( optarg ); @@ -1499,7 +1546,7 @@ input_opt.timebase = optarg; break; case OPT_PULLDOWN: - FAIL_IF_ERROR( parse_enum_value( optarg, pulldown_names, &opt->i_pulldown ), "Unknown pulldown `%s'\n", optarg ); + FAIL_IF_ERROR( parse_enum_value( optarg, x264_pulldown_names, &opt->i_pulldown ), "Unknown pulldown `%s'\n", optarg ); break; case OPT_VIDEO_FILTER: vid_filters = optarg; @@ -1516,25 +1563,28 @@ case OPT_INPUT_DEPTH: input_opt.bit_depth = atoi( optarg ); break; + case OPT_OUTPUT_DEPTH: + param->i_bitdepth = atoi( optarg ); + break; case OPT_DTS_COMPRESSION: output_opt.use_dts_compress = 1; break; case OPT_OUTPUT_CSP: - FAIL_IF_ERROR( parse_enum_value( optarg, output_csp_names, &output_csp ), "Unknown output csp 
`%s'\n", optarg ); + FAIL_IF_ERROR( parse_enum_value( optarg, x264_output_csp_names, &output_csp ), "Unknown output csp `%s'\n", optarg ); // correct the parsed value to the libx264 csp value #if X264_CHROMA_FORMAT static const uint8_t output_csp_fix[] = { X264_CHROMA_FORMAT, X264_CSP_RGB }; #else - static const uint8_t output_csp_fix[] = { X264_CSP_I420, X264_CSP_I422, X264_CSP_I444, X264_CSP_RGB }; + static const uint8_t output_csp_fix[] = { X264_CSP_I400, X264_CSP_I420, X264_CSP_I422, X264_CSP_I444, X264_CSP_RGB }; #endif param->i_csp = output_csp = output_csp_fix[output_csp]; break; case OPT_INPUT_RANGE: - FAIL_IF_ERROR( parse_enum_value( optarg, range_names, &input_opt.input_range ), "Unknown input range `%s'\n", optarg ); + FAIL_IF_ERROR( parse_enum_value( optarg, x264_range_names, &input_opt.input_range ), "Unknown input range `%s'\n", optarg ); input_opt.input_range += RANGE_AUTO; break; case OPT_RANGE: - FAIL_IF_ERROR( parse_enum_value( optarg, range_names, &param->vui.b_fullrange ), "Unknown range `%s'\n", optarg ); + FAIL_IF_ERROR( parse_enum_value( optarg, x264_range_names, &param->vui.b_fullrange ), "Unknown range `%s'\n", optarg ); input_opt.output_range = param->vui.b_fullrange += RANGE_AUTO; break; default: @@ -1627,15 +1677,23 @@ /* init threaded input while the information about the input video is unaltered by filtering */ #if HAVE_THREAD - if( info.thread_safe && (b_thread_input || param->i_threads > 1 + const cli_input_t *thread_input; + if( HAVE_BITDEPTH8 && param->i_bitdepth == 8 ) + thread_input = &thread_8_input; + else if( HAVE_BITDEPTH10 && param->i_bitdepth == 10 ) + thread_input = &thread_10_input; + else + thread_input = NULL; + + if( thread_input && info.thread_safe && (b_thread_input || param->i_threads > 1 || (param->i_threads == X264_THREADS_AUTO && x264_cpu_num_processors() > 1)) ) { - if( thread_input.open_file( NULL, &opt->hin, &info, NULL ) ) + if( thread_input->open_file( NULL, &opt->hin, &info, NULL ) ) { fprintf( stderr, "x264 
[error]: threaded input failed\n" ); return -1; } - cli_input = thread_input; + cli_input = *thread_input; } #endif diff -Nru x264-0.152.2854+gite9a5903/x264cli.h x264-0.158.2988+git-20191101.7817004/x264cli.h --- x264-0.152.2854+gite9a5903/x264cli.h 2017-12-31 12:50:51.000000000 +0000 +++ x264-0.158.2988+git-20191101.7817004/x264cli.h 2019-11-09 05:16:29.000000000 +0000 @@ -1,7 +1,7 @@ /***************************************************************************** * x264cli.h: x264cli common ***************************************************************************** - * Copyright (C) 2003-2017 x264 project + * Copyright (C) 2003-2019 x264 project * * Authors: Laurent Aimar * Loren Merritt @@ -27,13 +27,24 @@ #ifndef X264_CLI_H #define X264_CLI_H -#include "common/common.h" +#include "common/base.h" /* In microseconds */ #define UPDATE_INTERVAL 250000 typedef void *hnd_t; +extern const char * const x264_avcintra_class_names[]; +extern const char * const x264_cqm_names[]; +extern const char * const x264_log_level_names[]; +extern const char * const x264_partition_names[]; +extern const char * const x264_pulldown_names[]; +extern const char * const x264_range_names[]; +extern const char * const x264_output_csp_names[]; +extern const char * const x264_valid_profile_names[]; +extern const char * const x264_demuxer_names[]; +extern const char * const x264_muxer_names[]; + static inline uint64_t gcd( uint64_t a, uint64_t b ) { while( 1 ) @@ -62,6 +73,7 @@ void x264_cli_log( const char *name, int i_level, const char *fmt, ... ); void x264_cli_printf( int i_level, const char *fmt, ... 
); +int x264_cli_autocomplete( const char *prev, const char *cur ); #ifdef _WIN32 void x264_cli_set_console_title( const char *title ); diff -Nru x264-0.152.2854+gite9a5903/x264dll.c x264-0.158.2988+git-20191101.7817004/x264dll.c --- x264-0.152.2854+gite9a5903/x264dll.c 2017-12-31 12:50:51.000000000 +0000 +++ x264-0.158.2988+git-20191101.7817004/x264dll.c 2019-11-09 05:16:29.000000000 +0000 @@ -1,7 +1,7 @@ /***************************************************************************** * x264dll: x264 DLLMain for win32 ***************************************************************************** - * Copyright (C) 2009-2017 x264 project + * Copyright (C) 2009-2019 x264 project * * Authors: Anton Mitrofanov * @@ -23,7 +23,7 @@ * For more information, contact us at licensing@x264.com. *****************************************************************************/ -#include "common/common.h" +#include "common/base.h" #include /* Callback for our DLL so we can initialize pthread */ diff -Nru x264-0.152.2854+gite9a5903/x264.h x264-0.158.2988+git-20191101.7817004/x264.h --- x264-0.152.2854+gite9a5903/x264.h 2017-12-31 12:50:51.000000000 +0000 +++ x264-0.158.2988+git-20191101.7817004/x264.h 2019-11-09 05:16:29.000000000 +0000 @@ -1,7 +1,7 @@ /***************************************************************************** * x264.h: x264 public header ***************************************************************************** - * Copyright (C) 2003-2017 x264 project + * Copyright (C) 2003-2019 x264 project * * Authors: Laurent Aimar * Loren Merritt @@ -45,7 +45,20 @@ #include "x264_config.h" -#define X264_BUILD 152 +#define X264_BUILD 158 + +#ifdef _WIN32 +# define X264_DLL_IMPORT __declspec(dllimport) +# define X264_DLL_EXPORT __declspec(dllexport) +#else +# if defined(__GNUC__) && (__GNUC__ >= 4) +# define X264_DLL_IMPORT +# define X264_DLL_EXPORT __attribute__((visibility("default"))) +# else +# define X264_DLL_IMPORT +# define X264_DLL_EXPORT +# endif +#endif /* Application 
developers planning to link against a shared library version of * libx264 from a Microsoft Visual Studio or similar development environment @@ -53,9 +66,13 @@ * This clause does not apply to MinGW, similar development environments, or non * Windows platforms. */ #ifdef X264_API_IMPORTS -#define X264_API __declspec(dllimport) +# define X264_API X264_DLL_IMPORT #else -#define X264_API +# ifdef X264_API_EXPORTS +# define X264_API X264_DLL_EXPORT +# else +# define X264_API +# endif #endif /* x264_t: @@ -170,6 +187,7 @@ #define X264_ANALYSE_PSUB16x16 0x0010 /* Analyse p16x8, p8x16 and p8x8 */ #define X264_ANALYSE_PSUB8x8 0x0020 /* Analyse p8x4, p4x8, p4x4 */ #define X264_ANALYSE_BSUB16x16 0x0100 /* Analyse b16x8, b8x16 and b8x8 */ + #define X264_DIRECT_PRED_NONE 0 #define X264_DIRECT_PRED_SPATIAL 1 #define X264_DIRECT_PRED_TEMPORAL 2 @@ -202,6 +220,10 @@ #define X264_KEYINT_MIN_AUTO 0 #define X264_KEYINT_MAX_INFINITE (1<<30) +/* AVC-Intra flavors */ +#define X264_AVCINTRA_FLAVOR_PANASONIC 0 +#define X264_AVCINTRA_FLAVOR_SONY 1 + static const char * const x264_direct_pred_names[] = { "none", "spatial", "temporal", "auto", 0 }; static const char * const x264_motion_est_names[] = { "dia", "hex", "umh", "esa", "tesa", 0 }; static const char * const x264_b_pyramid_names[] = { "none", "strict", "normal", 0 }; @@ -211,30 +233,32 @@ static const char * const x264_colorprim_names[] = { "", "bt709", "undef", "", "bt470m", "bt470bg", "smpte170m", "smpte240m", "film", "bt2020", "smpte428", "smpte431", "smpte432", 0 }; static const char * const x264_transfer_names[] = { "", "bt709", "undef", "", "bt470m", "bt470bg", "smpte170m", "smpte240m", "linear", "log100", "log316", - "iec61966-2-4", "bt1361e", "iec61966-2-1", "bt2020-10", "bt2020-12", "smpte2084", "smpte428", 0 }; + "iec61966-2-4", "bt1361e", "iec61966-2-1", "bt2020-10", "bt2020-12", "smpte2084", "smpte428", "arib-std-b67", 0 }; static const char * const x264_colmatrix_names[] = { "GBR", "bt709", "undef", "", "fcc", "bt470bg", 
"smpte170m", "smpte240m", "YCgCo", "bt2020nc", "bt2020c", - "smpte2085", 0 }; + "smpte2085", "chroma-derived-nc", "chroma-derived-c", "ICtCp", 0 }; static const char * const x264_nal_hrd_names[] = { "none", "vbr", "cbr", 0 }; +static const char * const x264_avcintra_flavor_names[] = { "panasonic", "sony", 0 }; /* Colorspace type */ #define X264_CSP_MASK 0x00ff /* */ #define X264_CSP_NONE 0x0000 /* Invalid mode */ -#define X264_CSP_I420 0x0001 /* yuv 4:2:0 planar */ -#define X264_CSP_YV12 0x0002 /* yvu 4:2:0 planar */ -#define X264_CSP_NV12 0x0003 /* yuv 4:2:0, with one y plane and one packed u+v */ -#define X264_CSP_NV21 0x0004 /* yuv 4:2:0, with one y plane and one packed v+u */ -#define X264_CSP_I422 0x0005 /* yuv 4:2:2 planar */ -#define X264_CSP_YV16 0x0006 /* yvu 4:2:2 planar */ -#define X264_CSP_NV16 0x0007 /* yuv 4:2:2, with one y plane and one packed u+v */ -#define X264_CSP_YUYV 0x0008 /* yuyv 4:2:2 packed */ -#define X264_CSP_UYVY 0x0009 /* uyvy 4:2:2 packed */ -#define X264_CSP_V210 0x000a /* 10-bit yuv 4:2:2 packed in 32 */ -#define X264_CSP_I444 0x000b /* yuv 4:4:4 planar */ -#define X264_CSP_YV24 0x000c /* yvu 4:4:4 planar */ -#define X264_CSP_BGR 0x000d /* packed bgr 24bits */ -#define X264_CSP_BGRA 0x000e /* packed bgr 32bits */ -#define X264_CSP_RGB 0x000f /* packed rgb 24bits */ -#define X264_CSP_MAX 0x0010 /* end of list */ +#define X264_CSP_I400 0x0001 /* monochrome 4:0:0 */ +#define X264_CSP_I420 0x0002 /* yuv 4:2:0 planar */ +#define X264_CSP_YV12 0x0003 /* yvu 4:2:0 planar */ +#define X264_CSP_NV12 0x0004 /* yuv 4:2:0, with one y plane and one packed u+v */ +#define X264_CSP_NV21 0x0005 /* yuv 4:2:0, with one y plane and one packed v+u */ +#define X264_CSP_I422 0x0006 /* yuv 4:2:2 planar */ +#define X264_CSP_YV16 0x0007 /* yvu 4:2:2 planar */ +#define X264_CSP_NV16 0x0008 /* yuv 4:2:2, with one y plane and one packed u+v */ +#define X264_CSP_YUYV 0x0009 /* yuyv 4:2:2 packed */ +#define X264_CSP_UYVY 0x000a /* uyvy 4:2:2 packed */ +#define 
X264_CSP_V210 0x000b /* 10-bit yuv 4:2:2 packed in 32 */ +#define X264_CSP_I444 0x000c /* yuv 4:4:4 planar */ +#define X264_CSP_YV24 0x000d /* yvu 4:4:4 planar */ +#define X264_CSP_BGR 0x000e /* packed bgr 24bits */ +#define X264_CSP_BGRA 0x000f /* packed bgr 32bits */ +#define X264_CSP_RGB 0x0010 /* packed rgb 24bits */ +#define X264_CSP_MAX 0x0011 /* end of list */ #define X264_CSP_VFLIP 0x1000 /* the csp is vertically flipped */ #define X264_CSP_HIGH_DEPTH 0x2000 /* the csp has a depth of 16 bits per pixel component */ @@ -292,6 +316,7 @@ int i_width; int i_height; int i_csp; /* CSP of encoded bitstream */ + int i_bitdepth; int i_level_idc; int i_frame_total; /* number of frames to encode if known, else 0 */ @@ -336,6 +361,7 @@ int b_open_gop; int b_bluray_compat; int i_avcintra_class; + int i_avcintra_flavor; int b_deblocking_filter; int i_deblocking_filter_alphac0; /* [-6, 6] -6 light filter, 6 strong */ @@ -407,7 +433,7 @@ { int i_rc_method; /* X264_RC_* */ - int i_qp_constant; /* 0 to (51 + 6*(x264_bit_depth-8)). 
0=lossless */ + int i_qp_constant; /* 0=lossless */ int i_qp_min; /* min allowed QP value */ int i_qp_max; /* max allowed QP value */ int i_qp_step; /* max QP step between frames */ @@ -459,6 +485,9 @@ /* frame packing arrangement flag */ int i_frame_packing; + /* alternative transfer SEI */ + int i_alternative_transfer; + /* Muxing parameters */ int b_aud; /* generate access unit delimiters */ int b_repeat_headers; /* put SPS/PPS before each keyframe */ @@ -556,7 +585,7 @@ void (*nalu_process)( x264_t *h, x264_nal_t *nal, void *opaque ); } x264_param_t; -void x264_nal_encode( x264_t *h, uint8_t *dst, x264_nal_t *nal ); +X264_API void x264_nal_encode( x264_t *h, uint8_t *dst, x264_nal_t *nal ); /**************************************************************************** * H.264 level restriction information @@ -588,7 +617,7 @@ /* x264_param_default: * fill x264_param_t with default values and do CPU detection */ -void x264_param_default( x264_param_t * ); +X264_API void x264_param_default( x264_param_t * ); /* x264_param_parse: * set one parameter by name. @@ -599,7 +628,7 @@ * value=NULL means "true" for boolean options, but is a BAD_VALUE for non-booleans. */ #define X264_PARAM_BAD_NAME (-1) #define X264_PARAM_BAD_VALUE (-2) -int x264_param_parse( x264_param_t *, const char *name, const char *value ); +X264_API int x264_param_parse( x264_param_t *, const char *name, const char *value ); /**************************************************************************** * Advanced parameter handling functions @@ -643,13 +672,13 @@ * film, animation, grain, stillimage, psnr, and ssim are psy tunings. * * returns 0 on success, negative on failure (e.g. invalid preset/tune name). 
*/ -int x264_param_default_preset( x264_param_t *, const char *preset, const char *tune ); +X264_API int x264_param_default_preset( x264_param_t *, const char *preset, const char *tune ); /* x264_param_apply_fastfirstpass: * If first-pass mode is set (rc.b_stat_read == 0, rc.b_stat_write == 1), * modify the encoder settings to disable options generally not useful on * the first pass. */ -void x264_param_apply_fastfirstpass( x264_param_t * ); +X264_API void x264_param_apply_fastfirstpass( x264_param_t * ); /* x264_param_apply_profile: * Applies the restrictions of the given profile. @@ -664,21 +693,12 @@ * decrease them. * * returns 0 on success, negative on failure (e.g. invalid profile name). */ -int x264_param_apply_profile( x264_param_t *, const char *profile ); +X264_API int x264_param_apply_profile( x264_param_t *, const char *profile ); /**************************************************************************** * Picture structures and functions ****************************************************************************/ -/* x264_bit_depth: - * Specifies the number of bits per pixel that x264 uses. This is also the - * bit depth that x264 encodes in. If this value is > 8, x264 will read - * two bytes of input data for each pixel sample, and expect the upper - * (16-x264_bit_depth) bits to be zero. - * Note: The flag X264_CSP_HIGH_DEPTH must be used to specify the - * colorspace depth as well. */ -X264_API extern const int x264_bit_depth; - /* x264_chroma_format: * Specifies the chroma formats that x264 supports encoding. When this * value is non-zero, then it represents a X264_CSP_* that is the only @@ -843,17 +863,17 @@ /* x264_picture_init: * initialize an x264_picture_t. Needs to be done if the calling application * allocates its own x264_picture_t as opposed to using x264_picture_alloc. */ -void x264_picture_init( x264_picture_t *pic ); +X264_API void x264_picture_init( x264_picture_t *pic ); /* x264_picture_alloc: * alloc data for a picture. 
You must call x264_picture_clean on it. * returns 0 on success, or -1 on malloc failure or invalid colorspace. */ -int x264_picture_alloc( x264_picture_t *pic, int i_csp, int i_width, int i_height ); +X264_API int x264_picture_alloc( x264_picture_t *pic, int i_csp, int i_width, int i_height ); /* x264_picture_clean: * free associated resource for a x264_picture_t allocated with * x264_picture_alloc ONLY */ -void x264_picture_clean( x264_picture_t *pic ); +X264_API void x264_picture_clean( x264_picture_t *pic ); /**************************************************************************** * Encoder functions @@ -868,7 +888,7 @@ /* x264_encoder_open: * create a new encoder handler, all parameters from x264_param_t are copied */ -x264_t *x264_encoder_open( x264_param_t * ); +X264_API x264_t *x264_encoder_open( x264_param_t * ); /* x264_encoder_reconfig: * various parameters from x264_param_t are copied. @@ -883,7 +903,7 @@ * more so than for other presets, many of the speed shortcuts used in ultrafast cannot be * switched out of; using reconfig to switch between ultrafast and other presets is not * recommended without a more fine-grained breakdown of parameters to take this into account. */ -int x264_encoder_reconfig( x264_t *, x264_param_t * ); +X264_API int x264_encoder_reconfig( x264_t *, x264_param_t * ); /* x264_encoder_parameters: * copies the current internal set of parameters to the pointer provided * by the caller. useful when the calling application needs to know @@ -891,32 +911,32 @@ * of the encoder after multiple x264_encoder_reconfig calls. * note that the data accessible through pointers in the returned param struct * (e.g. filenames) should not be modified by the calling application. */ -void x264_encoder_parameters( x264_t *, x264_param_t * ); +X264_API void x264_encoder_parameters( x264_t *, x264_param_t * ); /* x264_encoder_headers: * return the SPS and PPS that will be used for the whole stream. 
* *pi_nal is the number of NAL units outputted in pp_nal. * returns the number of bytes in the returned NALs. * returns negative on error. * the payloads of all output NALs are guaranteed to be sequential in memory. */ -int x264_encoder_headers( x264_t *, x264_nal_t **pp_nal, int *pi_nal ); +X264_API int x264_encoder_headers( x264_t *, x264_nal_t **pp_nal, int *pi_nal ); /* x264_encoder_encode: * encode one picture. * *pi_nal is the number of NAL units outputted in pp_nal. * returns the number of bytes in the returned NALs. * returns negative on error and zero if no NAL units returned. * the payloads of all output NALs are guaranteed to be sequential in memory. */ -int x264_encoder_encode( x264_t *, x264_nal_t **pp_nal, int *pi_nal, x264_picture_t *pic_in, x264_picture_t *pic_out ); +X264_API int x264_encoder_encode( x264_t *, x264_nal_t **pp_nal, int *pi_nal, x264_picture_t *pic_in, x264_picture_t *pic_out ); /* x264_encoder_close: * close an encoder handler */ -void x264_encoder_close( x264_t * ); +X264_API void x264_encoder_close( x264_t * ); /* x264_encoder_delayed_frames: * return the number of currently delayed (buffered) frames * this should be used at the end of the stream, to know when you have all the encoded frames. */ -int x264_encoder_delayed_frames( x264_t * ); -/* x264_encoder_maximum_delayed_frames( x264_t *h ): +X264_API int x264_encoder_delayed_frames( x264_t * ); +/* x264_encoder_maximum_delayed_frames( x264_t * ): * return the maximum number of delayed (buffered) frames that can occur with the current * parameters. */ -int x264_encoder_maximum_delayed_frames( x264_t *h ); +X264_API int x264_encoder_maximum_delayed_frames( x264_t * ); /* x264_encoder_intra_refresh: * If an intra refresh is not in progress, begin one with the next P-frame. * If an intra refresh is in progress, begin one as soon as the current one finishes. @@ -930,7 +950,7 @@ * behavior is undefined. * * Should not be called during an x264_encoder_encode. 
*/ -void x264_encoder_intra_refresh( x264_t * ); +X264_API void x264_encoder_intra_refresh( x264_t * ); /* x264_encoder_invalidate_reference: * An interactive error resilience tool, designed for use in a low-latency one-encoder-few-clients * system. When the client has packet loss or otherwise incorrectly decodes a frame, the encoder @@ -953,7 +973,7 @@ * Should not be called during an x264_encoder_encode, but multiple calls can be made simultaneously. * * Returns 0 on success, negative on failure. */ -int x264_encoder_invalidate_reference( x264_t *, int64_t pts ); +X264_API int x264_encoder_invalidate_reference( x264_t *, int64_t pts ); #ifdef __cplusplus } diff -Nru x264-0.152.2854+gite9a5903/x264res.rc x264-0.158.2988+git-20191101.7817004/x264res.rc --- x264-0.152.2854+gite9a5903/x264res.rc 2017-12-31 12:50:51.000000000 +0000 +++ x264-0.158.2988+git-20191101.7817004/x264res.rc 2019-11-09 05:16:29.000000000 +0000 @@ -1,7 +1,7 @@ /***************************************************************************** * x264res.rc: windows resource file ***************************************************************************** - * Copyright (C) 2012-2017 x264 project + * Copyright (C) 2012-2019 x264 project * * Authors: Henrik Gramner * @@ -64,7 +64,7 @@ #endif VALUE "FileVersion", X264_POINTVER VALUE "InternalName", "x264" - VALUE "LegalCopyright", "Copyright (C) 2003-2017 x264 project" + VALUE "LegalCopyright", "Copyright (C) 2003-2019 x264 project" #ifdef DLL VALUE "OriginalFilename", "libx264-" xstr(X264_BUILD) ".dll" #else