diff -Nru x264-0.157.2935+git545de2f/autocomplete.c x264-0.160.3011+gitcde9a93/autocomplete.c --- x264-0.157.2935+git545de2f/autocomplete.c 1970-01-01 00:00:00.000000000 +0000 +++ x264-0.160.3011+gitcde9a93/autocomplete.c 2020-07-13 10:30:21.000000000 +0000 @@ -0,0 +1,405 @@ +/***************************************************************************** + * autocomplete: x264cli shell autocomplete + ***************************************************************************** + * Copyright (C) 2018-2020 x264 project + * + * Authors: Henrik Gramner + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02111, USA. + * + * This program is also available under a commercial proprietary license. + * For more information, contact us at licensing@x264.com. + *****************************************************************************/ + +#include "x264cli.h" +#include "input/input.h" + +#if HAVE_LAVF +#undef DECLARE_ALIGNED +#include <libavformat/avformat.h> +#include <libavutil/pixdesc.h> +#endif + +static const char * const level_names[] = +{ + "1", "1.1", "1.2", "1.3", "1b", + "2", "2.1", "2.2", + "3", "3.1", "3.2", + "4", "4.1", "4.2", + "5", "5.1", "5.2", + "6", "6.1", "6.2", + NULL +}; + +/* Options requiring a value for which we provide suggestions.
*/ +static const char * const opts_suggest[] = +{ + "--alternative-transfer", + "--aq-mode", + "--asm", + "--avcintra-class", + "--avcintra-flavor", + "--b-adapt", + "--b-pyramid", + "--colormatrix", + "--colorprim", + "--cqm", + "--demuxer", + "--direct", + "--frame-packing", + "--input-csp", + "--input-fmt", + "--input-range", + "--level", + "--log-level", + "--me", + "--muxer", + "--nal-hrd", + "--output-csp", + "--overscan", + "--pass", "-p", + "--preset", + "--profile", + "--pulldown", + "--range", + "--subme", "-m", + "--transfer", + "--trellis", "-t", + "--tune", + "--videoformat", + "--weightp", + NULL +}; + +/* Options requiring a value for which we don't provide suggestions. */ +static const char * const opts_nosuggest[] = +{ + "--b-bias", + "--bframes", "-b", + "--deblock", "-f", + "--bitrate", "-B", + "--chroma-qp-offset", + "--chromaloc", + "--cplxblur", + "--cqm4", + "--cqm4i", + "--cqm4ic", + "--cqm4iy", + "--cqm4p", + "--cqm4pc", + "--cqm4py", + "--cqm8", + "--cqm8i", + "--cqm8p", + "--crf", + "--crf-max", + "--crop-rect", + "--deadzone-inter", + "--deadzone-intra", + "--fps", + "--frames", + "--input-depth", + "--input-res", + "--ipratio", + "--keyint", "-I", + "--lookahead-threads", + "--merange", + "--min-keyint", "-i", + "--mvrange", + "--mvrange-thread", + "--nr", + "--opencl-device", + "--output-depth", + "--partitions", "-A", + "--pbratio", + "--psy-rd", + "--qblur", + "--qcomp", + "--qp", "-q", + "--qpmax", + "--qpmin", + "--qpstep", + "--ratetol", + "--ref", "-r", + "--rc-lookahead", + "--sar", + "--scenecut", + "--seek", + "--slices", + "--slices-max", + "--slice-max-size", + "--slice-max-mbs", + "--slice-min-mbs", + "--sps-id", + "--sync-lookahead", + "--threads", + "--timebase", + "--vbv-bufsize", + "--vbv-init", + "--vbv-maxrate", + "--video-filter", "--vf", + "--zones", + NULL +}; + +/* Options requiring a filename. 
*/ +static const char * const opts_filename[] = +{ + "--cqmfile", + "--dump-yuv", + "--index", + "--opencl-clbin", + "--output", "-o", + "--qpfile", + "--stats", + "--tcfile-in", + "--tcfile-out", + NULL +}; + +/* Options without an associated value. */ +static const char * const opts_standalone[] = +{ + "--8x8dct", + "--aud", + "--bff", + "--bluray-compat", + "--cabac", + "--constrained-intra", + "--cpu-independent", + "--dts-compress", + "--fake-interlaced", + "--fast-pskip", + "--filler", + "--force-cfr", + "--mbtree", + "--mixed-refs", + "--no-8x8dct", + "--no-asm", + "--no-cabac", + "--no-chroma-me", + "--no-dct-decimate", + "--no-deblock", + "--no-fast-pskip", + "--no-mbtree", + "--no-mixed-refs", + "--no-progress", + "--no-psy", + "--no-scenecut", + "--no-weightb", + "--non-deterministic", + "--open-gop", + "--opencl", + "--pic-struct", + "--psnr", + "--quiet", + "--sliced-threads", + "--slow-firstpass", + "--ssim", + "--stitchable", + "--tff", + "--thread-input", + "--verbose", "-v", + "--weightb", + NULL +}; + +/* Options which shouldn't be suggested in combination with other options. */ +static const char * const opts_special[] = +{ + "--fullhelp", + "--help", "-h", + "--longhelp", + "--version", + NULL +}; + +static int list_contains( const char * const *list, const char *s ) +{ + if( *s ) + for( ; *list; list++ ) + if( !strcmp( *list, s ) ) + return 1; + return 0; +} + +static void suggest( const char *s, const char *cur, int cur_len ) +{ + if( s && *s && !strncmp( s, cur, cur_len ) ) + printf( "%s\n", s ); +} + +static void suggest_lower( const char *s, const char *cur, int cur_len ) +{ + if( s && *s && !strncasecmp( s, cur, cur_len ) ) + { + for( ; *s; s++ ) + putchar( *s < 'A' || *s > 'Z' ? 
*s : *s | 0x20 ); + putchar( '\n' ); + } +} + +static void suggest_num_range( int start, int end, const char *cur, int cur_len ) +{ + char buf[16]; + for( int i = start; i <= end; i++ ) + { + snprintf( buf, sizeof( buf ), "%d", i ); + suggest( buf, cur, cur_len ); + } +} + +#if HAVE_LAVF +/* Suggest each token in a string separated by delimiters. */ +static void suggest_token( const char *s, int delim, const char *cur, int cur_len ) +{ + if( s && *s ) + { + for( const char *tok_end; (tok_end = strchr( s, delim )); s = tok_end + 1 ) + { + int tok_len = tok_end - s; + if( tok_len && tok_len >= cur_len && !strncmp( s, cur, cur_len ) ) + printf( "%.*s\n", tok_len, s ); + } + suggest( s, cur, cur_len ); + } +} +#endif + +#define OPT( opt ) else if( !strcmp( prev, opt ) ) +#define OPT2( opt1, opt2 ) else if( !strcmp( prev, opt1 ) || !strcmp( prev, opt2 ) ) +#define OPT_TYPE( type ) list_contains( opts_##type, prev ) + +#define suggest( s ) suggest( s, cur, cur_len ) +#define suggest_lower( s ) suggest_lower( s, cur, cur_len ) +#define suggest_list( list ) for( const char * const *s = list; *s; s++ ) suggest( *s ) +#define suggest_num_range( start, end ) suggest_num_range( start, end, cur, cur_len ) +#define suggest_token( s, delim ) suggest_token( s, delim, cur, cur_len ) + +int x264_cli_autocomplete( const char *prev, const char *cur ) +{ + int cur_len = strlen( cur ); + if( 0 ); + OPT( "--alternative-transfer" ) + suggest_list( x264_transfer_names ); + OPT( "--aq-mode" ) + suggest_num_range( 0, 3 ); + OPT( "--asm" ) + for( const x264_cpu_name_t *cpu = x264_cpu_names; cpu->flags; cpu++ ) + suggest_lower( cpu->name ); + OPT( "--avcintra-class" ) + suggest_list( x264_avcintra_class_names ); + OPT( "--avcintra-flavor" ) + suggest_list( x264_avcintra_flavor_names ); + OPT( "--b-adapt" ) + suggest_num_range( 0, 2 ); + OPT( "--b-pyramid" ) + suggest_list( x264_b_pyramid_names ); + OPT( "--colormatrix" ) + suggest_list( x264_colmatrix_names ); + OPT( "--colorprim" ) + 
suggest_list( x264_colorprim_names ); + OPT( "--cqm" ) + suggest_list( x264_cqm_names ); + OPT( "--demuxer" ) + suggest_list( x264_demuxer_names ); + OPT( "--direct" ) + suggest_list( x264_direct_pred_names ); + OPT( "--frame-packing" ) + suggest_num_range( 0, 7 ); + OPT( "--input-csp" ) + { + for( int i = X264_CSP_NONE+1; i < X264_CSP_CLI_MAX; i++ ) + suggest( x264_cli_csps[i].name ); +#if HAVE_LAVF + for( const AVPixFmtDescriptor *d = NULL; (d = av_pix_fmt_desc_next( d )); ) + suggest( d->name ); +#endif + } + OPT( "--input-fmt" ) + { +#if HAVE_LAVF + av_register_all(); + for( const AVInputFormat *f = NULL; (f = av_iformat_next( f )); ) + suggest_token( f->name, ',' ); +#endif + } + OPT( "--input-range" ) + suggest_list( x264_range_names ); + OPT( "--level" ) + suggest_list( level_names ); + OPT( "--log-level" ) + suggest_list( x264_log_level_names ); + OPT( "--me" ) + suggest_list( x264_motion_est_names ); + OPT( "--muxer" ) + suggest_list( x264_muxer_names ); + OPT( "--nal-hrd" ) + suggest_list( x264_nal_hrd_names ); + OPT( "--output-csp" ) + suggest_list( x264_output_csp_names ); + OPT( "--output-depth" ) + { +#if HAVE_BITDEPTH8 + suggest( "8" ); +#endif +#if HAVE_BITDEPTH10 + suggest( "10" ); +#endif + } + OPT( "--overscan" ) + suggest_list( x264_overscan_names ); + OPT2( "--partitions", "-A" ) + suggest_list( x264_partition_names ); + OPT2( "--pass", "-p" ) + suggest_num_range( 1, 3 ); + OPT( "--preset" ) + suggest_list( x264_preset_names ); + OPT( "--profile" ) + suggest_list( x264_valid_profile_names ); + OPT( "--pulldown" ) + suggest_list( x264_pulldown_names ); + OPT( "--range" ) + suggest_list( x264_range_names ); + OPT2( "--subme", "-m" ) + suggest_num_range( 0, 11 ); + OPT( "--transfer" ) + suggest_list( x264_transfer_names ); + OPT2( "--trellis", "-t" ) + suggest_num_range( 0, 2 ); + OPT( "--tune" ) + suggest_list( x264_tune_names ); + OPT( "--videoformat" ) + suggest_list( x264_vidformat_names ); + OPT( "--weightp" ) + suggest_num_range( 0, 2 ); + 
else if( !OPT_TYPE( nosuggest ) && !OPT_TYPE( special ) ) + { + if( OPT_TYPE( filename ) || strncmp( cur, "--", 2 ) ) + return 1; /* Fall back to default shell filename autocomplete. */ + + /* Suggest options. */ + suggest_list( opts_suggest ); + suggest_list( opts_nosuggest ); + suggest_list( opts_filename ); + suggest_list( opts_standalone ); + + /* Only suggest special options if no other options have been specified. */ + if( !*prev ) + suggest_list( opts_special ); + } + + return 0; +} diff -Nru x264-0.157.2935+git545de2f/ChangeLog x264-0.160.3011+gitcde9a93/ChangeLog --- x264-0.157.2935+git545de2f/ChangeLog 2018-09-25 12:35:00.000000000 +0000 +++ x264-0.160.3011+gitcde9a93/ChangeLog 1970-01-01 00:00:00.000000000 +0000 @@ -1,24597 +0,0 @@ -commit 545de2ffec6ae9a80738de1b2c8cf820249a2530 -Author: Anton Mitrofanov -Date: Sun Sep 23 20:47:44 2018 +0300 - - Fix possible double transpose of custom CQM if --level is not set - - Bug reported by Nicolas Gaullier - -commit b63c73dc5c37e5405bf032c9113c1daced3e45a4 -Author: Henrik Gramner -Date: Tue Aug 7 22:42:22 2018 +0200 - - cli: Fix linking with --system-libx264 on x86 - -commit fb17a6b5b51d02020fb0cadea2b27c7803e734ba -Author: Anton Mitrofanov -Date: Tue Aug 21 15:11:21 2018 +0300 - - Fix CAVLC+RDO in 4:4:4 - -commit 303c484ec828ed0d8bfe743500e70314d026c3bd -Author: Alexandra Hájková -Date: Wed Jul 11 19:28:20 2018 +0000 - - ppc: Optimize quant functions - - 1) using xxpermdi + merge instead of 2 merges improves quant_8x8 - performance by 5% - - 2) use vec_splats instead of vec_splat - - checkasm timings when compiled with gcc: - C: AltiVec: - before: after: - quant_2x2_dc: 57 163 46 - quant_4x4_dc: 141 162 57 - - dequant_4x4_cmp: 104 101 45 - dequant_4x4_flat: 104 106 46 - dequant_8x8_cmp: 412 208 147 - dequant_8x8_flat: 414 212 149 - -commit 44f1671369b54734db1775fe5155f17041344d8f -Author: Alexandra Hajkova -Date: Sun Jul 8 13:04:43 2018 -0500 - - ppc: Add support for Power9-only vec_absd - - Increases overall 
encoding speed on POWER9 by 8%. - -commit f8afe3820c84798e9e50623cf7349bdb98765926 -Author: Alexandra Hájková -Date: Fri Jun 29 16:50:20 2018 +0000 - - ppc: Optimize sub8x8_dct_dc - -commit 411c957d82d357250f3a3099727b1a2c84caaee9 -Author: Alexandra Hájková -Date: Thu Jun 21 18:36:32 2018 +0000 - - ppc: AltiVec add16x16_idct_dc - -commit 53fe16e51349c43c483e81afb1f08a39f843a234 -Author: Alexandra Hájková -Date: Sat Jun 23 14:58:17 2018 +0000 - - ppc: Optimize add8x8_idct_dc - -commit 62dcebbce2c3f34998aeb2ea76b89f51306e78e9 -Author: Luca Barbato -Date: Thu Jul 12 10:41:22 2018 +0200 - - ppc: Add compatibility macros for vec_xxpermdi - -commit d1a53926fb90e9f4a4f1605f4b2a8a945a73e1d2 -Author: Henrik Gramner -Date: Mon Jun 25 00:09:51 2018 +0200 - - Prefer a monotonic clock source if available - -commit 1d18f0e025e994b93233b8e8afa0c691bccc8fda -Author: Kieran Kunhya -Date: Wed Aug 30 16:05:41 2017 +0100 - - Add Sony XAVC, a flavour of AVC-Intra - -commit bc136ec6a0f863c42686a3bc9fa4c7820f83d413 -Author: Anton Mitrofanov -Date: Mon Jul 2 20:20:03 2018 +0300 - - Cosmetics: Fix indentation for multiline function prototypes - - It was broken in "Drop the x264 prefix" patch. - -commit 6dd1d3b5d9e16a5951ececb7351cd63f02b36435 -Author: Anton Mitrofanov -Date: Mon Apr 16 23:54:43 2018 +0300 - - Cosmetics: Use consistent "inline" attribute position - - Place it immediately after "static". - -commit 3d9ec58f27f1cd6732484246aaad59158b98af47 -Author: Henrik Gramner -Date: Thu Jan 25 22:17:57 2018 +0100 - - x86: AVX-512 plane_copy and plane_copy_swap - - Avoid the scalar C wrapper by utilizing opmasks to prevent overreading the - input buffer. - -commit 698c5a32e63a3ed6b976ed196abe479efd78530b -Author: Emanuele Ruffaldi -Date: Sat Jan 6 02:34:39 2018 +0100 - - 4:0:0 (monochrome) encoding support - - Virtually zero increase in compression efficiency compared to 4:2:0 with empty - chroma planes. Performance is better though, especially with fast settings. 
- -commit 814e61e88c809bb00d17c200a04e9c7d42a19bb5 -Author: Diego Biurrun -Date: Sun Feb 5 09:02:43 2017 +0100 - - Makefile improvements - - * Coalesce some install recipe lines - - * Remove empty addition of GPLed filters - - * Install libdir in recipes that directly require it - - * Coalesce etags/TAGS rules - - * Simplify fprofiled rule - -commit 28e4879842a86cc6bb63db0f5f386a3e9268fd46 -Author: Henrik Gramner -Date: Sun Apr 22 22:49:15 2018 +0200 - - x86inc: Improve SAVE/LOAD_MM_PERMUTATION macros - - Use register numbers instead of copying the full register names. This makes it - possible to change register widths in the middle of a function and keep the - mmreg permutations intact which can be useful for code that only needs larger - vectors for parts of the function in combination with macros etc. - - Also change the LOAD_MM_PERMUTATION macro to use the same default name as the - SAVE macro. This simplifies swapping from ymm to xmm registers or vice versa: - - SAVE_MM_PERMUTATION - INIT_XMM - LOAD_MM_PERMUTATION - -commit 8badb910847e94abb66686009e424bdce355c9f4 -Author: Henrik Gramner -Date: Sat Mar 31 13:49:56 2018 +0200 - - x86inc: Optimize VEX instruction encoding - - Most VEX-encoded instructions require an additional byte to encode when src2 - is a high register (e.g. x|ymm8..15). If the instruction is commutative we - can swap src1 and src2 when doing so reduces the instruction length, e.g. - - vpaddw xmm0, xmm0, xmm8 -> vpaddw xmm0, xmm8, xmm0 - -commit 0a84d986e7020f8344f00752e3600b9769cc1e85 -Author: Henrik Gramner -Date: Sat Mar 31 01:16:06 2018 +0200 - - x86inc: Fix VEX -> EVEX instruction conversion - - There's an edge case that wasn't properly handled. 
- -commit 9d33c8fefbb506377b943aba11cd99c74258c5de -Author: Anton Mitrofanov -Date: Tue Jul 31 22:54:33 2018 +0300 - - configure: Fix required version checks for lavf and swscale - -commit 34843deb060248514ecd9edd88d72c2c2d6b906a -Author: Anton Mitrofanov -Date: Fri Jul 20 08:37:43 2018 +0300 - - Fix float division by zero in weightp analysis - -commit 1c3174775c6c1789aaf10172e4cb619f91ecff4a -Author: Anton Mitrofanov -Date: Wed Jul 18 21:56:33 2018 +0300 - - Fix undefined behavior of left shift for CAVLC encoding - -commit a0253ebee0f4d854cf89934b5f420275862d0b5b -Author: Anton Mitrofanov -Date: Mon Jul 2 20:59:16 2018 +0300 - - Fix integer overflow in slicetype_path_cost - - The path cost for high resolutions can exceed COST_MAX. - -commit 2af2742821f0b08a4295055b41875e660d5a7746 -Author: Henrik Gramner -Date: Fri Jun 29 13:14:01 2018 +0200 - - cli: Fix preset help listing - - It was previously incorrect when --chroma-format or --bit-depth was - specified in configure. - -commit f5d929ab8faf2319dda10836f51803ba25f0ad07 -Author: Luca Barbato -Date: Sat Jun 23 13:14:28 2018 +0200 - - ppc: Fix zigzag_interleave - - The permv array has 3 elements - -commit 7737e6ad4acf1058aeb0f9802e2a3ca1e0a30d29 -Author: Henrik Gramner -Date: Sat Jun 2 20:35:10 2018 +0200 - - Fix clang stack alignment issues - - Clang emits aligned AVX stores for things like zeroing stack-allocated - variables when using -mavx even with -fno-tree-vectorize set which can - result in crashes if this occurs before we've realigned the stack. - - Previously we only ensured that the stack was realigned before calling - assembly functions that accesses stack-allocated buffers but this is - not sufficient. Fix the issue by changing the stack realignment to - instead occur immediately in all CLI, API and thread entry points. 
- -commit 26b99cce1f03f023dee98bef2ec3cd2eff319f8e -Author: Anton Mitrofanov -Date: Sun Apr 1 20:49:29 2018 +0300 - - Fix missing bs_flush in AUD writing - -commit da6b29b553bb56e16e99527733849735c2ea264c -Author: Anton Mitrofanov -Date: Sun Apr 1 20:39:30 2018 +0300 - - Fix possible undefined behavior of right shift - - 32-bit shifts are only defined for values in the range 0-31. - -commit 7e457290cdd6da592ae63aa25facc47cd09d2128 -Author: Anton Mitrofanov -Date: Sun Apr 1 20:34:18 2018 +0300 - - Make bs_align_10 imply bs_flush - - Now behaves the same as bs_align_0 and bs_align_1. - -commit 6afb67c6d7b71fcc6fc14d167f1fcf55623846f4 -Author: Anton Mitrofanov -Date: Sun Apr 1 17:52:47 2018 +0300 - - Fix theoretically incorrect cost_mv_fpel free - -commit 57dd6274e2da70bdb8220bc159976e3ac2aea017 -Author: Anton Mitrofanov -Date: Sun Apr 1 17:42:46 2018 +0300 - - configure: Fix ambiguous "$((" - -commit 0e6425e03e28213e73ae770df5e08fffba72d290 -Author: Anton Mitrofanov -Date: Mon Feb 19 19:53:38 2018 +0300 - - Fix --qpmax default value in fullhelp - -commit 5f7f950c80e330728ecb07bc133e17456870121a -Author: Henrik Gramner -Date: Sat Mar 31 01:31:57 2018 +0200 - - x86: Correctly use v-prefix for instructions with opmasks - - This was always required, but accidentally happened to work correctly - in a few cases. - -commit 3d90057e15abf257320c89bb7146fb0c92687fa6 -Author: Martin Storsjö -Date: Sat Mar 31 00:10:14 2018 +0300 - - configure: Only use gas-preprocessor with armasm for compiler=CL - - This picks the right assembler automatically for arm and aarch64 - llvm-mingw targets. - - This doesn't get the right assembler for clang setups when clang - acts like MSVC and uses MSVC headers though (where it perhaps - should use armasm as before), but that's probably an even more - obscure setup. 
- -commit 7d0ff22e8c96de126be9d3de4952edd6d1b75a8c -Author: Anton Mitrofanov -Date: Wed Jan 17 22:03:06 2018 +0300 - - Remove ARRAY_SIZE macro which is identical to ARRAY_ELEMS - -commit 4a158b00943c334ec9e0aabe6a919900c32e360e -Author: Henrik Gramner -Date: Sat Jan 6 17:47:42 2018 +0100 - - x86inc: Correctly set mmreg variables - -commit 40b47eed1338cef1ac66c98b4e393dfcf5d998ae -Author: Diego Biurrun -Date: Sun Feb 5 09:02:49 2017 +0100 - - .gitignore: Ignore TAGS file - -commit 6fce82284a0fb3edfa299b904b1559452a3b1094 -Author: Diego Biurrun -Date: Sun Feb 5 09:02:51 2017 +0100 - - Minor configure improvements - - * Drop empty addition of GPLed filters - - * Replace backticks with $() - -commit ca5408b13cf0e58a7505051861f20a63a7a6aec1 -Author: Henrik Gramner -Date: Mon Jan 1 15:05:48 2018 +0100 - - Bump dates to 2018 - -commit b019515ef4ad77022b849283c62612157e8458a7 -Author: Henrik Gramner -Date: Tue Jan 16 17:43:24 2018 +0100 - - Merge zero buffers - - Improves cache efficiency. - -commit d75b93b0e82cefa93e5db2d6b0be475566101431 -Author: Anton Mitrofanov -Date: Wed Jan 17 18:19:44 2018 +0300 - - rdo: Use ALIGNED_ARRAY for stack arrays - -commit 9384a7389b251b59a079ccc3d1af9edd42e3d5e6 -Author: Henrik Gramner -Date: Mon Jan 15 21:42:59 2018 +0100 - - Correctly align buffers for AVX and AVX-512 - - Fixes segfaults on Windows where the stack is only 16-byte aligned. - -commit b00bcafe53a166b63a179a2f41470cd13b59f927 -Author: Anton Mitrofanov -Date: Sun Dec 24 22:59:09 2017 +0300 - - Cosmetics - -commit 7c6b3ad50d9210d27be6953dfa6d24e5d183be18 -Author: Alexandra Hájková -Date: Sun May 21 17:40:45 2017 +0000 - - ppc: Add load_deinterleave_chroma_fenc_altivec - - 5x speed up vs C code. - -commit b461e015fd7efe3bb740ef0716bc41d76eff30c9 -Author: Martin Storsjö -Date: Thu Oct 26 13:09:46 2017 +0300 - - Update to the latest upstream version of gas-preprocessor - - This version supports converting aarch64 assembly for MS armasm64.exe. 
- -commit 61e8b5cc482b08d51e18b336081073736d963e7e -Author: Henrik Gramner -Date: Sun Oct 22 09:59:28 2017 +0200 - - input: Add a workaround for swscale overread bugs - - swscale can read past the end of the input buffer, which may result in - crashes if such a read crosses a page boundary into an invalid page. - - Work around this by adding some padding space at the end of the buffer when - using memory-mapped input frames. This may sometimes require copying the - last frame into a new buffer on Windows since the Microsoft memory-mapping - implementation has very limited capabilities compared to POSIX systems. - -commit 1221f097473a049a52fbb47aff2733321bd4661a -Author: Henrik Gramner -Date: Sun Oct 22 10:50:46 2017 +0200 - - filters/resize: Upgrade to a newer libavutil API - - Use the AVComponentDescriptor depth field instead of depth_minus1. - -commit 99ca611d2e667553e026f487dc787b595dde84c5 -Author: Martin Storsjö -Date: Wed Oct 18 10:40:02 2017 +0300 - - aarch64: Use ldurb/sturb for loads/stores with negative offsets - - The assembler (both gas and clang/llvm) automatically fixes this, - armasm64 doesn't. We can fix it in gas-preprocessor, but we should - also be using the right instruction form. - -commit f745815e593b788d846182c8d42eed4f72f7c33c -Author: Martin Storsjö -Date: Mon Oct 16 22:50:27 2017 +0300 - - configure: Add support for building with MSVC/armasm for ARM64 - -commit 7b13b31be60ed65bee615bab28c422e2df027ee1 -Author: Martin Storsjö -Date: Mon Oct 16 22:50:26 2017 +0300 - - arm: Check for __ELF__ instead of !__APPLE__, for using .arch/.fpu - - For windows, when building with armasm, we already filtered these out - with gas-preprocessor. - - By filtering them out already in the source, we can also build directly - with clang for windows (which also require wrapping the assembler in - gas-preprocessor for converting instructions to thumb form, but - gas-preprocessor doesn't and shouldn't filter out them in the clang - configuration). 
- -commit 12ca9a69e855c4d4b9000894f478bce665e4e02c -Author: Martin Storsjö -Date: Mon Oct 16 22:50:25 2017 +0300 - - aarch64: Don't .set a symbol named st2 - - This confuses gas-preprocessor, which tries to replace actual - st2 instructions by the integer 1 or 2. - -commit 06c8f6bab0fc8fa9b2df9a1af5d10c87c515edb4 -Author: Henrik Gramner -Date: Sat Oct 14 14:11:26 2017 +0200 - - Shrink the i4x4_mode cost_table array - - Only 17 elements are actually used. It was originally padded to 64 bytes to - avoid cache line splits in the x86 assembly, but those haven't really been - an issue on x86 CPU:s made in the past decade or so. - - Benchmarking shows no performance impact from dropping the padding, so - might as well remove it and save some cache. - -commit 344699fd386890ac1cf80a70a68a3ae16767ed62 -Author: Henrik Gramner -Date: Wed Oct 11 18:02:26 2017 +0200 - - x86: Remove some legacy CPU detection hacks - - Some ancient Pentium-M and Core 1 CPU:s had slow SSE units, and using MMX - was preferable. Nowadays many assembly functions in x264 completely lack MMX - implementations and falling back to C code will likely make things worse. - - Some misconfigured virtualized systems could sometimes also trigger this code - path and cause assertions. - -commit 0fe75403d7b40c0209c3df992632956292065cdc -Author: Henrik Gramner -Date: Wed Oct 11 17:58:36 2017 +0200 - - lavf: Upgrade to the new core decoding API - -commit dae7f18d2cc5c7eccfb73649cda458e3c8e2256e -Author: Vittorio Giovara -Date: Mon Oct 9 12:04:22 2017 -0400 - - lavf: Upgrade to some newer API:s - - * Use the codec parameters API instead of the AVStream codec field. - * Use av_packet_unref() instead of av_free_packet(). - * Use the AVFrame pts field instead of pkt_pts. 
- -commit 12611ec99bb52f4f2c1b114138d867b3a2aa182b -Author: Henrik Gramner -Date: Sun Oct 8 21:41:16 2017 +0200 - - x86: AVX-512 load_deinterleave_chroma_fdec - -commit d93851ec282eb069f91a6eddab3284f7766cd5bd -Author: Henrik Gramner -Date: Sun Oct 8 21:23:12 2017 +0200 - - x86: AVX-512 load_deinterleave_chroma_fenc - -commit 5b62ab59be01579ab37033cc86527df922efb843 -Author: Henrik Gramner -Date: Sat Oct 7 12:06:51 2017 +0200 - - x86: AVX-512 mbtree_fix8_pack and mbtree_fix8_unpack - - Takes advantage of opmasks to avoid having to use scalar code for the tail. - - Also make some slight improvements to the checkasm test. - -commit 08476ab1c0a9b741198677731373b173657fa079 -Author: Henrik Gramner -Date: Sat Oct 7 11:34:16 2017 +0200 - - x86: Faster mbtree_fix8_unpack - - Use a different multiplier in order to eliminate some shifts. - - About 25% faster than before. - -commit e3fae10bf7db9571d5c69ad910f10df625bad73e -Author: Anton Mitrofanov -Date: Fri Sep 22 17:28:18 2017 +0300 - - Don't force fast-intra for subme < 3 - - It have caused significant quality hit without any meaningful (if any) speed up. - -commit bdf27e783a8eb4a5bcae0cd0a950d6dc3d995bfe -Author: Anton Mitrofanov -Date: Fri Sep 22 17:18:55 2017 +0300 - - Make ref and i4x4_mode costs global instead of static - - Fixes some thread safety doubts and makes code cleaner. - Downside: slightly higher memory usage when calling multiple encoders from the same application. - -commit fefc3fa1fa98a7bac4eaf3c8e6e1c52b7e427ddd -Author: Anton Mitrofanov -Date: Fri Sep 22 17:05:06 2017 +0300 - - Fix thread safety of x264_threading_init() and use of X264_PTHREAD_MUTEX_INITIALIZER with win32thread - -commit 694d031c1d120a8b578f60eeccf14fcf9ca4200e -Author: Anton Mitrofanov -Date: Fri Sep 22 16:59:13 2017 +0300 - - configure: Improvements - - Log result of pkg-config checks to config.log. - Fix lavf support detection for pkg-config fallback case. - Fix detection of linking dependencies errors for lavf/lsmash/gpac. 
- Cosmetics. - -commit 5d4031618e9feedcb527fd4e5a91bc06e30b70b4 -Author: Anton Mitrofanov -Date: Thu Aug 17 23:51:14 2017 +0300 - - flv: Fix one frame video total duration - -commit 8b9c89d331f5a2d6335ff9b08abc8d5c94428731 -Author: Anton Mitrofanov -Date: Thu Aug 17 23:46:23 2017 +0300 - - flv: Split FrameType and CodecID values - -commit 95cdb743463f723cea58c8ae01d7762f7ae9965c -Author: Vittorio Giovara -Date: Tue Aug 8 15:40:45 2017 +0200 - - Support writing the alternative transfer SEI message - -commit c98d02bebd6dd04b61306ee27712aeff96f19f29 -Author: Vittorio Giovara -Date: Tue Aug 8 14:56:43 2017 +0200 - - Support 04/2017 color matrix and transfer values - -commit 71ed44c7312438fac7c5c5301e45522e57127db4 -Author: Vittorio Giovara -Date: Fri Jan 6 15:23:38 2017 +0100 - - Unify 8-bit and 10-bit CLI and libraries - - Add 'i_bitdepth' to x264_param_t with the corresponding '--output-depth' CLI - option to set the bit depth at runtime. - - Drop the 'x264_bit_depth' global variable. Rather than hardcoding it to an - incorrect value, it's preferable to induce a linking failure. If applications - relies on this symbol this will make it more obvious where the problem is. - - Add Makefile rules that compiles modules with different bit depths. Assembly - on x86 is prefixed with the 'private_prefix' define, while all other archs - modify their function prefix internally. - - Templatize the main C library, x86/x86_64 assembly, ARM assembly, AARCH64 - assembly, PowerPC assembly, and MIPS assembly. - - The depth and cache CLI filters heavily depend on bit depth size, so they - need to be duplicated for each value. This means having to rename these - filters, and adjust the callers to use the right version. - - Unfortunately the threaded input CLI module inherits a common.h dependency - (input/frame -> common/threadpool -> common/frame -> common/common) which - is extremely complicated to address in a sensible way. 
Instead duplicate - the module and select the appropriate one at run time. - - Each bitdepth needs different checkasm compilation rules, so split the main - checkasm target into two executables. - -commit 2451a7282463f68e532f2eee090a70ab139bb3e7 -Author: Vittorio Giovara -Date: Fri Jan 6 17:50:40 2017 +0100 - - Change default QP parameters initialization - - qp is modified to require a valid value before use, while qp_max is set - to maximum allowable value (and clipped later on). - - This is needed so that param functions do not depend on bit depth size. - -commit 7839a9e1f03b49e3e0cbfcb3091093af7c6d54ee -Author: Vittorio Giovara -Date: Tue Jan 17 17:07:42 2017 +0100 - - aarch64: Set the function symbol prefix in a single location - -commit 498cca0b74ab90c363b761083c7fdcf56fc60904 -Author: Vittorio Giovara -Date: Tue Jan 17 17:04:19 2017 +0100 - - arm: Set the function symbol prefix in a single location - -commit 8f2437d33301faaf0e2fcaff16e2b01e9bbe27ae -Author: Vittorio Giovara -Date: Fri Jan 27 11:58:33 2017 +0100 - - Drop the x264 prefix from static functions and variables - -commit 4e2ed4087ac1621f946b83366e1f53a1326d7424 -Author: Anton Mitrofanov -Date: Thu Aug 17 23:25:31 2017 +0300 - - configure: Check for strtok_r compiler support - -commit d1eebb2927da15c41c7c180d398b0cdad3d1f396 -Author: Henrik Gramner -Date: Sun Aug 6 17:17:55 2017 +0200 - - cabac: Make the cabac_contexts array static - - Also drop the x264 prefix from all static cabac arrays. - -commit 3f9f6554a4cfa4189855756860a61ceb2f2a41a3 -Author: Henrik Gramner -Date: Thu Aug 17 18:04:13 2017 +0200 - - x86: AVX-512 pixel_satd_x3 and pixel_satd_x4 - -commit dd399ab862e2271e869bc8aefcb3166180ecdb10 -Author: Henrik Gramner -Date: Mon Aug 14 23:13:44 2017 +0200 - - x86: Shrink the x86-64 cabac coeff_last tables - - Use dword instead of qword entries. Cuts the size of the tables in half - which allows each table fit inside a single cache line. 
- - When PIC is disabled dwords are enough to store absolute addresses. - - When PIC is enabled we can store dword offsets relative to the start of - the table and simply add the address of the table to the offset in order - to calculate the full address. This approach also have the advantage of - eliminating a whole bunch of run-time .data relocations. - -commit d463a92e3b6f8ec04d54cc6c437892f9ffa98e29 -Author: Henrik Gramner -Date: Wed Aug 16 15:59:16 2017 +0200 - - x86inc: Support creating global symbols from local labels - - On ELF platforms such symbols needs to be flagged as functions with the - correct visibility to please certain linkers in some scenarios. - -commit 67b5c961046621a4554a9577e68cd9e31a212091 -Author: Henrik Gramner -Date: Tue Aug 15 16:11:32 2017 +0200 - - x86inc: Use .rdata instead of .rodata on Windows - - The standard section for read-only data on Windows is .rdata. Nasm will - flag non-standard sections as executable by default which isn't ideal. - -commit f15d366510cc60d9d9b2aeb576cade5b94509f37 -Author: Henrik Gramner -Date: Sat Aug 5 00:43:26 2017 +0200 - - x86inc: Set the correct cpuflag for AES-NI instructions - -commit 1ae63361304e952ac625a7016f2cf4a64e39a314 -Author: Henrik Gramner -Date: Sat Aug 5 00:09:52 2017 +0200 - - x86inc: Enable AVX emulation for floating-point pseudo-instructions - - There are 32 pseudo-instructions for each floating-point comparison - instruction, but only 8 of them are actually valid in legacy-encoded mode. - The remaining 24 requires the use of VEX-encoded (v-prefixed) instructions - and can therefore be disregarded for this purpose. 
- -commit 1e27313c12154dd3922ef7ab9508a4320e83c2ac -Author: Henrik Gramner -Date: Fri Aug 4 23:09:00 2017 +0200 - - configure: Increase x86 stack alignment on clang - -commit e9a5903edf8ca59ef20e6f4894c196f135af735e -Author: Anton Mitrofanov -Date: Sun Oct 22 20:18:39 2017 +0300 - - x86: Fix stack alignment for x264_cabac_encode_ue_bypass call - - Fix MSVS fprofiled build for win64 - -commit 45e6eb6006d1d23b6f69a1cfb62a86dc67092a81 -Author: Anton Mitrofanov -Date: Sun Oct 22 16:18:29 2017 +0300 - - mips: Fix incorrect pointers to msa optimized functions - -commit 09705c0b68232a05da8cc672c7c6092071eb4a21 -Author: Henrik Gramner -Date: Fri Aug 11 16:41:31 2017 +0200 - - Fix cpu capabilities listing on older x86 operating systems - - Some cpuflags would previously be displayed incorrectly when running older - operating systems without AVX support on modern CPU:s. - -commit ba24899b0bf23345921da022f7a51e0c57dbe73d -Author: Henrik Gramner -Date: Sat Jun 24 15:12:57 2017 +0200 - - x86: AVX-512 pixel_avg_weight_w8 - -commit d3214e6b102701911fc9d5fc92435e79e8b49100 -Author: Henrik Gramner -Date: Sat Jun 24 14:26:25 2017 +0200 - - x86: AVX-512 pixel_avg_weight_w16 - -commit 1d9dee2e9be717fcde416854f902db776312f141 -Author: Henrik Gramner -Date: Thu Jun 22 19:51:28 2017 +0200 - - x86: AVX-512 sub8x16_dct_dc - -commit f672795407bf90045e399eb057e5b2426d79f961 -Author: Henrik Gramner -Date: Thu Jun 22 11:26:21 2017 +0200 - - x86: AVX-512 sub8x8_dct_dc - -commit 0af1c6d0d0cc54ba4f888db39247774edcf19b44 -Author: Henrik Gramner -Date: Thu Jun 1 22:13:19 2017 +0200 - - x86: AVX-512 add8x8_idct - -commit 9034085265e5ca56e801c3efbf5c538fcc17c82b -Author: Henrik Gramner -Date: Sat Jun 10 16:01:53 2017 +0200 - - x86: AVX-512 sub16x16_dct - -commit 774c6c76d081305d9c891091e1d4694acb3f8a68 -Author: Henrik Gramner -Date: Wed Jun 7 16:55:48 2017 +0200 - - x86: AVX-512 sub8x8_dct - -commit 2d653411c2135377fb8c956e897880ff997b50ec -Author: Henrik Gramner -Date: Thu Jun 8 21:14:08 2017 +0200 
- - x86: AVX-512 sub4x4_dct - -commit 07483f72d7e1a4f7079a429dd1370f4221006862 -Author: Henrik Gramner -Date: Sun May 28 16:12:33 2017 +0200 - - x86: AVX-512 mbtree_propagate_list - - Uses gathers and scatters in combination with conflict detections to - vectorize the scalar part. - - Also improve the checkasm test to try different mb_y values and check - for out-of-bounds writes. - -commit 1a88481b85da964aadae1e05347342b03be17712 -Author: James Darnley -Date: Fri Jun 9 14:08:16 2017 +0200 - - x86inc: Add aesni cpuflag define - - Upstreaming this from FFmpeg. Unused in x264. - -commit 98e9543b4c39360326e6d5bf266c0c634cb9ee2e -Author: Martin Storsjö -Date: Mon May 29 12:13:03 2017 +0300 - - aarch64: Update the var2 functions to the new signature - - The existing functions could easily be used by just calling them - twice - this would give the following cycle numbers from checkasm: - - var2_8x8_c: 4110 - var2_8x8_neon: 1505 - var2_8x16_c: 8019 - var2_8x16_neon: 2545 - - However, by merging both passes into the same function, we get the - following speedup: - var2_8x8_neon: 1205 - var2_8x16_neon: 2327 - -commit 824802ad5a877244fb9eb48a892ed348736af5b0 -Author: Martin Storsjö -Date: Mon May 29 12:13:02 2017 +0300 - - arm: Update the var2 functions to the new signature - - The existing functions could easily be used by just calling them - twice - this would give the following cycle numbers from checkasm: - - Cortex A7 A8 A9 A53 - var2_8x8_c: 7302 5342 5050 4400 - var2_8x8_neon: 2645 1612 1932 1715 - var2_8x16_c: 14300 10528 10020 8637 - var2_8x16_neon: 5127 2695 3217 2651 - - However, by merging both passes into the same function, we get the - following speedup: - var2_8x8_neon: 2312 1190 1389 1300 - var2_8x16_neon: 4862 2130 2293 2422 - -commit 6f8aa71ce797be01fd2ebe53c072a6696ea19828 -Author: Henrik Gramner -Date: Wed Feb 15 22:00:25 2017 +0100 - - Add support for levels 6, 6.1, and 6.2 - - These levels were added in the 2016-10 revision of the H.264 specification and 
- improves support for content with high resolutions and/or high frame rates. - - Level 6.2 supports 8K resolution at 120 fps. - - Also shrink the x264_levels array by using smaller data types. - -commit 2baa28c880d11377115bbd5508e72053f6ba61f5 -Author: Henrik Gramner -Date: Thu Mar 23 17:51:09 2017 +0100 - - Use a larger integer type for the slice_table array - - Makes it possible to use slicing with resolutions larger than 2^24 pixels. - -commit c9d2c1c80b25c6ae15c41b200ec44ac2dabce725 -Author: Henrik Gramner -Date: Sun Feb 19 10:48:33 2017 +0100 - - analyse: Reduce the size the cost_mv arrays - - Use a dynamic size depending on the MV range. Reduces memory consumption by - up to a few megabytes. - - Drop a related old miscompilation check since it may otherwise cause an - out-of-bounds memory access. - - Also remove an unused extern variable declaration. - -commit d46a5a463f0de5ec479d256af72bba3de4ba2d1a -Author: Anton Mitrofanov -Date: Wed May 31 02:52:16 2017 +0300 - - Fix CABAC+8x8dct in 4:4:4 - - Use the correct ctxIdxInc calculation for coded_block_flag. - -commit 79b36f27a57dd511eefead6d5422689220c767b5 -Author: Anton Mitrofanov -Date: Tue Jun 6 02:07:21 2017 +0300 - - Fix 8x8dct in lossless encoding - - Change V and H intra prediction in lossless (TransformBypassModeFlag == 1) - macroblocks to correctly adhere to the specification. Affects lossless - encoding with 8x8dct or mix of lossless with normal macroblocks. - - 8x8dct has already been disabled in lossless mode for some time due to - being out-of-spec but this will allow us to re-enable it again. - -commit 68a550217c8d0fae6229c5b322b6810fe9652ef3 -Author: Anton Mitrofanov -Date: Thu Jun 8 18:35:21 2017 +0300 - - mbtree: Fix buffer overflow - - Could occur on the 1st pass in combination with --fake-interlaced and - some input heights due to allocating a too small buffer. 
- -commit df79067c0cf33da712d344b5f8869be7eaf326f3 -Author: Henrik Gramner -Date: Tue May 23 16:40:26 2017 +0200 - - x86: Avoid self-relative expressions on macho64 - - Functions that uses self-relative expressions in the form of [foo-$$] - appears to cause issues on 64-bit Mach-O systems when assembled with nasm. - Temporarily disable those functions on macho64 for the time being until - we've figured out the root cause. - -commit f1ac7122645bbeb56e7a4401f71a7055cb2431c4 -Author: Anton Mitrofanov -Date: Mon May 22 23:59:32 2017 +0300 - - configure: Don't try to detect clang by $CC - - Only check if option -Werror=unknown-warning-option is supported before adding it - -commit b4d811df4fd7dbb9220fe2c8f2a2c2a6ba2bbc87 -Author: Martin Storsjö -Date: Mon May 22 13:10:46 2017 +0300 - - checkasm: Use the right variable in a loop condition - - Prior to this, this loop hasn't run at all. The condition has been - the same since it was introduced in 5b0cb86f. - - This issue was pointed out by a clang warning. - -commit a3d24462ae284bf03958f0ed41e824dd7d48e15e -Author: Anton Mitrofanov -Date: Mon May 22 22:02:34 2017 +0300 - - x86: Fix linking with 8-bit depth shared libx264 - -commit d1fe6fd1c0930d88da90f23f6d5fdb6ceaf6b0a9 -Author: Henrik Gramner -Date: Mon May 15 00:18:36 2017 +0200 - - x86: Only enable AVX-512 in 8-bit mode - -commit 6151882671b6f9e1ceec2cdb76dd1123c8dc766f -Author: Henrik Gramner -Date: Fri May 12 00:43:43 2017 +0200 - - x86: AVX-512 cabac_block_residual - -commit 4579616543f2e701ee9510f5eb57e31a3ef99e10 -Author: Henrik Gramner -Date: Wed May 10 18:36:59 2017 +0200 - - x86: AVX-512 pixel_sad_x3 and pixel_sad_x4 - - Covers all variants: 4x4, 4x8, 8x4, 8x8, 8x16, 16x8, and 16x16. - -commit 993eb2079e45619098241e14806fc70030968af6 -Author: Henrik Gramner -Date: Sun May 7 23:35:49 2017 +0200 - - x86: AVX-512 pixel_sad - - Covers all variants: 4x4, 4x8, 4x16, 8x4, 8x8, 8x16, 16x8, and 16x16. 
- -commit 2463174c119cef4f7e6a36a1151054fbb268b082 -Author: Henrik Gramner -Date: Thu May 4 21:53:28 2017 +0200 - - x86: AVX-512 decimate_score - - Also drop the MMX versions and improve the SSE2, SSSE3 and AVX2 versions. - -commit 49fb50a67cc41e4bed2dd66f7beed12797249cd9 -Author: Henrik Gramner -Date: Mon May 1 14:55:45 2017 +0200 - - x86: AVX-512 pixel_var2_8x8 and 8x16 - -commit 92c074e27f6bfccee42b41c183203b7b2763a94d -Author: Henrik Gramner -Date: Mon May 1 14:54:32 2017 +0200 - - Rework pixel_var2 - - The functions are only ever called with pointers to fenc and fdec and the - strides are always constant so there's no point in having them as parameters. - - Cover both the U and V planes in a single function call. This is more - efficient with SIMD, especially with the wider vectors provided by AVX2 and - AVX-512, even when accounting for losing the possibility of early termination. - - Drop the MMX and XOP implementations, update the rest of the x86 assembly - to match the new behavior. Also enable high bit-depth in the AVX2 version. - - Comment out the ARM, AARCH64, and MIPS MSA assembly for now. - -commit 4c48f9e751e969188d606eb15aeada7f652c9db9 -Author: Henrik Gramner -Date: Sat Apr 29 14:26:40 2017 +0200 - - x86: AVX-512 pixel_var_8x8, 8x16, and 16x16 - - Make the SSE2, AVX, and AVX2 versions a bit faster. - - Drop the MMX and XOP versions. - -commit 1cf7baa462ca52de7f07d6e4c795853900bb50bb -Author: Henrik Gramner -Date: Fri Apr 28 21:35:25 2017 +0200 - - x86: AVX-512 pixel_sa8d_8x8 - -commit 386050088a66aa66bcaebb9b6b4b0a2b6af76a73 -Author: Henrik Gramner -Date: Thu Apr 13 23:56:04 2017 +0200 - - x86: AVX-512 pixel_satd - - Covers all variants: 4x4, 4x8, 4x16, 8x4, 8x8, 8x16, 16x8, and 16x16. - -commit 2eceefe89fea91bbc7d5af2a1b4a9047d8da7805 -Author: Henrik Gramner -Date: Wed Apr 19 16:39:48 2017 +0200 - - x86: AVX-512 deblock_strength - - Also drop the MMX version and make some slight improvements to the SSE2, - SSSE3, AVX, and AVX2 versions. 
- -commit 3081ffa1c540d1df05123e0fab1937985573ac78 -Author: Henrik Gramner -Date: Wed Apr 12 16:21:09 2017 +0200 - - x86: AVX-512 plane_copy_deinterleave_v210 - -commit 95dc64c4efdf16404e58be9ff9da4e0acaa1a4b2 -Author: Henrik Gramner -Date: Sun Apr 9 20:34:28 2017 +0200 - - x86: AVX-512 memzero_aligned - - Reorder some elements in the x264_t.mb.pic struct to reduce the amount - of padding required. - - Also drop the MMX implementation in favor of SSE. - -commit c0cd7650cb65164d183d8f77d0697b7569a52917 -Author: Henrik Gramner -Date: Fri Apr 7 21:34:40 2017 +0200 - - x86: AVX and AVX-512 memcpy_aligned - - Reorder some elements in the x264_mb_analysis_list_t struct to reduce the - amount of padding required. - - Also drop the MMX implementation in favor of SSE. - -commit f29fbc6fd23e9bc2d800eb1246e8fa19a203b831 -Author: Henrik Gramner -Date: Thu Apr 6 16:06:34 2017 +0200 - - x86: AVX-512 dequant_8x8_flat16 - -commit 40aca29a164d5e5e6589d507bdcae6717d72f6bf -Author: Henrik Gramner -Date: Tue Apr 4 20:54:12 2017 +0200 - - x86: AVX-512 dequant_8x8 - -commit 74f7802bb7bd301299f8229a0552a7caf2b55434 -Author: Henrik Gramner -Date: Tue Apr 4 20:01:26 2017 +0200 - - x86: AVX-512 dequant_4x4 - -commit 3451ba3af49e58a720277615df3d8e4a4171986f -Author: Henrik Gramner -Date: Tue Mar 28 22:59:56 2017 +0200 - - x86: AVX-512 mbtree_propagate_cost - - Also make the AVX and AVX2 implementations slightly faster. 
- -commit 75f6f9b228c3498b8c9b0d97fc925c0a7e6e6f43 -Author: Henrik Gramner -Date: Mon Mar 27 18:19:53 2017 +0200 - - x86: AVX-512 coeff_last - -commit c3a1d1d892a79bc460c7fc192b0bf7a32c2ce0b2 -Author: Henrik Gramner -Date: Sun Mar 26 18:29:37 2017 +0200 - - x86: AVX-512 zigzag_interleave_8x8_cavlc - -commit edb22f57ba03718c1cb9781ba005aec20a1e50e0 -Author: Henrik Gramner -Date: Sun Mar 26 11:34:18 2017 +0200 - - x86: AVX-512 zigzag_scan_8x8_field - -commit 77b9a818fc622d0cdaa96aeb37339fbd5b1ef857 -Author: Henrik Gramner -Date: Sat Mar 25 22:13:22 2017 +0100 - - x86: AVX-512 zigzag_scan_4x4_field - -commit 724a577237f27cdb0c0fd18ef8ed32d39430796b -Author: Henrik Gramner -Date: Sat Mar 25 19:14:28 2017 +0100 - - x86: AVX-512 zigzag_scan_8x8_frame - - The vperm* instructions ignore unused bits, so we can pack the permutation - indices together to save cache and just use a shift to get the right values. - -commit 2b2f039512bde7c097280255c6376cf9a901e08e -Author: Henrik Gramner -Date: Sat Mar 25 19:14:22 2017 +0100 - - x86: AVX-512 zigzag_scan_4x4_frame - -commit 1878c7f2af0a9c73e291488209109782c428cfcf -Author: Henrik Gramner -Date: Fri May 12 00:03:10 2017 +0200 - - checkasm: x86: More accurate ymm/zmm measurements - - YMM and ZMM registers on x86 are turned off to save power when they haven't - been used for some period of time. When they are used there will be a - "warmup" period during which performance will be reduced and inconsistent - which is problematic when trying to benchmark individual functions. - - Periodically issue "dummy" instructions that use those registers to - prevent them from being powered down. The end result is more consistent - benchmark results.
- -commit 472ce3648aea3ddc16b7716eb114f4bcdb8fea8f -Author: Henrik Gramner -Date: Sat Mar 25 10:16:09 2017 +0100 - - x86: AVX-512 support - - AVX-512 consists of a plethora of different extensions, but in order to keep - things a bit more manageable we group together the following extensions - under a single baseline cpu flag which should cover SKL-X and future CPUs: - * AVX-512 Foundation (F) - * AVX-512 Conflict Detection Instructions (CD) - * AVX-512 Byte and Word Instructions (BW) - * AVX-512 Doubleword and Quadword Instructions (DQ) - * AVX-512 Vector Length Extensions (VL) - - On x86-64 AVX-512 provides 16 additional vector registers, prefer using - those over existing ones since it allows us to avoid using `vzeroupper` - unless more than 16 vector registers are required. They also happen to - be volatile on Windows which means that we don't need to save and restore - existing xmm register contents unless more than 22 vector registers are - required. - - Also take the opportunity to drop X264_CPU_CMOV and X264_CPU_SLOW_CTZ while - we're breaking API by messing with the cpu flags since they weren't really - used for anything. - - Big thanks to Intel for their support. - -commit d2b5f4873e2147452a723b61b14f030b2ee760a5 -Author: Henrik Gramner -Date: Sat Mar 18 18:50:36 2017 +0100 - - x86: Change assembler from yasm to nasm - - This is required to support AVX-512. - - Drop `-Worphan-labels` from ASFLAGS since it's enabled by default in nasm. - - Also change alignmode from `k8` to `p6` since it's more similar to `amdnop` - in yasm, e.g. use long nops without excessive prefixes. - -commit 8c2974255b01728d4eda2434cc1997c4a3ca5eff -Author: Henrik Gramner -Date: Sat May 6 12:26:56 2017 +0200 - - x86: Add some additional cpuflag relations - - Simplifies writing assembly code that depends on available instructions. 
- - LZCNT implies SSE2 - BMI1 implies AVX+LZCNT - AVX2 implies BMI2 - - Skip printing LZCNT under CPU capabilities when BMI1 or BMI2 is available, - and don't print FMA4 when FMA3 is available. - -commit 93bc2cbc66f0bf4616965dcd7e0eba89201c8086 -Author: Henrik Gramner -Date: Fri Apr 14 16:16:49 2017 +0200 - - x86: Faster SSE2 pixel_sad_16x16 and 16x8 - - Also make the order of fenc/fdec arguments a bit more consistent. - -commit 8ae2b62462176cd731a1cb8b5bdc9a38cba0fbe4 -Author: Anton Mitrofanov -Date: Mon May 15 00:40:52 2017 +0300 - - msvs/icl: Improve target host detection - -commit 181a920ad5d0acdc3a08418c0e9c95be4785b814 -Author: Alexandra Hájková -Date: Sat May 13 17:14:52 2017 +0000 - - ppc: Optimize add8x8_idct_dc - - Increases speedup compared to C from 2x to 6x. - -commit d0b905b901c5ee5989777cf437a7f20c1fa0a794 -Author: Henrik Gramner -Date: Sun Feb 19 10:33:16 2017 +0100 - - analyse: Faster min/max MV clipping - - Values only needs to be clipped in one direction. - -commit 1bde30193eb91d1bc69b00a27e6874eb88ed4eab -Author: Henrik Gramner -Date: Thu Feb 16 20:04:10 2017 +0100 - - slicetype_mb_cost: Clip MVs based on MV range - - Improves cost calculations, especially when a short MV range is used. - -commit dcf406978b9dda5c2b8aab80af5c1c47c78efd92 -Author: Henrik Gramner -Date: Sun Jan 29 21:38:43 2017 +0100 - - Support YUYV and UYVY packed 4:2:2 raw input - - Packed YUV is arguably more common than planar YUV when dealing with raw - 4:2:2 content. - - We can utilize the existing plane_copy_deinterleave() functions with some - additional minor constraints (we cannot assume any particular alignment - or overread the input buffer). - - Enables assembly optimizations on x86. - -commit aaa9aa83a111ed6f1db253d5afa91c5fc844583f -Author: Henrik Gramner -Date: Thu Apr 20 21:58:23 2017 +0200 - - x86: Utilize 3-arg instructions in AVX deblock - - Avoids some redundant register-register moves. 
- -commit a52d41c4d135c79373a86c3a82dcc2ec3f88b025 -Author: Martin Storsjö -Date: Fri Mar 24 11:33:46 2017 +0200 - - configure: Support targeting ARM with MSVC tools - - Set up the right gas-preprocessor as assembler frontend in these cases, - using armasm as actual assembler. - - Don't try to add the -mcpu -mfpu options in this case. - - Check whether the compiler actually supports inline assembly. - - Check for the ARMv7 features in a different way for the MSVC compiler. - -commit b22a5db3c481b10b4a6ec190978d97b377750a12 -Author: Martin Storsjö -Date: Fri Mar 24 11:33:45 2017 +0200 - - configure: Check for -lshell32 before forcibly adding it into LDFLAGSCLI - - When targeting the Windows Phone API subset, there is no shell32.lib. - - When targeting Windows Phone/RT, the CLI itself won't be built, but - LDFLAGSCLI are included in all later cases of cc_check within configure. - Therefore only add -lshell32 there if it actually is usable. - -commit 0aed59e74808f1cd22ee47c055a8eb4f367b2f55 -Author: Martin Storsjö -Date: Thu May 4 22:00:51 2017 +0300 - - arm: Always unconditionally declare .arch armv7-a - - We already unconditionally declare .fpu neon and try to build all the - neon codepaths (but only execute them conditionally based on a runtime - check). - - This fixes builds targeting armv6, where the rbit instruction isn't - available. This instruction is only used within a neon function in - any case, so there's little point in emulating it. - -commit 196d7676c8f40b7c1f8f2f4af64e09ebf4c9816b -Author: Martin Storsjö -Date: Fri Mar 24 11:33:44 2017 +0200 - - arm: Use .section .rodata for non-elf, non-mach platforms as well - - If targeting windows with armasm, gas-preprocessor can rewrite the - .section .rodata into the right construct for that platform. 
- -commit 9bffbabfecf0bda066362a1b76b62c5085257e18 -Author: Martin Storsjö -Date: Fri Mar 24 11:33:41 2017 +0200 - - gas-preprocessor: Support conversion of additional arm instructions into thumb - - Convert muls into mul+cmp. - - Convert "and r0, sp, #xx" into "mov r0, sp", "and r0, r0, #xx". - - Convert ldr with a too large shift into add+ldr. This only works in the - special case when the base register is the same as the target for the ldr. - -commit 2e9bd88f27ed8f5f058e7e220070b7a15965cb8e -Author: Martin Storsjö -Date: Fri Mar 24 11:33:40 2017 +0200 - - arm: Explicitly declare using the .text segment in the function macro - - This fixes one issue in building with MS armasm via gas-preprocessor. - Without the .text segment specification, the object files assembled - fine, but linking failed. (armasm source files don't get the text/code - segment implied automatically if nothing is specified.) - -commit 64843af913e76fd7fb590e9227f678add96e8a3c -Author: Martin Storsjö -Date: Fri Mar 24 11:33:39 2017 +0200 - - osdep: Use the EXPAND macro on other cases of ALIGNED_ARRAY_EMU - - EXPAND is already used on the other cases where ALIGNED_ARRAY_EMU - is used on all platforms (originally needed for ICL, later also - required by MSVC); apply the same change (originally from 21ba91ae) - for the cases that only are used on ARM. - - This fixes use of ALIGNED_ARRAY_16 with MSVC when targeting ARM. - -commit 757091fe3abd0af0f45d11f52b652f0be2fb76f5 -Author: Martin Storsjö -Date: Fri Mar 24 11:33:38 2017 +0200 - - Update to the latest version of gas-preprocessor.pl - - From http://git.libav.org/?p=gas-preprocessor.git - - This update contains changes from myself only. - -commit d13705191cdcbcd10d87524dbb0c26ba998d8dcc -Author: Martin Storsjö -Date: Fri Mar 24 11:33:37 2017 +0200 - - arm: Skip using gas-preprocessor for iOS on arm as well - - The few constructs that differ can easily be handled within the - source itself - tested to be working since at least Xcode 6. 
- -commit 3a3cfe32416efa4f966c0586411148236e4703c1 -Author: Martin Storsjö -Date: Fri Mar 24 11:33:36 2017 +0200 - - arm: Use const macros in arm assembly where applicable - - This unifies the source code style, and allows building the code - with clang without gas-preprocessor. - -commit 1e92821c5a52c80ca4d1a9b6d038bec84be48b0a -Author: Martin Storsjö -Date: Fri Mar 24 11:33:35 2017 +0200 - - arm: Use commas between all macro arguments in arm assembly - - The clang built-in assembler requires proper commas between all macro - arguments. As long as gas-preprocessor is used when building with clang, - this isn't an issue. - -commit a84e6a486b991bffb2cc9f86b6e236978d251d2c -Author: Martin Storsjö -Date: Fri Mar 24 11:33:34 2017 +0200 - - aarch64: Skip invoking gas-preprocessor for iOS - - Clang can handle all the constructs used there these days, working - since Xcode 6 at least. - -commit 535fd2ec9985b9874d6ed23904404d0d2f5d40d6 -Author: Martin Storsjö -Date: Fri Mar 24 11:33:33 2017 +0200 - - aarch64: Use the const macro in the aarch64 checkasm assembly source - - This fixes building the source with clang for iOS without gas-preprocessor. - -commit bec87ba69421572282e473cf8f2e11c77285ed88 -Author: Henrik Gramner -Date: Wed Apr 12 23:26:32 2017 +0200 - - Windows: Add support for MSVC compilation with WSL - - In Windows 10 version 1703 (Creators Update) WSL supports calling native - Windows binaries from the Bash shell, but it requires using full file - names including extension, e.g. `cl.exe` instead of `cl`. - - We also don't have access to `cygpath`, so use a simple regex for - converting the dependencies to Unix paths that `make` can understand. - -commit 43e9a6157752c2a3c2cc6c6a7fa13da72033d1dd -Author: Henrik Gramner -Date: Sun Jan 29 22:58:24 2017 +0100 - - cli: Improve the --fullhelp raw demuxer input-csp listing - - Use the same logic for indentation as the lavf demuxer. 
- -commit 3538df12688fc4408f585c4e65ee92d5a4737b2c -Author: Anton Mitrofanov -Date: Sat May 20 21:17:59 2017 +0300 - - x86inc: Remove argument from WIN64_RESTORE_XMM - - The use of rsp was pretty much hardcoded there and probably didn't work - otherwise with stack_size > 0. - -commit e4b0974a4ea3a727f6cc8941e9accf7ef3ba0637 -Author: Henrik Gramner -Date: Sat Apr 22 20:30:35 2017 +0200 - - x86inc: Prefer r14/r15 over r12/r13 on x86-64 - - Due to a peculiarity in the ModR/M addressing encoding, the r12 and r13 - registers sometimes requires an additional byte when used as a base register. - - r14 and r15 doesn't have that issue, so prefer using them. - -commit 46a489b5e21cae3b4fea5d41cc285dcaf79d19e3 -Author: Henrik Gramner -Date: Thu Apr 20 19:16:51 2017 +0200 - - x86inc: Make REP_RET identical to RET in SSSE3+ functions - - There's no point in emitting a rep prefix before ret on modern CPUs. - -commit 50a9dd78263191474c948d53e837348abd0bf316 -Author: Henrik Gramner -Date: Wed Mar 29 16:43:57 2017 +0200 - - x86inc: Fix call with memory operands - - We overload the `call` instruction with a macro, but it would misbehave when - the macro argument wasn't a valid identifier. Fix it by explicitly checking - if the argument is an identifier. - -commit d13b4c3a9574cd2fbd5407c7dfc58eeff72d2080 -Author: Henrik Gramner -Date: Sun Jan 29 16:41:33 2017 +0100 - - osdep: Rework alignment macros - - Drop ALIGNED_N and ALIGNED_ARRAY_N in favor of using explicit alignment. - - This will allow us to increase the native alignment without unnecessarily - increasing the alignment of everything that's currently 32-byte aligned. 
- -commit 5840e200a0f1869a0596c5ed75c76f4d3221dd68 -Author: Vittorio Giovara -Date: Mon Jan 30 22:14:57 2017 +0100 - - Move cabac_block_residual function declarations - -commit a2d2621cc5741414b1f1adfbc08f19f1cc763847 -Author: Vittorio Giovara -Date: Mon Jan 30 22:14:59 2017 +0100 - - Recursively delete conftest files - - On OS X, one of the conftest files might be a directory named `conftest.dSYM`. - -commit 988ce459433fd3f978d632e8fc0ef9c19c94a6a1 -Author: Vittorio Giovara -Date: Mon Jan 30 22:14:56 2017 +0100 - - Drop unused function declarations - -commit fb3f97833cbe3305eb613633e604f424d6d2d096 -Author: Vittorio Giovara -Date: Fri Jan 27 18:06:39 2017 +0100 - - x86: Adjust cache64_ssse3 function suffixes - - Makes those function names more consistent with other similar functions. - -commit a77f3917cc6ba5e1d3c20ca649d4114217976d53 -Author: Vittorio Giovara -Date: Fri Jan 27 16:21:16 2017 +0100 - - mc: Mark a function only used within the file as static - -commit 0ca36bfa3d2bf272da88b1df5abfc0406662989a -Author: Vittorio Giovara -Date: Fri Jan 27 16:21:15 2017 +0100 - - ppc: Drop two unused static functions - -commit d32d7bf1c6923a42cbd5ac2fd540ecbb009ba681 -Author: Henrik Gramner -Date: Fri May 19 16:08:34 2017 +0200 - - cli: Verify that yuv/y4m input has at least one frame of data - - Prevents a SIGBUS crash caused by attempting to access a memory-mapped - region beyond the end of the input file. - -commit 959e869c20ea151917695930d9ad0a7a9a2f90c5 -Author: Kaustubh Raste -Date: Fri Apr 14 15:29:31 2017 +0530 - - mips: Fix out-of-tree build - - Signed-off-by: Kaustubh Raste - -commit d6eb2c9630d40a2765d5092f87637f4e4d084ed1 -Author: Henrik Gramner -Date: Sat Mar 25 00:02:11 2017 +0100 - - checkasm: Fix load_deinterleave_chroma_fdec test - - The function only writes to parts of the destination buffer but the test - verifies the content of the entire buffer. 
The problem is that some earlier - IDCT functions clobber the same part of the buffer with garbage when - benchmarked which would incorrectly cause test failures. - - Fix this by explicitly zeroing the buffers beforehand. - -commit a472b60daae0cac17d91ddf62ad4f474ded63e5b -Author: Henrik Gramner -Date: Fri Mar 24 22:27:42 2017 +0100 - - checkasm: Fix compilation on hardened x86-64 ELF systems - - Normal PC-relative relocations cannot be used for resolving the address of - external symbols on systems where ASLR results in the offset being larger - than 32 bits. We are required to go through the PLT instead. - -commit 469ad705b1064207b6b1068d1e25a0a591021007 -Author: Martin Storsjö -Date: Thu Mar 23 15:05:38 2017 +0200 - - aarch64: Fix building checkasm for iOS - - On iOS, symbols are prefixed - this prefix gets added by the X() - macro. - -commit 93340ca300e7ce66f49e41b7c2ef4a0492a7e57c -Author: Martin Storsjö -Date: Thu Mar 23 15:05:37 2017 +0200 - - configure: Always enable PIC in aarch64 assembly for apple platforms - - This is similar to what we do for 32-bit ARM assembly as well. - - Fixes linker errors such as `ld: Absolute addressing not allowed in - arm64 code but used in '_x264_cabac_encode_terminal_asm' referencing - '_x264_cabac_range_lps' for architecture arm64`.
- -commit 90a61ec76424778c050524f682a33f115024be96 -Author: Alexandra Hájková -Date: Mon Dec 5 10:28:53 2016 +0000 - - ppc: AltiVec plane_copy_deinterleave - -commit bd6b66dbf9fcf67b7ebb23e4e9249083191fb984 -Author: Alexandra Hájková -Date: Mon Jan 2 12:56:48 2017 +0000 - - ppc: AltiVec plane_copy_deinterleave_v210 - -commit 00f1670087db1b025a8088289de8938bf88a0d8b -Author: Alexandra Hájková -Date: Wed Dec 7 19:48:02 2016 +0000 - - ppc: AltiVec plane_copy_deinterleave_rgb - - Also add some missing vector types in ppccommon.h - -commit 5e1ed367d725f895eeadf358861ab52521a420d3 -Author: Vittorio Giovara -Date: Thu Jan 19 17:43:57 2017 +0100 - - ppc: Adjust AltiVec function suffix - - Architecture should always be the last element. - -commit 28ebb95d92278069b80ee729eb1884fe0981c6ae -Author: Vittorio Giovara -Date: Mon Jan 9 22:28:20 2017 +0100 - - Move the x264_mdate() declaration to the appropriate header - -commit 1d2420981aa004f051a0869c005776084f7d2a44 -Author: Vittorio Giovara -Date: Tue Jan 17 17:04:19 2017 +0100 - - arm/aarch64: Correctly prefix integral function symbols - -commit 4c4c495d58dbdea46a23947e4f202fc3b82fb891 -Author: Anton Mitrofanov -Date: Fri Jan 13 14:57:51 2017 +0100 - - x86: Avoid using hardcoded function symbol prefixes - -commit 2524fc3164d9f00b393d4254d2c5ea8f3b9d43b0 -Author: Henrik Gramner -Date: Wed Jan 18 21:57:14 2017 +0100 - - x86: AVX2 high bit-depth load_deinterleave_chroma - - load_deinterleave_chroma_fenc: 50% faster than AVX - load_deinterleave_chroma_fdec: 25% faster than AVX - -commit cce50082129d3c92bd41bc0afc5a8c8d93084c9c -Author: Henrik Gramner -Date: Wed Jan 18 21:46:55 2017 +0100 - - x86: AVX2 load_deinterleave_chroma_fenc - - 20% faster than SSSE3. - -commit c22c10ddb21e9f5af1da83d37122e6f7388e1342 -Author: Henrik Gramner -Date: Tue Jan 17 21:59:47 2017 +0100 - - x86: AVX2 plane_copy_deinterleave - - 50% faster than SSSE3 in 8-bit. - 25% faster than AVX in high bit-depth. 
- - Also drop the MMX versions of deinterleave functions in favor of SSE2. - -commit f4890275ca6523dfe5b4ae60279ae8597d9dbd4b -Author: Henrik Gramner -Date: Thu Jan 12 22:16:53 2017 +0100 - - x86: AVX2 plane_copy_deinterleave_rgb - - Around 15% faster than SSSE3. - -commit da71b556730c8eb6c12a0d6950a221a4e4a99ca6 -Author: Henrik Gramner -Date: Thu Jan 12 21:36:28 2017 +0100 - - x86: Faster plane_copy_deinterleave_rgb_sse2 - - 50% faster than the previous SSE2 function. - -commit 3c7bf52c5b0a849458a45b5628ed1cc4b898da5f -Author: Henrik Gramner -Date: Sun Jan 15 14:52:29 2017 +0100 - - x86util: Reduce code size of high bit-depth AVX LOAD_DIFF - - AVX supports unaligned memory operands which makes the SATD code a bit denser. - -commit c7a2e327bebd2b863c2620b6962fa18ab681e5dd -Author: Henrik Gramner -Date: Sun Jan 1 19:10:10 2017 +0100 - - Bump dates to 2017 - -commit 97eaef2ab82a46d13ea5e00270712d6475fbe42b -Author: Alexandra Hájková -Date: Sat Jan 21 12:34:49 2017 +0000 - - ppc: Fix the pre-VSX vec_vsx_st() fallback macro - - It would previously only work correctly with 8-bit data types. - - Fixes compilation with --disable-vsx. - -commit 2ebe09a4f583d108c6ec1caf70b2a7a289a8820d -Author: Alexandra Hájková -Date: Wed Jan 18 09:13:39 2017 +0000 - - Fix plane_copy_deinterleave_v210 on big-endian - -commit 79288d90471e246584d19054bdb5381982114126 -Author: Alexandra Hájková -Date: Wed Dec 21 13:13:43 2016 +0000 - - ppc: Avoid instantiating unused plane_copy functions - - Those functions are currently only used in 8-bit mode and results in - warnings in other bit depths. - -commit 2ebdb90bd32c3d1618b1c5b360bff750b82b1d0b -Author: Martin Storsjö -Date: Tue Dec 27 00:22:48 2016 +0200 - - arm: Load mb_y properly in mbtree_propagate_list_internal_neon - - The previous version, attempting to load two stack parameters at once, - only would have worked if they were interpreted and loaded as 32 bit - elements, not when loading them as 16 bit elements. 
- -commit b97ae0644f16bad2e2c9c9181264a946769a0aa0 -Author: Anton Mitrofanov -Date: Mon Oct 31 14:39:52 2016 +0300 - - analyse: Fix lambda table values - -commit b2b39dae0bd891c8d150b4f4c3a2a24d8d6c1431 -Author: Anton Mitrofanov -Date: Sat Nov 26 15:30:58 2016 +0300 - - Cosmetics - - Also make x264_weighted_reference_duplicate() static. - -commit 9c82d2b65534e477c972b811a4dd5004d0dd262e -Author: Alexandra Hájková -Date: Mon Nov 28 14:04:10 2016 +0000 - - ppc: AltiVec store_interleave_chroma - -commit ea1fee272b20e1bcff2a862ea9a29e151c9136a9 -Author: Alexandra Hájková -Date: Mon Nov 28 10:51:54 2016 +0000 - - ppc: AltiVec plane_copy_interleave - -commit 42348a8e664b091203a05d3e15555b5085afcac1 -Author: Alexandra Hájková -Date: Sat Nov 26 20:03:34 2016 +0000 - - ppc: AltiVec plane_copy_swap - -commit 2610019af8bfb8e71f813cd2188b9eccbc287c59 -Author: Alexandra Hájková -Date: Wed Nov 23 20:53:51 2016 +0100 - - ppc: AltiVec zigzag_interleave_8x8_cavlc - -commit 25e4e06fe8151f627a953fbd2bd39302436bf689 -Author: Alexandra Hájková -Date: Wed Nov 23 20:53:50 2016 +0100 - - ppc: AltiVec zigzag_scan_8x8_frame - -commit 99863c665a6d4ec58b7fcc4a8a791e9c8f35a86e -Author: Alexandra Hájková -Date: Mon Nov 14 15:06:06 2016 +0100 - - ppc: AltiVec sub8x8_dct_dc - -commit 42cb0a6813714b5380e23871a155e3820846d991 -Author: Alexandra Hájková -Date: Mon Nov 14 15:06:05 2016 +0100 - - ppc: AltiVec add8x8_idct_dc - -commit 983acc911543453449a65bd02bbdff4c8cfe8e6a -Author: Martin Storsjö -Date: Wed Nov 16 10:57:31 2016 +0200 - - checkasm: aarch64: Add filler args to make sure all parameters are passed on the stack - - This, combined with clobbering the stack space prior to the call, - increases the chances of finding cases where 32 bit parameters - are erroneously treated as 64 bit. 
- -commit 8ada354c9b5d72356c34c9ae3f787a6df4d61506 -Author: Martin Storsjö -Date: Wed Nov 16 10:57:30 2016 +0200 - - checkasm: aarch64: Clobber the stack before calling functions - -commit 62d604ac6dddbf553c1ff2432d899b61cc50d95a -Author: Alexandra Hájková -Date: Tue Nov 1 23:16:17 2016 +0100 - - ppc: Use vec_vsx_ld instead of VEC_LOAD/STORE macros - - Remove VEC_LOAD*, some of VEC_STORE* macros, some PREP* macros and - VEC_DIFF_H_OFFSET macro. - - Make sure the functions do not use deprecated primitives. - -commit 16142d8ee2a974060ecbad0f495b5a5c6516a75e -Author: Luca Barbato -Date: Tue Nov 1 23:16:16 2016 +0100 - - ppc: Provide fallbacks for older architectures - -commit 2b741f81e51f92d053d87a49f59ff1026553a0f6 -Author: Luca Barbato -Date: Tue Nov 1 23:16:14 2016 +0100 - - ppc: Add VSX support to configure - -commit 1f7518182e3204cb14e87baffb0150a848167ddc -Author: Luca Barbato -Date: Tue Nov 1 23:16:13 2016 +0100 - - ppc: Manually unroll the horizontal prediction loop - - Doubles the speedup from the function (from being slower to be over - twice as fast as C). - -commit 0706ddb1df88d716cf73decba4d82b953011760c -Author: Henrik Gramner -Date: Sat Oct 8 17:20:18 2016 +0200 - - x86inc: Avoid using eax/rax for storing the stack pointer - - When allocating stack space with an alignment requirement that is larger - than the current stack alignment we need to store a copy of the original - stack pointer in order to be able to restore it later. - - If we chose to use another register for this purpose we should not pick - eax/rax since it can be overwritten as a return value. - -commit 4d5c8b01a48f72f9c40651e92c39294326a0863f -Author: Henrik Gramner -Date: Thu Dec 1 16:05:16 2016 +0100 - - Show the correct settings for --preset slow in --fullhelp - - The slow preset was recently adjusted but we forgot to update the - corresponding --fullhelp message to reflect the change.
- -commit c996ed202e2d17d1d8ae42c42d0707e51c29bb93 -Author: Martin Storsjö -Date: Mon Nov 14 23:54:51 2016 +0200 - - checkasm: arm/aarch64: Fix the amount of space reserved for stack parameters - - Even if MAX_ARGS - 2 (for arm) or MAX_ARGS - 6 (for aarch64) parameters - are passed on the stack to checkasm_checked_call, we actually only - need to store MAX_ARGS - 4 (for arm) or MAX_ARGS - 8 (for aarch64) - parameters on the stack when calling the tested function. - -commit cd15b354a887943d525e6fd8096ad4b75692d2b2 -Author: Janne Grunau -Date: Mon Nov 14 23:54:50 2016 +0200 - - checkasm: arm: preserve the stack alignment in x264_checkasm_checked_call - - The stack used by x264_checkasm_checked_call_neon was a multiple of 4 - when the checked function is called. AAPCS requires a double word (8 byte) - aligned stack at public interfaces. Since both calls are public interfaces - the stack is misaligned when the checked function is called. - - This can cause issues if code called within this (which includes - the C implementations) relies on the stack alignment. - -commit 834e1b11e174f2694a4c81b4922c0c5f8778796a -Author: Martin Storsjö -Date: Wed Nov 16 10:56:14 2016 +0200 - - arm: Don't use vcmp.f64 for testing for an all-zeros register - - On iOS, vcmp.f64 can behave as if the register was zero, if the - register (interpreted as a f64), was a denormal number. - - The vcmp.f64 (and other VFP instructions) will trap to the kernel - (which is supposed to implement the FP operation, which it apparently - doesn't do properly on iOS) if the value is a denormal. If this happens, - the whole comparison ends up way more costly.
- -commit a91e95fca2222ac0731e987a07f4b11c670f4556 -Author: Janne Grunau -Date: Wed Nov 16 10:49:14 2016 +0200 - - aarch64: Clear the upper half of int parameters in x264_plane_copy_core_neon - -commit 1eab3b402e1d7729da295024fa7eec8b09e30c20 -Author: Luca Barbato -Date: Tue Nov 1 23:16:18 2016 +0100 - - ppc: Fix hadamard for little-endian - - Extending to 16-bit works with flipped bytes. - -commit 75918e1849e1286885bfcfb0c348de885a702fb3 -Author: Anton Mitrofanov -Date: Thu Sep 22 00:17:48 2016 +0300 - - Correctly signal max_dec_frame_buffering with --keyint 1 - - According to E.2.1 it is inferred to be equal to 0 only if profile_idc is equal - to 44, 86, 100, 110, 122, or 244 and constraint_set3_flag is equal to 1. - -commit 72d53ab2ac7af24597a824e868f2ef363a22f5d4 -Author: Henrik Gramner -Date: Sat Sep 17 21:41:52 2016 +0200 - - x86: Faster pixel_ssim_4x4x2_core - -commit 8c07263ad9218bdc3e0f5b84d578968513885df7 -Author: Henrik Gramner -Date: Sat Sep 17 21:14:35 2016 +0200 - - x86: Deduplicate a constant in hpel_filter_c - -commit 9521b278adb92081f052c1b7bfc4b95651d88b07 -Author: Henrik Gramner -Date: Sat Sep 17 14:45:08 2016 +0200 - - x86: Faster pixel_ssd_nv12 - - Also drop the MMX2 version to simplify things. - -commit 75d0f9cc8770bc4f36785062116757d24eb44604 -Author: Henrik Gramner -Date: Sun Sep 11 15:32:54 2016 +0200 - - x86: SSE zigzag_scan_4x4_field - - Replaces the MMX2 version, one cycle faster. - - Also change the checkasm test to use the correct alignment macro. - -commit 0ce77f9eb71051c9a6121ec12c2abaac99ee628a -Author: Henrik Gramner -Date: Wed Sep 7 19:27:31 2016 +0200 - - x86: AVX2 mbtree_propagate_list - - SIMD part is around 25% faster than AVX on Haswell, around 7% - faster when including the runtime of the scalar C wrapper. 
- -commit 0c36239a4826f6e5a3cb873aca1814e389a46e29 -Author: Henrik Gramner -Date: Wed Sep 7 19:26:42 2016 +0200 - - x86: Move predict_16x16_dc_left calculations to asm - - 1-2 cycles faster and avoids some code duplication to decrease code size. - - Also drop the MMX2 implementation in favor of SSE2 to simplify things. - -commit 0cc8afd31212de013b26b10f58c608c9adcff2fc -Author: Anton Mitrofanov -Date: Thu Aug 18 19:00:48 2016 +0300 - - avs: support for AviSynth+ high bit-depth pixel formats - -commit dc0fe73636d34baeb3a64918b52db64d2a9e83bb -Author: Janne Grunau -Date: Fri Aug 26 20:26:56 2016 +0300 - - aarch64: implement x264_plane_copy_swap_neon - - plane_copy_swap_c: 27054 - plane_copy_swap_neon: 4152 - -commit eaf2fc20c8579714a48523b7ab8c05373708a25f -Author: Anton Mitrofanov -Date: Thu Aug 18 22:14:22 2016 +0300 - - Various cosmetics of semicolon use - -commit aae177c55141460f442de0572c4a434bf2ae20bc -Author: Henrik Gramner -Date: Thu Jul 28 21:58:40 2016 +0200 - - cli: Prefetch yuv/y4m input frames on Windows 8 and newer - - Use PrefetchVirtualMemory() (if available) on memory-mapped input frames. - - Significantly improves performance when the source file is not already - present in the OS page cache by asking the OS to bring in those pages from - disk using large, concurrent I/O requests. - - Most beneficial on fast encoding settings. Up to 40% faster overall with - --preset ultrafast, and up to 20% faster overall with --preset veryfast. - - This API was introduced in Windows 8, so call it conditionally. On older - Windows systems the previous behavior remains unchanged. - -commit 4e5adb87070c82b937c03e0cc030eae3578c251d -Author: Henrik Gramner -Date: Thu Jul 28 19:34:04 2016 +0200 - - Adjust --preset slow - - * Swap --me umh for --trellis 2. They have a similar effect on performance - but the latter gives slightly better results in most cases. - * Change --b-adapt from 2 to 1. 
Negligible difference in quality since the - b-adapt 1 improvements, but it's significantly faster. - - Also remove a redundant assignment from veryfast (--me hex is set by default). - -commit 1e4fb55a283ba90fef346033027af851f2a04468 -Author: Henrik Gramner -Date: Thu Jul 28 19:33:57 2016 +0200 - - ratecontrol_new: Simplify an expression in HRD timescale calculation - - Also gets rid of a false positive static analyser integer division warning. - -commit 17378b2028146fa54a1b2b90da62554935d9dcc2 -Author: Henrik Gramner -Date: Thu Jul 28 19:33:44 2016 +0200 - - gcc: Enable __sync_fetch_and_add() on x86-64 - - It was previously only enabled on 32-bit x86 for no reason, so 64-bit - systems had to use a mutex instead of a simple `lock xadd` instruction. - - Note that this code is only used in some very specific configurations - involving sliced threads. - -commit 86b71982e131eaa70125f8d0e725fcade9c4c677 -Author: Anton Mitrofanov -Date: Tue Sep 20 18:48:22 2016 +0300 - - mips: Fix high bit-depth compilation - -commit 1ea3c682ca12c7f13ea6f82b42bdc40afcfda87f -Author: Henrik Gramner -Date: Sat Sep 17 15:53:59 2016 +0200 - - checkasm: Fix compilation on Windows with --disable-thread - -commit 5caef139cf7d6b41a95ee9568625d36d1ae1c107 -Author: Janne Grunau -Date: Fri Aug 26 20:26:55 2016 +0300 - - arm/aarch64: use plane_copy wrapper macros - - Move the macros to common/mc.h to share them across all architectures. - Fixes possible buffer overreads if the width of the user supplied frames - is not a multiple of 16. 
- - Reported-by: Kirill Batuzov - -commit 3f5ed56d4105f68c01b86f94f41bb9bbefa3433b -Author: Henrik Gramner -Date: Sun Apr 3 17:28:33 2016 +0200 - - configure: Support specifying a custom pkg-config - -commit 7c9c687d8062f72b3ec300de8997bdae8277a741 -Author: Anton Mitrofanov -Date: Wed Jun 8 22:46:17 2016 +0300 - - Add support for new VUI parameters - - Support the new color primaries, transfer characteristics, and matrix - coefficients defined in the 2016-02 edition of the H.264 specification. - -commit 92515e8ff73491ef8a44c85e0bee265ba5791070 -Author: Henrik Gramner -Date: Sun Apr 24 14:10:22 2016 +0200 - - configure: Add link-time optimization support - - Enabled by using the --enable-lto configuration option. - - May give a slight performance improvement in some cases, but it can - also reduce performance in other cases (largely compiler-dependent) - so don't enable it by default. It also makes compilation (and linking - in particular) a fair bit slower. - - Note that some older versions of GNU binutils will incorrectly warn - about "memset used with constant zero length parameter" when linking - using LTO. This is due to a bug in binutils and can safely be ignored. - -commit b6267e0ff770545de88dfb5d3f176ea73f453730 -Author: Henrik Gramner -Date: Sun Apr 24 13:32:43 2016 +0200 - - configure: Fix clang detection with versioned binaries - - Correctly detect clang binaries that have the version number appended - as a suffix to the file name, e.g. `clang38`. - -commit 14a58532fea2c5f9e7b93c918476d842091c4268 -Author: Janne Grunau -Date: Sun Apr 24 14:38:56 2016 +0200 - - arm: Add asm for mbtree fixed point conversion - - 7-8 times faster on a cortex-a53 vs. gcc-5.3.
- - mbtree_fix8_pack_c: 44114 - mbtree_fix8_pack_neon: 5805 - mbtree_fix8_unpack_c: 38924 - mbtree_fix8_unpack_neon: 4870 - -commit b6f189eb4c5646483f7901293944695167e71ed9 -Author: Janne Grunau -Date: Sun Apr 24 14:38:55 2016 +0200 - - aarch64: Add asm for mbtree fixed point conversion - - pack is ~7 times faster and unpack is ~9 times faster on a cortex-a53 - compared to gcc-5.3. - - mbtree_fix8_pack_c: 41534 - mbtree_fix8_pack_neon: 5766 - mbtree_fix8_unpack_c: 44102 - mbtree_fix8_unpack_neon: 4868 - -commit a5e06b9a435852f0125de4ecb198ad47340483fa -Author: Anton Mitrofanov -Date: Sun May 22 22:33:58 2016 +0300 - - Fix p4x4 analyse for 4:4:4 encoding with chroma ME - -commit 07221290db0a94bda1f6ece3fdf3c02675c8adce -Author: Anton Mitrofanov -Date: Sun May 22 22:18:34 2016 +0300 - - Fix 4:4:4 encoding with CQM - -commit 23ebc1f763936b7fcfc81e21530e1b65dbc503b9 -Author: Anton Mitrofanov -Date: Sun May 22 19:36:05 2016 +0300 - - Fix p4x4 RDO with CAVLC - -commit 740a8c556bd9b68e899d6991f3f987a443aa14aa -Author: Anton Mitrofanov -Date: Sat Apr 23 23:10:03 2016 +0300 - - Apply zone options a little bit earlier - - This way things like SAR changes will have full effect from the start frame. - -commit 928bd9d5def4f0ca5071ea176a11b816a01e6495 -Author: Anton Mitrofanov -Date: Sat Apr 23 22:45:44 2016 +0300 - - Fix corruption when using encoder_reconfig() with some parameters - - Changing parameters that affects SPS, like --ref for example, wasn't - behaving correctly previously. - - Probably a regression in r2373. - -commit 3b70645597bea052d2398005bc723212aeea6875 -Author: Anton Mitrofanov -Date: Wed Apr 13 21:54:25 2016 +0300 - - Clean up header includes - -commit 2102de2584e03fce4abac49eb37d5d7a0803380f -Author: Henrik Gramner -Date: Wed Apr 13 17:53:49 2016 +0200 - - Eliminate some compiler warnings on BSD - - Include in addition to . According to the POSIX - specification the prototypes for strcasecmp() and strncasecmp() are - declared in . 
On some systems they are also declared in - for compatibility reasons but we shouldn't rely on that. - - Define _POSIX_C_SOURCE only when it's required to do so. Some BSD - variants don't declare certain function prototypes otherwise. - -commit 64f4e24909924fceeea6e154d71b7dfbf586c7ea -Author: Henrik Gramner -Date: Tue Apr 12 21:33:54 2016 +0200 - - osx: Add -D_DARWIN_C_SOURCE to CFLAGS - - OSX doesn't like _POSIX_C_SOURCE being defined when _DARWIN_C_SOURCE isn't. - -commit 00597d74c6223f3694e2c6614ef0574d7fca6b22 -Author: Anton Mitrofanov -Date: Tue Apr 12 20:33:42 2016 +0300 - - Remove an unused parameter from x264_slicetype_frame_cost() - - The b_intra_penalty parameter is no longer used anywhere after the - improvements to the --b-adapt 1 algorithm. - -commit aa26e880bc2cd04cc81c776051d5e21d03fc975a -Author: Anton Mitrofanov -Date: Sun Apr 10 20:17:32 2016 +0300 - - Improve the --b-adapt 1 algorithm - - Roughly the same speed as before but with significantly better results, - comparable to --b-adapt 2. - -commit 24f25b6afd21488a93bd86098f98dfaf229fc149 -Author: Henrik Gramner -Date: Sun Apr 3 15:49:26 2016 +0200 - - analyse: i_sub_partition write combining - -commit 1507cfe80ecf5f8e240a35e9e9dc5a92bd25e792 -Author: Henrik Gramner -Date: Tue Mar 15 20:16:45 2016 +0100 - - x86: Use one less register in mbtree_propagate_cost_avx2 - - Avoids the need to save and restore xmm6 on 64-bit Windows. - -commit c82c7374938f4342971adf8b2495c3a1bbe621c4 -Author: Henrik Gramner -Date: Fri Mar 4 17:53:08 2016 +0100 - - x86: Add asm for mbtree fixed point conversion - - The QP offsets of each macroblock are stored as floats internally and - converted to big-endian Q8.8 fixed point numbers when written to the 2-pass - stats file, and converted back to floats when read from the stats file. - - Add SSSE3 and AVX2 implementations for conversions in both directions. - - About 8x faster than C on Haswell.
- -commit be677efc6313ade5eddf722fdf097cce56df1344 -Author: Anton Mitrofanov -Date: Thu Apr 7 13:09:03 2016 +0300 - - x86inc: Enable AVX emulation in additional cases - - Allows emulation to work when dst is equal to src2 as long as the - instruction is commutative, e.g. `addps m0, m1, m0`. - -commit b5661d322866df647e6084061a471eceac214c28 -Author: Anton Mitrofanov -Date: Thu Apr 7 12:48:29 2016 +0300 - - x86inc: Improve handling of %ifid with multi-token parameters - - The yasm/nasm preprocessor only checks the first token, which means that - parameters such as `dword [rax]` are treated as identifiers, which is - generally not what we want. - -commit 283663d4c13088f4811c78b75318bda59d696b2d -Author: Anton Mitrofanov -Date: Mon Mar 28 18:35:38 2016 +0300 - - x86inc: Fix AVX emulation of some instructions - -commit 54fd697668d0a04246ad0b0e9955a6583b2bb8b6 -Author: Henrik Gramner -Date: Fri Mar 4 17:51:41 2016 +0100 - - x86inc: Fix AVX emulation of scalar float instructions - - Those instructions are not commutative since they only change the first - element in the vector and leave the rest unmodified. - -commit eeb9b66ddb0f27d8baaa8efa9597613e61140836 -Author: Henrik Gramner -Date: Sat Feb 27 20:34:39 2016 +0100 - - x86: dct2x4dc asm - - Only used in 4:2:2. MMX2 version implemented for 8-bit, SSE2 and AVX - versions implemented for high bit-depth. - - 2.5x faster on 32-bit and 1.6x faster on 64-bit compared to C on Ivy Bridge. - -commit 23d1d8e89be2d99f5c6924a6055fc80d69429503 -Author: Henrik Gramner -Date: Sat Feb 20 20:31:22 2016 +0100 - - x86: SSE2/AVX idct_dequant_2x4_(dc|dconly) - - Only used in 4:2:2. Both 8-bit and high bit-depth implemented. - - Approximate performance improvement compared to C on Ivy Bridge: - - x86-32 x86-64 - idct_dequant_2x4_dc 2.1x 1.7x - idct_dequant_2x4_dconly 2.7x 2.0x - - Helps more on 32-bit due to the C versions being register starved. 
- -commit dbbf1dd2836a21b65178442c1fb7a00ea089d7ec -Author: Henrik Gramner -Date: Sat Feb 20 16:53:35 2016 +0100 - - checkasm: Fix idct_dequant_2x4_(dc|dconly) tests - - They used the wrong qp values and the dconly test had the wrong name. This - was undetected before because there weren't any assembly implementations. - -commit 0db0ac3a05b80eee7994fab08cbce2d07e8b1586 -Author: Henrik Gramner -Date: Sun Feb 7 14:55:26 2016 +0100 - - checkasm: Disable Windows Error Reporting - - When developing new assembly code it's expected that checkasm may crash, - and the error reporting dialog popup can be somewhat annoying. - -commit deae1b1001d134f5babc4fad3208bd951a454951 -Author: Henrik Gramner -Date: Sat Feb 6 18:49:46 2016 +0100 - - windows: Flag debug builds in the resource file - -commit 0082b717199bafb4abbb6638e7c30d50deaf2c1b -Author: Henrik Gramner -Date: Thu Feb 4 20:06:57 2016 +0100 - - cli: Refactor filter option parsing - - The old code contained a whole bunch of memory leaks, unchecked mallocs, - sections of dead code, etc. and was generally overly complex. - - Also consolidate some memory allocations into a single one. - -commit dfe394cadc8a39752de5b3f4a0be222c1b9290f2 -Author: Henrik Gramner -Date: Sun Jan 31 21:50:52 2016 +0100 - - ffms: Various improvements - - * Drop the MinGW Unicode workarounds. Those were required at the time - Windows Unicode support was added to x264 but the underlying problem - has since been fixed in FFMS. - - * Use FFMS_IndexBelongsToFile() as an additional sanity check when reading - an index file to ensure that it belongs to the current source video. - - * Upgrade to the new API to prevent deprecation warnings when compiling. - - * Fix a resource leak that would occur if FFMS_GetFirstTrackOfType() or - FFMS_CreateVideoSource() failed. - - * Minor string handling adjustments related to progress reporting. - - This increases the FFMS version requirement from 2.16.2 to 2.21.0.
- -commit 215afdbd8ecc924f2028f79851458076683e97ad -Author: Henrik Gramner -Date: Mon Apr 11 16:59:46 2016 +0200 - - msvc: Add snprintf/vsnprintf replacements - - MSVC pre-VS2015 has broken snprintf/vsnprintf implementations which are - incompatible with C99 and may lead to buffer overflows. - -commit 5be32efc244d96aa56be462664b5c56d7318e86d -Author: Henrik Gramner -Date: Sun Jan 31 20:21:01 2016 +0100 - - configure: Define feature test macros for --std=gnu99 - - Makes the printf() family functions on MinGW use the correct C99 POSIX - versions instead of the broken pre-VS2015 Microsoft ones. - - Also allows us to get rid of some _GNU_SOURCE and _ISOC99_SOURCE defines. - -commit c01bf42117b811a0469f9f6c374f4a0daa98716d -Author: Henrik Gramner -Date: Thu Jan 28 18:37:37 2016 +0100 - - mingw: Enable high-entropy ASLR on 64-bit Windows - - To fully utilize HEASLR the image base address must also be set above - 4 GiB. For consistency use the same address as MSVC uses by default. - - This requires binutils 2.25 which isn't available on all common - distributions, so only enable it after checking that it's supported. - -commit dd6b7b974e0057da726f71e10c24d057a339605b -Author: Henrik Gramner -Date: Sun Jan 24 01:48:18 2016 +0100 - - msvs: WinRT support - - To compile x264 for WinRT the following additional steps has to be performed. - - * Ensure that the necessary SDK is installed. 
- - * Set the correct environment variables in the VS command prompt as shown at - https://trac.ffmpeg.org/wiki/CompilationGuide/WinRT - - * Add one of the following to --extra-cflags depending on the target OS: - "-DWINAPI_FAMILY=WINAPI_FAMILY_PC_APP -D_WIN32_WINNT=0x0A00" (Windows 10) - "-DWINAPI_FAMILY=WINAPI_FAMILY_PC_APP -D_WIN32_WINNT=0x0603" (Windows 8.1) - -commit 7650a1367003e24f4f1b831682c012b5ba3e6c69 -Author: Henrik Gramner -Date: Sun Jan 24 23:58:40 2016 +0100 - - configure: Disable CLI libraries when CLI is disabled - -commit 1ce062abb47ac59621b402cb26a1f14c91bb52bc -Author: Henrik Gramner -Date: Fri Feb 5 18:46:13 2016 +0100 - - matroska: mk_close: Check fseek() return value - -commit de7af9185e172122cd9b800845e1988a52ad7cc3 -Author: Henrik Gramner -Date: Fri Feb 5 18:46:02 2016 +0100 - - parse_qpfile: Check ftell() and fseek() return values - -commit fd2c324731c2199e502ded9eff723d29c6eafe0b -Author: Anton Mitrofanov -Date: Sun Apr 10 20:13:59 2016 +0300 - - Use the correct default B-ref placement with B-pyramid - - Cost analyse functions expects the placement of the B-ref in a sequence of - an even number of B-frames to be located towards the beginning while the - actual placement was towards the end. - - Change the placement to be consistent with the analyse expectations, e.g. - PbbBbP -> PbBbbP. - -commit e6a3f2989dd9eba3434c21fa94a6d9a5d1c7a9fe -Author: Henrik Gramner -Date: Fri Feb 5 18:45:47 2016 +0100 - - parse_zones: Fix memory leak - -commit f86756985d42ac4a14866534c588061ede860b7b -Author: Alexey Samsonov -Date: Mon Jan 25 16:05:25 2016 -0800 - - Fix float-cast-overflow in x264_ratecontrol_end function - - According to the C standard, it is undefined behavior to cast a negative - floating point number to an unsigned integer. Float-cast-overflow in - general is known to produce different results on different architectures. 
- - Building x264 code with Clang and -fsanitize=float-cast-overflow - (http://clang.llvm.org/docs/UndefinedBehaviorSanitizer.html#availablle-checks) - and running it on some real-life examples occasionally produces errors - of the form: - - encoder/ratecontrol.c:1892: runtime error: value -5011.14 is outside the - range of representable values of type 'unsigned short' - - Fix these errors by explicitly coding the de-facto x86 behavior: casting - float to uint16_t through int16_t. - -commit a01e33913655f983df7a4d64b0a4178abb1eb618 -Author: Sebastian Dröge -Date: Sun Dec 20 23:49:35 2015 +0300 - - Fix AVC-Intra padding for non-Annex B encoding - -commit 1e4a24f305c006a95fec00131703d0e0ecae3a38 -Author: Anton Mitrofanov -Date: Mon Jan 11 21:39:22 2016 +0300 - - ppc: Only perform AltiVec detection if compiled with AltiVec enabled - -commit b5953629117adc2b8d0d0eed6eb323c00587b428 -Author: Anton Mitrofanov -Date: Tue Oct 13 15:30:16 2015 +0300 - - 2-pass: Take into account possible frame reordering - -commit 20821a26ec510979e49fcfd6becc6ad7e2d8b388 -Author: Anton Mitrofanov -Date: Tue Oct 13 12:54:05 2015 +0300 - - Revise the 2-pass algorithm - -commit 065321c48d0d371c1735b3cc9d368b43e1b64aaa -Author: Anton Mitrofanov -Date: Tue Jan 5 02:41:43 2016 +0300 - - Revise the row VBV algorithm (part 2) - - Should fix rare cases of VBV emergency mode activation caused by too much trust - to the row predictors. - -commit d23d18655249944c1ca894b451e2c82c7a584c62 -Author: Henrik Gramner -Date: Fri Jan 1 12:44:31 2016 +0100 - - Bump dates to 2016 - -commit 3d972062c8a37d1a19586e2351e889b0a70beb40 -Author: Henrik Gramner -Date: Mon Oct 26 19:54:20 2015 +0100 - - cli: Use memory-mapped input frames for yuv and y4m - - Improves performance by avoiding extraneous memory copying. - Most beneficial on fast settings. - - On average around 5-10% faster overall on ultrafast but the - performance improvement can be even larger in some cases. 
- -commit 38a5268dbec56adea750e05c4981f3bbb176e735 -Author: Henrik Gramner -Date: Thu Jan 7 01:59:24 2016 +0100 - - y4m: Support extended frame headers when seeking - - Use the actual length of the frame header of the first frame instead of - assuming a header without extensions when calculating the frame size. - - Also makes the frame counter more accurate with extended frame headers. - -commit cc652c158c1fa65bfeafb6446b5be855850065d0 -Author: Henrik Gramner -Date: Tue Nov 3 17:55:08 2015 +0100 - - configure: Simplify cygwin/mingw/msys code - - Avoids some code duplication. - - Also drop the -mno-cygwin check since that option was removed back in 2008. - -commit 8b2d2a6d51abf51ad38dd8705d280448fbe63aaf -Author: Henrik Gramner -Date: Mon Oct 26 18:52:46 2015 +0100 - - y4m: Avoid some redundant strlen() calls - -commit 24f7705f15cf6d59028a76a894d866b9fad85f39 -Author: Henrik Gramner -Date: Sun Oct 25 17:15:10 2015 +0100 - - Simplify threadpool_wait - -commit 30ba5dc22fd0ae359e144847f2636574f659627d -Author: Henrik Gramner -Date: Fri Oct 16 19:05:34 2015 +0200 - - windows: Use native threads by default - - --disable-win32thread can be passed as an argument to configure to compile - with pthreads, which was the old default behavior. - -commit 1637239a64f3ec9a491b91202bd37097f15a253d -Author: Henrik Gramner -Date: Sun Oct 11 22:32:11 2015 +0200 - - x86: Avoid some bypass delays and false dependencies - - A bypass delay of 1-3 clock cycles may occur on some CPUs when transitioning - between int and float domains, so try to avoid that if possible. - -commit 7688814a7ec994f8e5984d199b465ccc068b98af -Author: Henrik Gramner -Date: Sun Oct 11 22:32:03 2015 +0200 - - x86: Enable high bit-depth x264_coeff_last64_avx2_lzcnt - - The function existed but was never enabled. 
- -commit 366fa85885053c7b836a4272a4fbec1852103979 -Author: Geza Lore -Date: Mon Oct 12 13:13:42 2015 +0100 - - x86inc: Add debug symbols indicating sizes of compiled functions - - Some debuggers/profilers use this metadata to determine which function a - given instruction is in; without it they can get confused by local labels - (if you haven't stripped those). On the other hand, some tools are still - confused even with this metadata. e.g. this fixes `gdb`, but not `perf`. - - Currently only implemented for ELF. - -commit 70c3ba42e610b4182edda4fdeb10b37a2a70eb8f -Author: Henrik Gramner -Date: Fri Oct 16 21:28:49 2015 +0200 - - x86inc: Avoid creating unnecessary local labels - - The REP_RET workaround is only needed on old AMD cpus, and the labels clutter - up the symbol table and confuse debugging/profiling tools, so use EQU to - create SHN_ABS symbols instead of creating local labels. Furthermore, skip - the workaround completely in functions that definitely won't run on such cpus. - - This patch doesn't modify any emitted instructions, and doesn't actually affect - x264 at all. It's only for other projects that use x86inc.asm without an - appropriate `strip` command in their buildsystem. - - Note that EQU is just creating a local label when using nasm instead of yasm. - This is probably a bug, but at least it doesn't break anything. - -commit 5c3d473a966e4b013759097fb98cd4a9cb5a34f5 -Author: Henrik Gramner -Date: Thu Oct 15 17:42:49 2015 +0200 - - x86inc: Simplify AUTO_REP_RET - - cpuflags is never undefined any more, it's set to 0 instead. - - Also fix an incorrect comment.
- -commit 28d68f090c0103704f5f6a86fcf362251774cd78 -Author: Henrik Gramner -Date: Mon Oct 12 21:55:11 2015 +0200 - - x86inc: Use more consistent indentation - -commit 963b99efaaf1f0628b155e52b8a7c102cd1d37ff -Author: Henrik Gramner -Date: Mon Oct 12 20:15:18 2015 +0200 - - x86inc: Preserve arguments when allocating stack space - - When allocating stack space with a larger alignment than the known stack - alignment a temporary register is used for storing the stack pointer. - Ensure that this isn't one of the registers used for passing arguments. - -commit 6e5033417a53fa66d002665618a1350d7417725e -Author: Henrik Gramner -Date: Sun Jan 17 00:25:47 2016 +0100 - - x86inc: Improve FMA instruction handling - - * Correctly handle FMA instructions with memory operands. - * Print a warning if FMA instructions are used without the correct cpuflag. - * Simplify the instantiation code. - * Clarify documentation. - - Only the last operand in FMA3 instructions can be a memory operand. When - converting FMA4 instructions to FMA3 instructions we can utilize the fact - that multiply is a commutative operation and reorder operands if necessary - to ensure that a memory operand is used only as the last operand. - -commit 93cba743c78959ad97812dbaf894903c608912d0 -Author: Henrik Gramner -Date: Sun Oct 11 22:31:53 2015 +0200 - - x86inc: Be more verbose in assertion failures - -commit 8017b33454397d59b3285ec6d2ad35b6d0deb58a -Author: Henrik Gramner -Date: Wed Sep 30 23:17:00 2015 +0200 - - x86inc: Make cpuflag() and notcpuflag() return 0 or 1 - - Makes it possible to use them in arithmetic expressions. - -commit 5c6570495f8f1c716b294aee1430d8766a4beb9c -Author: Henrik Gramner -Date: Fri Oct 30 16:55:49 2015 +0100 - - encoder_open: Fix memory leak - - Furthermore, the x264_analyse_prepare_costs() and x264_analyse_init_costs() - functions were only used in x264_encoder_open(), so move that entire section - of code to analyse.c as well to simplify things. 
- -commit 424534537a249dcf913e02560303f6afca423489 -Author: Janne Grunau -Date: Wed Nov 18 11:08:22 2015 +0100 - - arm: do not fill mc_weight*_neon tabs for HIGH_BIT_DEPTH - - The asm is only for 8-bit and function prototypes reflect that. Avoids - numerous warnings with --bit-depth=9/10. - -commit df51d8efa8ce9afcedda64acc69c1dba2648716d -Author: Janne Grunau -Date: Tue Oct 13 23:50:11 2015 +0200 - - arm: Eliminate text relocations in asm - - Android 6 does not link shared libraries with text relocations. - - Make the movrel macro position independent and add movrelx for indirect - loads of external symbols. - - Move the function pointer table for the aligned memcpy variants to the - data.rel.ro section on Linux/Android. - -commit a2fe237af1b68f2bd53d64ed3faed62429d3ee5a -Author: Martin Storsjö -Date: Thu Oct 15 11:50:33 2015 +0300 - - arm: Don't assume alignment in mbtree_propagate_list_internal where it isn't provided - -commit 9f422c0cd9c0abcd6a7abb10b51f8be883c39b2b -Author: Janne Grunau -Date: Tue Oct 13 23:50:12 2015 +0200 - - arm: Fix checkasm register clobber check on iOS - - r9 is a volatile register in the iOS ABI and will therefore not be - preserved by compiled functions like the luma motion compensation. - - Add the symbol prefix to the puts() call and use blx since a switch - between arm and thumb mode might be required. - -commit 75992107adcc8317ba2888e3957a7d56f16b5cd4 -Author: Anton Mitrofanov -Date: Thu Oct 1 01:02:16 2015 +0300 - - ppc: Add detection of AltiVec support for FreeBSD - - Patch from FreeBSD ports. - -commit 479d0c1fe73833ba65e0a10f6f5cf18df6def719 -Author: Anton Mitrofanov -Date: Mon Sep 28 21:07:55 2015 +0300 - - Don't assume 16-byte stack alignment by default on x86-32 - - Some compilers depending on target OS use 4-byte stack alignment by default. - Explicitly check known good compilers and specific options for stack alignment.
- -commit fad44d59b3adeb29b9c92fde0b80116cde79020e -Author: Anton Mitrofanov -Date: Tue Sep 22 21:33:07 2015 +0300 - - Fix a few static analyzer performance hints - -commit de24c8c189364013e62d58d1e8f2fef878eb62bf -Author: Anton Mitrofanov -Date: Tue Sep 22 20:19:23 2015 +0300 - - Revise the row VBV algorithm - -commit 001d30598c75d9bbc3aa80f67f9bdac17692437d -Author: Anton Mitrofanov -Date: Tue Sep 22 19:26:25 2015 +0300 - - Fix high bit depth lookahead cost compensation algorithm - - Now high bit depth VBV should act more like 8-bit depth one. - -commit 91368390db9179226b5b4ed718a5788b754f9302 -Author: Anton Mitrofanov -Date: Tue Sep 22 19:05:52 2015 +0300 - - Correctly update the intra row predictor in B-frames - - It was previously used but never updated from its initialization value. - -commit e0d722f85f8599e324be2bebef9430155b25c329 -Author: Anton Mitrofanov -Date: Tue Sep 22 18:58:24 2015 +0300 - - Change the predictors update algorithm - - Keep predictor offsets more stable. This should fix VBV misprediction in frames - with a large difference in complexity between the top and bottom parts. - -commit 6f04b146875c45e6f7845a7bb5fb7fdf8e7534f1 -Author: Martin Storsjö -Date: Thu Sep 3 09:30:44 2015 +0300 - - arm: Implement x264_mbtree_propagate_{cost, list}_neon - - The cost function could be simplified to avoid having to clobber - q4/q5, but this requires reordering instructions which increase - the total runtime. - - checkasm timing Cortex-A7 A8 A9 - mbtree_propagate_cost_c 63702 155835 62829 - mbtree_propagate_cost_neon 17199 10454 11106 - - mbtree_propagate_list_c 104203 108949 84532 - mbtree_propagate_list_neon 82035 78348 60410 - -commit 3e25eab0b7172e3c0b067b8b6d641ce148d03db9 -Author: Martin Storsjö -Date: Thu Sep 3 09:30:43 2015 +0300 - - x86: Share the mbtree_propagate_list macro with aarch64 - - This avoids having to duplicate the same code for all architectures - that implement only the internal part of this function in assembler.
- -commit 654901dfca73a21e2bb2366dda79eb413e9bfb66 -Author: Martin Storsjö -Date: Wed Sep 2 22:39:51 2015 +0300 - - arm: Implement luma intra deblocking - - checkasm timing Cortex-A7 A8 A9 - deblock_luma_intra[0]_c 5988 4653 4316 - deblock_luma_intra[0]_neon 3103 2170 2128 - deblock_luma_intra[1]_c 7119 5905 5347 - deblock_luma_intra[1]_neon 2068 1381 1412 - - This includes extra optimizations by Janne Grunau. - - Timings from a separate build, on Exynos 5422: - - Cortex-A7 A15 - deblock_luma_intra[0]_c 6627 3300 - deblock_luma_intra[0]_neon 3059 1128 - deblock_luma_intra[1]_c 7314 4128 - deblock_luma_intra[1]_neon 2038 720 - -commit e2696a60a3e58d92e88e149b63c0b06a066eea9e -Author: Martin Storsjö -Date: Mon Aug 31 22:40:31 2015 +0300 - - arm: Implement some neon 8x16c intra predict functions - - checkasm timing Cortex-A7 A8 A9 - intra_predict_8x16c_dct_c 862 540 590 - intra_predict_8x16c_dct_neon 608 511 657 - intra_predict_8x16c_h_c 972 707 719 - intra_predict_8x16c_h_neon 722 656 672 - intra_predict_8x16c_p_c 10183 9819 8655 - intra_predict_8x16c_p_neon 2622 1972 1983 - -commit 5db8b6b93aa91079ab785b9b49413625430536fd -Author: Martin Storsjö -Date: Fri Aug 28 00:15:01 2015 +0300 - - arm: Implement x264_plane_copy_neon - - checkasm timing Cortex-A7 A8 A9 - plane_copy_c 13124 10925 9106 - plane_copy_neon 7349 5103 8945 - -commit 35d32d09e163bb0f2ce60a8e13f9f22125445346 -Author: Martin Storsjö -Date: Fri Aug 28 09:40:24 2015 +0300 - - checkasm: arm: Check register clobbering - - Cast the function pointer to a different type signature, to - be able to use uint64_t as return type (instead of intptr_t) for - those calls that require it. - - Use two separate functions, depending on whether neon is available. 
- -commit 9cbdb635a4bd78e6767e735a062c0d9a5766b849 -Author: Martin Storsjö -Date: Fri Aug 14 00:00:57 2015 +0300 - - checkasm: Try different widths for ssd_nv12 - - To test all codepaths in the aarch64 neon implementation, one at - the very least needs to test with width 8, 16, 24 and 32. - -commit 39af8c72e618a544baa06ae427fb2b440861abcd -Author: Jerome Duval -Date: Fri Jun 13 19:56:27 2014 +0000 - - Haiku support - - Add Haiku as supported platform in configure. - Haiku has no nice() function, use the platform specific substitute instead. - -commit 59683a97b50b34c6282457a959bb6b3e9e7f8c0d -Author: Martin Storsjö -Date: Tue Aug 25 14:38:20 2015 +0300 - - checkasm: aarch64: Check register clobbering - - Disable this on iOS, since it has got a slightly different ABI - for vararg parameters. - -commit 5c13589be828b524100c787057d6bef77898c657 -Author: Martin Storsjö -Date: Tue Aug 25 23:36:45 2015 +0300 - - arm: Implement x264_decimate_score15/16/64_neon - - checkasm timing Cortex-A7 A8 A9 - decimate_score15_c 764 736 535 - decimate_score15_neon 487 494 453 - decimate_score16_c 782 727 553 - decimate_score16_neon 487 494 521 - decimate_score64_c 2361 2597 2011 - decimate_score64_neon 1017 802 785 - -commit 3902ae02a0edede5d6c44cb3ee9e24e618c66e6a -Author: Martin Storsjö -Date: Tue Aug 25 23:36:44 2015 +0300 - - arm: Implement chroma intra deblock - - checkasm timing Cortex-A7 A8 A9 - deblock_chroma_420_intra_mbaff_c 1469 1276 1181 - deblock_chroma_420_intra_mbaff_neon 981 717 644 - deblock_chroma_intra[1]_c 2954 2402 2321 - deblock_chroma_intra[1]_neon 947 581 575 - deblock_h_chroma_420_intra_c 2859 2509 2264 - deblock_h_chroma_420_intra_neon 1480 1119 1028 - deblock_h_chroma_422_intra_c 6211 5030 4792 - deblock_h_chroma_422_intra_neon 2894 1990 2077 - -commit e8b95e92792d9353277995043757430cf3dc3bf7 -Author: Martin Storsjö -Date: Tue Aug 25 14:38:17 2015 +0300 - - arm: Implement x264_pixel_sa8d_satd_16x16_neon - - This requires spilling some registers to the stack, - 
contrary to the aarch64 version. - - checkasm timing Cortex-A7 A8 A9 - sa8d_satd_16x16_neon 12936 6365 7492 - sa8d_satd_16x16_separate_neon 14841 6605 8324 - -commit 6bbaa2758d53d0d6d645142d7d818c960d137a0e -Author: Martin Storsjö -Date: Tue Aug 25 14:38:16 2015 +0300 - - arm: Implement x264_deblock_h_chroma_mbaff_neon - - checkasm timing Cortex-A7 A8 A9 - deblock_chroma_420_mbaff_c 1944 1706 1526 - deblock_chroma_420_mbaff_neon 1210 873 865 - -commit 3c66591e859045ef79a7131b991a5f20c80ffbb4 -Author: Martin Storsjö -Date: Tue Aug 25 14:38:15 2015 +0300 - - arm: Implement x264_deblock_h_chroma_422_neon - - checkasm timing Cortex-A7 A8 A9 - deblock_h_chroma_422_c 6953 6269 5145 - deblock_h_chroma_422_neon 3905 2569 2551 - -commit 5265b927b0f2e043dd39cbbbf3909da0862d60e6 -Author: Martin Storsjö -Date: Tue Aug 25 14:38:14 2015 +0300 - - arm: Implement integral_init4/8h/v_neon - - checkasm timing Cortex-A7 A8 A9 - integral_init4h_c 10466 8590 6161 - integral_init4h_neon 3021 1494 1800 - integral_init4v_c 16250 13590 13628 - integral_init4v_neon 3473 2073 3291 - integral_init8h_c 10100 8275 5705 - integral_init8h_neon 4403 2344 2751 - integral_init8v_c 6403 4632 4999 - integral_init8v_neon 1184 783 1306 - -commit b08403b5593307b919bfe5bfbd743da825326a4c -Author: Martin Storsjö -Date: Tue Aug 25 14:38:13 2015 +0300 - - arm: Implement x264_denoise_dct_neon - - checkasm timing Cortex-A7 A8 A9 - denoise_dct_c 6604 5510 5858 - denoise_dct_neon 1774 1139 1614 - -commit ceee976bde76a5f4126bfd9d8454f0e601e67204 -Author: Martin Storsjö -Date: Tue Aug 25 14:38:12 2015 +0300 - - arm: Add x264_nal_escape_neon - - checkasm timing Cortex-A7 A8 A9 - nal_escape_c 852758 879566 655497 - nal_escape_neon 376831 450678 371673 - -commit 8feb733ed1dcb1cc94df3b0e6c98009832ea85cc -Author: Martin Storsjö -Date: Tue Aug 25 14:38:11 2015 +0300 - - arm: Add neon versions of vsad, asd8 and ssd_nv12_core - - These are straight translations of the aarch64 versions. 
- - checkasm timing Cortex-A7 A8 A9 - vsad_c 16234 10984 9850 - vsad_neon 2132 1020 789 - - asd8_c 5859 3561 3543 - asd8_neon 1407 1279 1250 - - ssd_nv12_c 608096 591072 426285 - ssd_nv12_neon 72752 33549 41347 - -commit 42b3b398664349d23b2122ac940417165424542d -Author: Martin Storsjö -Date: Tue Aug 25 14:38:10 2015 +0300 - - checkasm: Check the right output range for integral_initXh - - These functions write their output into sum+stride, while we previously - only checked [0..stride-8] within the sum array. - - This catches the previously broken aarch64 version of these functions. - - Also check up until stride-4 elements for init4h. - -commit 3d86abab097fa26d116112f188458269c6a0415f -Author: Janne Grunau -Date: Thu Aug 20 13:55:54 2015 +0200 - - aarch64: Skip deblocking in x264_deblock_h_chroma_422_neon - - If the parameters (alpha, beta, tc0[]) indicated that the deblocking - should have been skipped, every 2nd chroma line would have deblocked - anyway. - - deblock_h_chroma_422_neon: 2259 (before) - deblock_h_chroma_422_neon: 2192 (after) - -commit aec81efd3fe43008551916aa6073eb0732a58210 -Author: Janne Grunau -Date: Mon Aug 17 16:39:20 2015 +0200 - - aarch64: Optimize various intra_predict asm functions - - Make them at least as fast as the compiled C version (tested on - cortex-a53 vs. gcc 4.9.2). - - C NEON (before) NEON (after) - intra_predict_4x4_dc: 260 335 260 - intra_predict_4x4_dct: 210 265 200 - intra_predict_8x8c_dc: 497 548 493 - intra_predict_8x8c_v: 232 309 179 (arm64) - intra_predict_8x16c_dc: 795 830 790 - -commit b16268ac0826d78455d0d704ea0fc8b1edc6b6bf -Author: Janne Grunau -Date: Tue Aug 18 10:25:10 2015 +0200 - - aarch64: Faster intra_predict_4x4_h - - Use multiplication with 0x01010101 for splats. 
- - On a cortex-a53: - gcc 4.9.2 llvm 3.6 neon (before) neon (after) - intra_predict_4x4_h: 162 147 160/155 139/135 - -commit f2a6be92e5e42e8ef1daf74f63dbdbc4819d2070 -Author: Janne Grunau -Date: Tue Aug 18 10:25:09 2015 +0200 - - aarch64: Fix coeff_level_run* macros with LLVM's assembler - - LLVM's integrated assembler does not treat symbols as integer constants. - -commit 592e92e9a8e47c3f0d0017c8158df5a4830e0bbd -Author: Janne Grunau -Date: Tue Aug 18 10:25:08 2015 +0200 - - aarch64: Remove commas LLVM's assembler complains about - -commit 6efb57ada652fd015ec4cacffd09282632bb975b -Author: Martin Storsjö -Date: Thu Aug 13 23:59:31 2015 +0300 - - arm: Implement x264_sub8x16_dct_dc_neon - - checkasm timing Cortex-A7 A8 A9 - sub8x16_dct_dc_c 6386 3901 4080 - sub8x16_dct_dc_neon 1491 698 917 - -commit 89439b2c604c81e13eb3da9e692d2cdae5a18b53 -Author: Martin Storsjö -Date: Thu Aug 13 23:59:28 2015 +0300 - - arm: Optimize x264_deblock_h_chroma_neon - - Shuffle both chroma components together as a 16 bit unit, and - don't write the unchanged columns (like in x264_deblock_h_luma_neon - and in the aarch64 version of the function). - - This causes a minor slowdown for x264_deblock_v_chroma_neon, but - it is negligible compared to the speedup. 
- - checkasm timing Cortex-A7 A8 A9 - deblock_chroma[1]_c 4817 4057 3601 - deblock_chroma[1]_neon 1249 716 817 (before) - deblock_chroma[1]_neon 1249 766 845 (after) - - deblock_h_chroma_420_c 3699 3275 2830 - deblock_h_chroma_420_neon 2068 1414 1400 (before) - deblock_h_chroma_420_neon 1838 1355 1291 (after) - -commit ff71457d71c5c11ed825d848677cab09c7639012 -Author: Martin Storsjö -Date: Thu Aug 13 23:59:27 2015 +0300 - - aarch64: Remove leftover commented out code - -commit ef6034812162fc8b51bfd5e87387f405d1cc30cb -Author: Martin Storsjö -Date: Thu Aug 13 23:59:26 2015 +0300 - - aarch64: Simplify the decimate_score functions - - After doing a left shift by the number of bits returned by clz, - only bits set to zero can be shifted out, so if the register - was nonzero to start with (which is checked), it can't become - zero here. - -commit d2b04a26b26d02c41ffb05cf1a605dafe9e6fa59 -Author: Martin Storsjö -Date: Thu Aug 13 23:59:25 2015 +0300 - - arm: Use aligned loads in x264_coeff_last15_neon - - After subtracting 2, the pointer will be aligned. - - checkasm timing Cortex-A7 A8 A9 - coeff_last15_c 423 375 230 - coeff_last15_neon 350 420 404 (before) - coeff_last15_neon 350 400 394 (after) - -commit 3f89a6bbee061cb0361770cf5b8495448515a011 -Author: Martin Storsjö -Date: Thu Aug 13 23:59:24 2015 +0300 - - arm: Simplify x264_predict_8x8c_p_neon - - This gets rid of a few unnecessary (and confusing) steps in - calculating the increment to i00. 
- - checkasm timing Cortex-A7 A8 A9 - intra_predict_8x8c_p_c 5525 4732 4755 - intra_predict_8x8c_p_neon 1719 1140 1262 (before) - intra_predict_8x8c_p_neon 1663 1142 1255 (after) - -commit a0cd7d38acb6c31973228ab207e18344920e0aa3 -Author: Vittorio Giovara -Date: Tue Sep 15 15:40:14 2015 +0200 - - lavf: Use the prefixed name for pixel format enum - -commit 63555e696a997ff795798d3357d770f8ab373cd9 -Author: Janne Grunau -Date: Thu Sep 3 00:21:58 2015 +0200 - - aarch64: fix x264_mbtree_propagate_cost_neon - - The branch condition caused the loop to execute one time more than - intended. Detected by a memory corruption on arm with the 1 to 1 port of - the function. - -commit 5c4728d8dd82ba46901824470db1609ae0f2521d -Author: Martin Storsjö -Date: Thu Aug 13 23:59:22 2015 +0300 - - aarch64: Fix integral_init4/8h_neon - - The stride is the number of uint16_t elements and thus needs - to be shifted. - - This issue had slipped unnoticed since checkasm didn't actually - verify the output of these functions. - -commit 67076513267907b5601828ae6864cc063c8c7548 -Author: Henrik Gramner -Date: Thu Aug 27 19:53:00 2015 +0200 - - x86: Fix integral_init4/8h_avx2 - - The AVX2 implementation was using the wrong offsets. It went undetected due to - the checkasm test being incorrect. - -commit e86f3a1993234e8f26050c243aa253651200fa6b -Author: Mark Webster -Date: Wed Aug 5 04:28:17 2015 +0100 - - Simplify inclusion of x264.h in C++ projects - - Name all structs to support forward declarations. - Add a conditional extern "C" wrapper in x264.h itself instead of having to - specify it in every location where it's included. - -commit 401941cc7099b322864600b62104940542497e7a -Author: Henrik Gramner -Date: Sun Aug 16 21:59:26 2015 +0200 - - checkasm: Properly save rdx/edx in checkasm_call() on x86 - - If the return value doesn't fit in a single register rdx/edx can in some - cases be used in addition to rax/eax. 
- - Doesn't affect any of the existing checkasm tests but it's more correct - behavior and it might be useful in the future. - -commit 3dff8af3033a9e81d7966c5749fd361ce421467a -Author: Henrik Gramner -Date: Tue Aug 11 17:19:35 2015 +0200 - - x86: Enable SSE2 by default on x86-32 - - It makes more sense to tune the defaults to benefit the vast majority of users. - - Anyone still using a Pentium III for video encoding is of course free to - explicitly set different flags when compiling. - -commit 51d8aa09b777dc2969deaa954d5f6af9836c02ba -Author: Henrik Gramner -Date: Mon Aug 10 22:30:21 2015 +0200 - - msvs/icl: Improve default CFLAGS - - Use -fp:fast as a substitute for -ffast-math. - Increase warning level from -W0 to -W1 (the default setting). - Disable -GS (stack cookies) on MSVS. It's disabled by default on ICL. - -commit 7edaf4b966aaee098ff301436f8d2b33a6fe5983 -Author: Henrik Gramner -Date: Wed Aug 12 22:23:31 2015 +0200 - - Use a relative $SRCPATH for out-of-tree builds - - Fixes out-of-tree MSVS builds on Cygwin. - -commit e7b4b863dc2555ed835569c400d3a30f7ddc15ff -Author: Henrik Gramner -Date: Sat Aug 8 22:26:38 2015 +0200 - - cygwin: Enable MSVS support - - `cl -showIncludes` creates absolute Windows paths for some files, attempt - to convert those to Unix paths. - - Use relative paths for dependencies located in or below the working directory - in order to mimic the behavior of gcc and to make the paths more readable. - - Make the dependency generation script a bit more robust in general. - -commit 817a4414b98e8a511c626932e7d433388bc96507 -Author: Henrik Gramner -Date: Sat Aug 8 18:34:21 2015 +0200 - - cltostr.sh: Minor fixes - -commit 1a3d963441eaad25972763423d60158f597c5f65 -Author: Henrik Gramner -Date: Sat Aug 8 12:21:54 2015 +0200 - - Simplify version.sh - - Also remove some non-POSIX syntax and improve robustness. - - As a bonus the script now runs about 2-3 times faster. 
- - `git rev-list --count` could be used to simplify things even further, - but that functionality was added in git 1.7.2 so keep `wc -l` for now - to maintain compatibility with older git versions. - -commit f7f6af76ef22e812ef330e2839488e83dd553836 -Author: 장영훈 -Date: Fri Aug 7 14:43:24 2015 +0900 - - msvs: Fix cl detection in non-English environments - -commit e1a55bbbff2b4460ceb843f163e349fed7d32969 -Author: Henrik Gramner -Date: Mon Aug 3 21:05:11 2015 +0200 - - x86inc: Sync minor changes from ffmpeg/libav - -commit 36f537b141da076032fd11f1745bb62d466dd7bf -Author: Henrik Gramner -Date: Wed Jul 29 19:30:52 2015 +0200 - - matroska: Add comments for the remaining element names - -commit f04062e6380cbe10453dab33a3575c373e63ff9b -Author: Henrik Gramner -Date: Wed Jul 29 19:30:41 2015 +0200 - - Silence various static analyzer warnings - - Those are false positives, but it doesn't hurt to get rid of them. - -commit b1cbf7ebe4a192bbc25cc910cb2910a34992f807 -Author: Henrik Gramner -Date: Sun Jul 26 23:13:29 2015 +0200 - - mingw: Enable the tsaware linker flag - - Avoids an irrelevant compatibility layer in Terminal Services environments. - - https://msdn.microsoft.com/en-us/library/cc834995.aspx - -commit 8a1ff031ecd4b423fc373540b9b68cdf97602bbf -Author: Henrik Gramner -Date: Sun Jul 26 23:13:26 2015 +0200 - - msvs: Don't redefine snprintf for VS2015 - - Visual Studio 2015 has a proper snprintf implementation. - -commit aa9d22927c0264c08c11c9e72294fc651a155b3e -Author: Henrik Gramner -Date: Sun Jul 26 23:13:19 2015 +0200 - - msvs: Prefer link.exe from the same directory as cl.exe - - /usr/bin/link from coreutils may be located before the MSVS linker in $PATH - which causes linking to fail due to using the wrong binary. 
- -commit ca8bd68063d74227d917f34fd50942265f9a106c -Author: Henrik Gramner -Date: Mon Jul 27 00:10:00 2015 +0200 - - frame_dump: check fseek() return value - -commit 53b3b747e22f53204f6efb5106ab4a5a8eb57626 -Author: Henrik Gramner -Date: Mon Jul 27 00:08:38 2015 +0200 - - x264_vfprintf: use va_copy - - It's undefined behavior to use the same va_list twice. - - This most likely didn't cause any issues in practice since the string would - have to be larger than 4 KiB to trigger the fallback path. - - Use workaround for ICL as it doesn't define va_copy even for C99. - -commit 59e7ded846a832125cb533aadff9895487771ea7 -Author: Henrik Gramner -Date: Mon Jul 27 00:08:31 2015 +0200 - - param_parse: Fix framerate rounding issues - -commit 73ae2d11d472d0eb3b7c218dc1659db32f649b14 -Author: Marcin Juszkiewicz -Date: Mon Jun 1 11:24:45 2015 +0200 - - aarch64: Remove broken CFLAGS in configure - - GCC doesn't have an "-arch" switch, but works when that entire line is removed. - -commit cc002bd545b008b1cdc7c6d7cc0c616ba125d4d5 -Author: Rong Yan -Date: Mon Jul 20 03:34:20 2015 -0500 - - ppc: Add little-endian PowerPC support - -commit 145f3a6275802a649b8dedb49bb0e054caf31717 -Author: Rishikesh More -Date: Thu Jun 18 17:48:46 2015 +0530 - - mips: MSA quant optimizations - - Signed-off-by: Rishikesh More - -commit 16395d2b6f827b076612eb5b70711b79621da67e -Author: Rishikesh More -Date: Thu Jun 18 17:48:45 2015 +0530 - - mips: MSA predict optimizations - - Signed-off-by: Rishikesh More - -commit 204e1a60237e0b3168ccbdb2905c9af8188b90ee -Author: Rishikesh More -Date: Thu Jun 18 17:48:44 2015 +0530 - - mips: MSA pixel optimizations - - Signed-off-by: Rishikesh More - -commit 3ce6430eb11839c69d606c59c0f8c31ce0b6dd17 -Author: Rishikesh More -Date: Thu Jun 18 17:48:43 2015 +0530 - - mips: MSA deblock optimizations - - Signed-off-by: Rishikesh More - -commit 57618eead025eaf654226add94689d6d2999ccf6 -Author: Rishikesh More -Date: Thu Jun 18 17:48:42 2015 +0530 - - mips: MSA dct optimizations 
- - Signed-off-by: Rishikesh More - -commit 4ebb23aaf4f46b7a04aa8aefa3c08e7b6493de4c -Author: Rishikesh More -Date: Thu Jun 18 17:48:40 2015 +0530 - - mips: MSA mc optimizations - - Signed-off-by: Rishikesh More - -commit cd19444d3f9915a5a33a95e308bc8021d7e62afe -Author: Rishikesh More -Date: Thu Jun 18 17:48:38 2015 +0530 - - mips: Common MSA macros - - Add macros for load/store, slide, shift, transpose and basic arithmetic - operations required by subsequent patches. - - Signed-off-by: Rishikesh More - -commit 72b82bd98a99b1d75322b70a74365547382ce062 -Author: Rishikesh More -Date: Tue May 12 19:38:09 2015 +0530 - - mips: Add MSA support to checkasm - - Signed-off-by: Rishikesh More - -commit ce0757d9d2778e349a7c2f6445b6aa75d8765c30 -Author: Kaustubh Raste -Date: Fri Apr 17 17:38:58 2015 +0530 - - mips: Initial MSA support - - MSA is the MIPS SIMD Architecture. - - Add X264_CPU_MSA define. - Update configure to detect MIPS platform and set flags. - CPU-specific gcc options are expected through --extra-cflags. - - Sample command line for mips32r5: - ./configure --host=mipsel-linux-gnu --cross-prefix=/mips-mti-linux-gnu- - --extra-cflags="-EL -mips32r5 -msched-weight -mload-store-pairs" - - Signed-off-by: Kaustubh Raste - -commit 9140ee1fb39bd4a4ccace28091398e8a96704f07 -Author: Anton Mitrofanov -Date: Fri Jul 17 00:22:29 2015 +0300 - - Limit autodetection of threads number according to the source height - -commit aeaed2d07b5b43437bb640e1f987d42a6fab03b9 -Author: Anton Mitrofanov -Date: Thu Jul 16 19:04:59 2015 +0300 - - Fine-tune of frame's size predictors at ratecontrol start - - This is attempt to improve VBV at start of video with a lot of threads which - delay feedback for predictors. - -commit aa275158641e94203003157947d43ff4cc685068 -Author: Anton Mitrofanov -Date: Thu Jul 16 16:15:56 2015 +0300 - - Use forced frame types in slicetype analysis - - This should improve MBTree and VBV when a lot of forced frame types are used. 
- -commit a83edfa053f60ad0c8a164f31e7492a680eef361 -Author: Henrik Gramner -Date: Mon Dec 1 22:05:42 2014 +0100 - - x86: SSSE3 and AVX2 implementations of plane_copy_swap - - For NV21 input. - -commit 627f891c571cacb51deb5e211b23c309b14a6587 -Author: Yu Xiaolei -Date: Fri Jun 6 16:05:27 2014 +0800 - - NV21 input support - - Eliminates an extra copy when encoding Android camera preview images. - - Checkasm test by Janne Grunau. - ARM assembly with improvements from Janne Grunau. - -commit 6ee94dc898dc029553e308f1e76891ccefb3f0a7 -Author: Henrik Gramner -Date: Tue Jun 23 17:00:47 2015 +0200 - - deblock: Write combining - -commit 08a9c51919f4edbd6e484155e5521a92a0800651 -Author: Henrik Gramner -Date: Tue Jun 23 14:59:59 2015 +0200 - - Get rid of some tabs and trailing whitespaces - -commit b568a256b9bc6c500d7b1ffe4b9c3311ee5ff337 -Author: Henrik Gramner -Date: Sat May 23 19:44:16 2015 +0200 - - x86: Experimental nasm support - - Enables the use of nasm as an alternative to yasm. - - Note that nasm cannot assemble x264 with PIC enabled since it currently doesn't - support [symbol-$$] addressing which is used extensively by x264's PIC code. - This includes all 64-bit Windows and 64-bit OS X builds, even non-shared. - - For the above reason nasm is currently intentionally not auto-detected, instead - the assembler must be explicitly specified using "AS=nasm ./configure". - - Also drop -O2 from ASFLAGS since it's simply ignored anyway. - -commit d14e38c059c9a2aecc82477b99d56ef74eb731ec -Author: Timothy Gu -Date: Tue May 26 19:12:42 2015 +0200 - - x86inc: Prevent warnings when using `struc` and `endstruc` - - struc and endstruc attempts to revert to the previous section state set by - the SECTION macro. - - Use the primitive [SECTION] directive instead of the SECTION macro for the - .note.GNU-stack section to prevent it from being emitted again during endstruc. 
- -commit 353b1f888c34081e94727a1ffa0e4920e2cfe8a9 -Author: Henrik Gramner -Date: Wed May 27 21:38:14 2015 +0200 - - x86inc: Drop SECTION_TEXT macro - - The .text section is already 16-byte aligned by default on all supported - platforms so `SECTION_TEXT` isn't any different from `SECTION .text`. - -commit b615f82e45c88b7915c5571ad09fa65a0b6130d7 -Author: Henrik Gramner -Date: Sat May 23 13:38:05 2015 +0200 - - x86inc: Disable vpbroadcastq workaround in newer yasm versions - - The bug was fixed in 1.3.0, so only perform the workaround in earlier versions. - -commit 8f834d6ccc054d8c32d84310664dc07abac553ec -Author: Henrik Gramner -Date: Sun May 24 22:57:00 2015 +0200 - - Prefer Unicode versions of Windows API calls - - Just for consistency, doesn't affect behavior. - -commit 3f8c8eb1758d0fa890538eba6f5e699c93dc1304 -Author: Henrik Gramner -Date: Sun May 24 23:21:20 2015 +0200 - - Get rid of fPIC warnings when compiling a shared library on Windows - - PIC is always enabled when compiling for Windows so gcc complains when using - -fPIC since it doesn't do anything. - -commit 0c21480fa2fdee345a3049e2169624dc6fc2acfc -Author: Henrik Gramner -Date: Sat Jul 25 22:42:59 2015 +0200 - - matroska: Write the correct DocTypeVersion when using frame-packing - - The StereoMode element is only valid with DocTypeVersion 3 or higher. 
- -commit 791d265281af1d022a72ba9e003a987e97da5c0d -Author: Anton Mitrofanov -Date: Sat Jul 25 00:21:52 2015 +0300 - - dump_yuv: Fix file handle leak - -commit d6aa586b2f83eeb776744c2e97a8ce9e1181c59b -Author: Anton Mitrofanov -Date: Sat Jul 25 00:20:47 2015 +0300 - - mp4: Fix file handle leak - -commit 942e4e4530d0909c2b580be88acd18d1e5fa4fa8 -Author: Henrik Gramner -Date: Wed Jun 24 00:40:45 2015 +0200 - - flv: Check fseek() and fwrite() return values - -commit 250d5b0e13045f6a1ebfeb379933b5c5daa9cf41 -Author: Henrik Gramner -Date: Wed Jun 24 00:22:56 2015 +0200 - - flv: Fix memory and file handle leaks - -commit 3533520655ef095ef009af9b6b27a20b45fd13ee -Author: Henrik Gramner -Date: Wed Jun 24 01:23:35 2015 +0200 - - avs: Fix file handle leak - -commit df152a77e1b17065aecb40c9a2a28d5953887ac9 -Author: Henrik Gramner -Date: Tue Jun 23 13:38:02 2015 +0200 - - matroska: Fix memory leak - -commit 6d5249977f5d62f6e167a062bdd94d8546eca1f7 -Author: Henrik Gramner -Date: Tue Jun 23 13:24:29 2015 +0200 - - rdo: Fix potential CAVLC overflow issues - -commit 936e8da1a4f9d0431b181d0877bb1602d4de9441 -Author: Henrik Gramner -Date: Tue Jun 23 22:08:35 2015 +0200 - - slurp_file: Various minor bug fixes - - * Fix unsigned <= 0 check. - * Add additional size sanity check on 32-bit systems. - * Don't read uninitialized data if fread() fails. 
- -commit d302526d5b97818f588b86f408f910924790242e -Author: Henrik Gramner -Date: Tue Jun 23 22:47:53 2015 +0200 - - param_parse: Check strdup() return value - -commit 94e476d80b9635508907893c97e8f8d9f0bc9ddf -Author: Henrik Gramner -Date: Tue Jun 23 15:38:16 2015 +0200 - - param_parse: Fix memory leak - -commit 45856b9787eab95434d66b4bc2e18819483f0e43 -Author: Anton Mitrofanov -Date: Fri Jun 19 16:01:12 2015 +0300 - - Add FreeBSD's stdint.h header guard to allowed list - - Patch written by Koop Mast - -commit 35cf1a2cbf253e43cab7747eb903a3b844bd42c1 -Author: Henrik Gramner -Date: Fri May 22 19:23:33 2015 +0200 - - x86: Prevent overread of src in plane_copy_interleave - - Could only occur in 4:2:2 with height == 1. - - Also enable asm for inputs with different U/V strides as long as the strides - have identical signs. - -commit 003414a4b3724f0972e4507dfd1432dd442d2228 -Author: Anton Mitrofanov -Date: Wed May 20 23:10:20 2015 +0300 - - checkasm: Fix incorrect memcmp size for ARM architecture - -commit e08fdc81018489217f4bafe7321a3baf372fac1f -Author: Anton Mitrofanov -Date: Sun Apr 26 20:51:05 2015 +0300 - - Fix possible use of uninitialized MVs in lookahead analysis for B-frames - -commit 0b0210857ef13214f12861dec672006455a556d6 -Author: Anton Mitrofanov -Date: Tue Apr 21 23:08:19 2015 +0300 - - Catch incorrect usage of libx264 API for delayed frames flushing - -commit 3a6bd39a650b47572743c2d2ea2fd7c214053fb2 -Author: Anton Mitrofanov -Date: Sat Mar 7 23:00:09 2015 +0300 - - Fix detection of system libx264 configuration - -commit 121396c71b4907ca82301d1a529795d98daab5f8 -Author: Anton Mitrofanov -Date: Mon Feb 23 14:23:18 2015 +0300 - - Cosmetic changes - -commit 8e71b432e5dbe835fa4516064f6841a03c79b183 -Author: Anton Mitrofanov -Date: Wed Dec 31 02:15:05 2014 +0300 - - Update configure for auto detection of system libx264 configuration - -commit 0f84192e88d6adc4512f6f320a50a09b4608634c -Author: Anton Mitrofanov -Date: Tue Feb 3 14:51:28 2015 +0300 - - Add tile 
format frame packing value - - Defined in 2014-02 edition. - -commit f08b1c6b8e186ff5a931e9a80e8923e42efff0e4 -Author: Anton Mitrofanov -Date: Tue Feb 3 13:39:14 2015 +0300 - - Stricter validation of crop-rect values - -commit 196cb9ab52af9370fc66a474ffc4a52a75dc5eb4 -Author: Vittorio Giovara -Date: Tue Jan 20 16:15:56 2015 +0000 - - Add mono frame packing value - - Defined in 2013-04 edition. - -commit c8a773ebfca148ef04f5a60d42cbd7336af0baf6 -Author: Vittorio Giovara -Date: Tue Jan 20 15:57:41 2015 +0000 - - Validate frame packing value instead of clipping - -commit a95584945dd9ce3acc66c6cd8f6796bc4404d40d -Author: Christophe Gisquet -Date: Tue Feb 3 20:40:41 2015 +0100 - - x86inc: Correctly warn on use of SSE2 instructions in SSE functions - - SSE2 instructions that are XMM-implementations of pre-existing MMX/MMX2 - instructions did not issue warnings when used in SSE functions. Handle - it by also checking the register type when such instructions are used. - -commit 23d4434de9ab5ef32ebb03401d971b8579a65fc6 -Author: Christophe Gisquet -Date: Tue Feb 3 18:02:30 2015 +0100 - - x86inc: Fix instantiation of YMM registers - -commit 4c75f3d729aaf3bcb00edf789c71f09495374bdf -Author: Vittorio Giovara -Date: Tue Jan 20 16:28:54 2015 +0000 - - matroska: Correctly write display width and height in stereo mode - - According to the specifications, when stereo mode is set, these values - represent the single view size. 
- -commit c3ba2a8c595b1bb36da55b82f7f4046471349d0e -Author: Kieran Kunhya -Date: Tue Jan 20 09:38:00 2015 -0600 - - Use POC type 0 for AVC-Intra - - Based on a patch from Capella Systems - -commit b77cc09b9252d70f78726f2472391b63948d9895 -Author: Anton Mitrofanov -Date: Sat Jan 3 15:46:19 2015 +0300 - - Fix ARCH variable name conflict with BSD ports (bsd.port.mk) read-only variable - -commit 6e769846626f9185b59f3967e8b4ebe11497d878 -Author: Anton Mitrofanov -Date: Sat Dec 27 20:35:39 2014 +0300 - - Fix negative percentages in final stats output - - They were caused by integer overflow when encoding long UHD video. - -commit d7ccd89f1bea53c8c524f8e6eb963d57defb6813 -Author: Anton Mitrofanov -Date: Sat Jan 3 23:35:23 2015 +0300 - - Bump dates to 2015 - -commit 40bb56814e56ed342040bdbf30258aab39ee9e89 -Author: Anton Mitrofanov -Date: Mon Dec 15 18:49:23 2014 +0300 - - x86: Update intel compiler cpu dispatcher override for new versions of ICC/ICL - -commit d72a85b549acd981a8dae3dc5b71920ab2aeea4f -Author: Anton Mitrofanov -Date: Tue Sep 6 21:53:29 2011 +0400 - - New AQ mode: auto-variance AQ with bias to dark scenes - - Also known as --aq-mode 3 or auto-variance AQ modification. - -commit f4a455a43df3088bae5208dcc98b8f6214fdce7d -Author: Anton Mitrofanov -Date: Wed Aug 29 03:02:27 2012 +0400 - - Improve HRD conformance - -commit fa3549b5f2478f39cbcbd14d2e956e59f70d18eb -Author: Henrik Gramner -Date: Fri Nov 28 23:24:56 2014 +0100 - - x86: SSE and AVX implementations of plane_copy - - Also remove the MMX2 implementation and fix src overread for height == 1. 
- -commit 8797e0f8d416aadb91d359f144e4e7855071870a -Author: Anton Mitrofanov -Date: Mon Sep 29 23:26:19 2014 +0400 - - Update to the latest version of gas-preprocessor.pl from http://git.libav.org/?p=gas-preprocessor.git - - Contributions by Janne Grunau, Martin Storsjo, Mans Rullgard, David Conrad, Martin Aumuller and others - -commit 59b9c252cfa6242c7fa6424a463e51913996fe6a -Author: Janne Grunau -Date: Wed Nov 19 00:33:55 2014 +0100 - - aarch64: cabac_encode_{decision,bypass,terminal}_asm - - benchmarks on a Nexus 9 (nvidia denver): - 101.3 cycles in x264_cabac_encode_decision_c, 67105369 runs, 3495 skips - 97.3 cycles in x264_cabac_encode_decision_asm, 67105493 runs, 3371 skips - 132.8 cycles in x264_cabac_encode_terminal_c, 1046950 runs, 1626 skips - 116.1 cycles in x264_cabac_encode_terminal_asm, 1048424 runs, 152 skips - 92.4 cycles in x264_cabac_encode_bypass_c, 16776192 runs, 1024 skips - 89.6 cycles in x264_cabac_encode_bypass_asm, 16776453 runs, 763 skips - - Cycle counts are not as stable as one would like. The dynamic code - optimisation seems to produce different results for small changes in a - binary. Repeated runs with the same binary produce stable results - though (ignoring the first run). - -commit a6ec424939a4d3a59e4ec1e3999cb37e4314408e -Author: Janne Grunau -Date: Thu Nov 6 09:20:17 2014 +0100 - - checkasm: add cycle counter read for aarch64 - - Needs kernel support since user space access to the cycle counter is not - allowed on all available AArch64 systems (Android 5 and iOS). - -commit fa7e9d3d082327ceeacfaf85da6cde4c50fb4e5b -Author: Janne Grunau -Date: Wed Nov 5 11:35:13 2014 +0100 - - aarch64: nal_escape_neon - - 3-4 times faster. - -commit f13573e490d9f18bbcb10409fb09ec25e477035e -Author: Janne Grunau -Date: Fri Oct 31 14:49:04 2014 +0100 - - aarch64: {plane_copy,memcpy_aligned,memzero_aligned}_neon - - 2-3 times faster than C. 
- -commit 8d655b63b4f7bc021ad038ea64b7c4de9d0ef74b -Author: Janne Grunau -Date: Wed Oct 29 18:17:48 2014 +0100 - - aarch64: x264_mbtree_propagate_{cost,list}_neon - - x264_mbtree_propagate_cost_neon is ~7 times faster. - x264_mbtree_propagate_list_neon is 33% faster. - -commit 4d400a6ec67f17ae3b17876b0318b956b6d5c856 -Author: Janne Grunau -Date: Tue Oct 21 15:18:49 2014 +0200 - - aarch64: x264_denoise_dct_neon - - 3.5 times faster. - -commit 4e8ac132cc2feff5786d12c90fd62cf97979bae1 -Author: Janne Grunau -Date: Mon Oct 20 13:12:14 2014 +0200 - - aarch64: x264_coeff_level_run{4,8,15,16} - - All functions ~33% faster. - -commit dd7666742d5a1a7af076fb388c6adf1b10dcdb3e -Author: Janne Grunau -Date: Tue Oct 14 19:20:52 2014 +0200 - - aarch64: NEON asm for intra luma deblocking - - deblock_luma_intra[0]_neon is 2 times faster, - deblock_luma_intra[1]_neon is ~4 times faster. - -commit 0122fd230cbf7351845dd354d5ee883d741222ef -Author: Janne Grunau -Date: Mon Oct 13 17:29:22 2014 +0200 - - aarch64: x264_deblock_h_chroma_422_neon - - deblock_h_chroma_422 2.5 times faster - -commit 44cb1dcdbdaafeddd98d2ebe3d02408bc380713e -Author: Janne Grunau -Date: Mon Oct 13 12:43:50 2014 +0200 - - aarch64: x264_deblock_h_chroma_mbaff_neon - - deblock_chroma_420_mbaff_neon 2 times faster - -commit f2e439d113ae86a0a1ef8215d4d4111892aed3f7 -Author: Janne Grunau -Date: Fri Oct 10 10:29:15 2014 +0200 - - aarch64: NEON asm for intra chroma deblocking - - deblock_h_chroma_420_intra, deblock_h_chroma_422_intra and - x264_deblock_h_chroma_intra_mbaff_neon are ~3 times faster. - deblock_chroma_intra[1] is ~4 times faster than C. 
- -commit ce6c94c0bef3350e9546302aae5909404b056fdb -Author: Janne Grunau -Date: Tue Sep 2 10:27:22 2014 +0200 - - aarch64: add myself as author to aarch64/mc.h - -commit be7e5fa6eee2731abdb1b41bc2a4c1a29e672747 -Author: Janne Grunau -Date: Thu Aug 14 14:22:50 2014 +0100 - - aarch64: NEON asm for integral init - - integral_init4h_neon and integral_init8h_neon are 3-4 times faster than - C. integral_init8v_neon is 6 times faster and integral_init4v_neon is 10 - times faster. - -commit eb1d35725e542968c4a6480c157db40570477a95 -Author: Janne Grunau -Date: Wed Aug 13 13:30:53 2014 +0100 - - aarch64: NEON asm for 8x16c intra prediction - - Between 10% and 40% faster than C. - -commit 40d5db342b7f5198db9826a51f31e454bd208596 -Author: Janne Grunau -Date: Tue Aug 12 17:26:10 2014 +0200 - - aarch64: NEON asm for decimate_score - - decimate_score15 and 16 are 60% faster, decimate_score64 is 4 times - faster than C. - -commit 45e1ebf88a1c3bf37e1326ce621a9b735d155885 -Author: Janne Grunau -Date: Fri Aug 8 11:19:35 2014 +0100 - - aarch64: implement x264_sub8x16_dct_dc_neon - - 4 times faster than C. - -commit 90f0b5c1c881f345c9da15bc482055f2a92f8ceb -Author: Janne Grunau -Date: Thu Aug 7 19:46:07 2014 +0200 - - aarch64: implement x264_pixel_asd8_neon - - 7 times faster than C. - -commit f8f8d13d5978b13fc831e041e52aa617550bbdf3 -Author: Janne Grunau -Date: Thu Aug 7 16:49:12 2014 +0200 - - aarch64: NEON asm for 4x16 sad, satd and ssd - - pixel_sad_4x16_neon: 33% faster than C - pixel_satd_4x16_neon: 5 times faster - pixel_ssd_4x16_neon: 4 times faster - -commit 35b91f2410dcf4fc5191dd85ccda7a42eb01eae8 -Author: Janne Grunau -Date: Wed Jul 30 15:48:25 2014 +0100 - - aarch64: implement x264_pixel_ssd_nv12_core_neon - - 13 times faster than C. - -commit 99a1ca1f1a62d51e47d1ac2c92ee9c3bf3b5712b -Author: Janne Grunau -Date: Tue Jul 29 18:26:11 2014 +0100 - - aarch64: implement x264_pixel_vsad_neon - - 35 times faster than C. 
- -commit 6c1632493e5afac8be1e1693377dab27f4704a1d -Author: Janne Grunau -Date: Tue Jul 29 11:06:24 2014 +0100 - - aarch64: NEON asm for missing x264_zigzag_* functions - - zigzag_scan_4x4_field_neon, zigzag_sub_4x4_field_neon, - zigzag_sub_4x4ac_field_neon, zigzag_sub_4x4_frame_neon, - zigzag_sub_4x4ac_frame_neon more than 2 times faster - - zigzag_scan_8x8_frame_neon, zigzag_scan_8x8_field_neon, - zigzag_sub_8x8_field_neon, zigzag_sub_8x8_frame_neon 4-5 times faster - - zigzag_interleave_8x8_cavlc_neon 6 times faster - -commit d040d28514db7d1fbd5c3f06c37a77de14b15e5b -Author: Janne Grunau -Date: Fri Jul 25 11:53:17 2014 +0100 - - aarch64: implement x264_pixel_sa8d_satd_16x16_neon - - ~20% faster than calling pixel_sa8d_16x16 and pixel_satd_16x16 - separately. - -commit 91a01d4ca95ee1c621578e118b86d767eab96b3b -Author: Janne Grunau -Date: Thu Aug 14 23:13:27 2014 +0200 - - aarch64: optimize x264_predict_8x8c_dc_left_neon - - 25% faster than the previous version. - -commit 8ae4e1cfa3d16451ccf285228d309f6f4940a747 -Author: Henrik Gramner -Date: Sat Aug 2 18:26:18 2014 +0200 - - x86: Make AVX2 also imply FMA3 - - All CPUs with AVX2 support FMA3 (but not the other way around). - -commit 06882793b260824bc578d0530f64e7f30f2a9f39 -Author: Anton Mitrofanov -Date: Thu Nov 13 22:52:00 2014 +0300 - - Simplify libx264 API usage example - -commit 6a301b6ee0ae8c78fb704e1cd86f4e861070f641 -Author: Henrik Gramner -Date: Fri Nov 21 23:47:20 2014 +0100 - - AvxSynth: Remove a bunch of unused cruft - -commit 30140b34b879605cf70cab0634a4a8faef5b6e60 -Author: Anton Mitrofanov -Date: Wed Dec 3 22:36:12 2014 +0300 - - Fix bugs/typos in motion compensation and cache_load - - Didn't affect output due to the incorrect values either not being used in the - code path or producing equal results compared to the correct values. - - Also deduplicate hpel_ref arrays. 
- -commit a46820e00ad3c86b80f5830ed92553de474b7d5c -Author: Anton Mitrofanov -Date: Sun Nov 30 23:39:28 2014 +0300 - - checkasm: Fix undefined behavior warnings - -commit 4e97ca566fdf6cd36281e26ee68f64993f4751a1 -Author: Henrik Gramner -Date: Sat Nov 29 18:47:52 2014 +0100 - - checkasm: Fix V210 reporting - - It would previously report FAILED if any of the earlier plane_copy tests failed. - -commit 24e4fed388fcb34c33df7c87e7d6758b9ebed40c -Author: Anton Mitrofanov -Date: Sun Oct 12 21:01:53 2014 +0400 - - Safety check against malicious high bit-depth input which could cause crash - -commit 9bec6fed6d1b95f9921f22ba21e7398eff50b75e -Author: Anton Mitrofanov -Date: Sun Oct 12 20:45:40 2014 +0400 - - libx264 API usage example - -commit 329fe5f6498be7ab337d98ac22c17d379335c854 -Author: Henrik Gramner -Date: Fri Oct 17 21:35:42 2014 +0200 - - x86: AVX2 high bit-depth var_16x16 - - 40->27 cycles on Haswell. - -commit 4576cfd8c391b27748d6f97f5b621cec4ed8047c -Author: Henrik Gramner -Date: Wed Oct 8 22:25:35 2014 +0200 - - checkasm: Serialize read_time() calls on x86 - - Improves the accuracy of benchmarks, especially in short functions. - - To quote the Intel 64 and IA-32 Architectures Software Developer's Manual: - "The RDTSC instruction is not a serializing instruction. It does not necessarily - wait until all previous instructions have been executed before reading the counter. - Similarly, subsequent instructions may begin execution before the read operation - is performed. If software requires RDTSC to be executed only after all previous - instructions have completed locally, it can either use RDTSCP (if the processor - supports that instruction) or execute the sequence LFENCE;RDTSC." - - RDTSCP would accomplish the same task, but it's only available since Nehalem. - - This change makes SSE2 a requirement to run checkasm. 
- -commit b85a74a22f79c8722674c4cfd7cddf5f54c8421d -Author: Vittorio Giovara -Date: Mon Sep 29 18:51:30 2014 +0100 - - Support case-independent string options - -commit 20f116b29e93574e9607d1abf2960f32b5730e52 -Author: Anton Mitrofanov -Date: Sat Sep 6 20:44:49 2014 +0400 - - Shut up gcc -Wuninitialized warnings - -commit 3df1d248dd8a4b0d0dffd149effe2bde38de49aa -Author: Anton Mitrofanov -Date: Fri Sep 5 19:43:52 2014 +0400 - - Shut up clang -Wuninitialized warning - -commit 01204b60367f4959e8393652dd30f0cfba2d2c80 -Author: Anton Mitrofanov -Date: Fri Sep 5 19:30:47 2014 +0400 - - Fix few clang -Wunused-* warnings - -commit 9df377f87702c82a2202d34919c07e32c60b40ae -Author: Anton Mitrofanov -Date: Thu Aug 28 20:13:13 2014 +0400 - - Fix inappropriate instruction use - -commit 73b8686fc22c9247d90963983d406cd7b9131068 -Author: Anton Mitrofanov -Date: Thu Aug 28 18:38:53 2014 +0400 - - x264asm: warn when inappropriate instruction used in function with specified cpuflags - -commit 204a9bd0a1bc507cbd69a77f3318afcb56ede65d -Author: Anton Mitrofanov -Date: Tue Sep 2 01:48:00 2014 +0400 - - Fix VBV with true VFR streams - -commit b36d44c68cddff00c5b6de1e6cb6a86c1af2cbfc -Author: Anton Mitrofanov -Date: Mon Sep 1 22:45:00 2014 +0400 - - Fix VBV - -commit dd79a61e0e354a432907f2d1f7137b27a12dfce7 -Author: Anton Mitrofanov -Date: Wed Jul 30 03:03:32 2014 +0400 - - Update to the current lavf API and fix memory leak when using --seek - -commit 91727d729a4a33a3f21188f838077040740cb353 -Author: Henrik Gramner -Date: Tue Aug 5 01:42:55 2014 +0200 - - x86inc: Make INIT_CPUFLAGS support an arbitrary number of cpuflags - - Previously there was a limit of two cpuflags. - -commit d4317786b8428b00978459f6de3db219f0f6f8e6 -Author: Henrik Gramner -Date: Tue Aug 5 01:42:51 2014 +0200 - - x86: Minor pixel_ssim_end4 improvements - - Reduce the number of vector registers used from 7 to 5. - Eliminate some moves in the AVX implementation. 
- Avoid bypass delays for transitioning between int and float domains. - -commit 98100b88b475227f375d9bcbaea0bac57008accc -Author: Henrik Gramner -Date: Tue Aug 5 01:42:47 2014 +0200 - - x86: Faster quant_4x4x4 - - Also drop the MMX version instead of doing a bunch of ifdeffery to support it after this change. - -commit 56fcb444c4c118ff67cf12838d2b2801d7b43407 -Author: Anton Mitrofanov -Date: Sun Aug 10 22:46:12 2014 +0400 - - configure: improve cc_check for clang and ICL to not ignore unknown options - -commit ecb04d08af654a7cfd5b9aa6261bd789de20613a -Author: Henrik Gramner -Date: Tue Aug 5 01:42:44 2014 +0200 - - checkasm: Only call x264_cpu_detect() once - -commit 1343db872b1d7d43dc7fb431a8207efb5ca31e2e -Author: Janne Grunau -Date: Fri Jul 18 14:49:10 2014 +0100 - - aarch64: deblocking NEON asm - - Deblock chroma/luma are based on libav's h264 aarch64 NEON deblocking - filter which was ported by me from the existing ARM NEON asm. No - additional persons to ask for a relicense. - -commit 3c1fa5d9b2ea62f05473080313c543b7e795b307 -Author: Janne Grunau -Date: Fri Jul 18 09:29:35 2014 +0100 - - aarch64: intra prediction NEON asm - - Ported from the ARM NEON asm. - -commit 556b0e7928d14818454e0c33032754f6323f02e9 -Author: Janne Grunau -Date: Thu Jul 17 15:58:44 2014 +0100 - - aarch64: motion compensation NEON asm - - Ported from the ARM NEON asm. - -commit 6cda439867fcd9e884a10502845fb79fc7ffed69 -Author: Janne Grunau -Date: Wed Jul 16 10:03:52 2014 +0100 - - aarch64: transform and zigzag NEON asm - - Ported from the ARM NEON asm. - -commit db5c504aa06550f8e916157d1dcc657818e84d62 -Author: Janne Grunau -Date: Tue Jul 15 12:57:03 2014 +0100 - - aarch64: quantization and level-run NEON asm - - Ported from the ARM NEON asm. - -commit f4a82a54885f3dad7106a6855eaef50ea085b27e -Author: Janne Grunau -Date: Wed Mar 19 13:48:21 2014 +0100 - - aarch64: pixel metrics NEON asm - - Ported from the ARM NEON asm. 
- -commit 3e57554ee4db6ade7a2dccaac92cb8116f3a43d6 -Author: Janne Grunau -Date: Fri Jul 18 17:44:57 2014 +0200 - - aarch64: add utility functions for asm - -commit efaf0b88f7c703533ee8857a6a5039cf64bce3a0 -Author: Janne Grunau -Date: Wed Mar 19 13:45:17 2014 +0100 - - aarch64: add armv8 and neon cpu flags and test them - -commit 943128a527d1b98a63017d58cd1fcf53aaffcb6e -Author: Janne Grunau -Date: Tue Mar 18 22:10:24 2014 +0100 - - aarch64: initial build support - -commit ee427b69868d506182f4e22bffdc45e913f255af -Author: Janne Grunau -Date: Tue Jul 22 19:28:27 2014 +0200 - - checkasm: test zigzag_sub_8x8_{frame,field} - -commit 69740fd362ee1c0a2e80d6f4e2724d731a3c951c -Author: Janne Grunau -Date: Sun Jul 20 18:29:01 2014 +0200 - - arm: use long multiplication in mc_weight_w*_neon - - 9-19% faster on a cortex-a9. - -commit 0a05b3f9aa8c524a67119ec5eb6bcc24eb8f2f3b -Author: Janne Grunau -Date: Sun Jul 20 18:24:57 2014 +0200 - - arm: do not use aligned stores in mc_weight_w4_*neon - - mc_weight_w4_*neon is also used for width 2 which does not guarantee - 4-byte aligned destination. Fixes crashes caused by random memory - corruption. - -commit c2df1fc65c98e213c444134d5dbbb79d439af4db -Author: Janne Grunau -Date: Wed Apr 2 16:31:28 2014 +0200 - - checkasm: add memory clobber to read_time inline asm - - The memory acts as compiler barrier preventing aggressive reordering - of read_time calls. gcc 4.8 reorders some of initial read_time calls - after the second when targeting arm. - -commit d72760401cb0602b8bf86037988e66cdc810681c -Author: Janne Grunau -Date: Sun Jul 20 13:32:10 2014 +0200 - - arm: check if the assembler supports the '.func' directive - - The integrated assembler in llvm trunk (to be released as 3.5) is - otherwise capable enough to assemble the arm asm correctly. 
- -commit 9463ec0004f1bddc49c05ed8e38430a4ce1738fb -Author: Janne Grunau -Date: Sun Jul 20 13:40:28 2014 +0200 - - arm/ppc: use $CC as default assembler - -commit feec4a478bfdfb4426268b2ee79bac473b97488c -Author: Janne Grunau -Date: Sun Jul 20 13:34:27 2014 +0200 - - arm: move instructions after '.rept' to separate line - - The gas manual states "Repeat the sequence of lines between the .rept - directive and the next .endr directive ...". GNU as seems to support - instructions on the same line as .rept anyway but the integrated - assembler in llvm trunk (to be released 3.5 in August 2014) does not. - -commit 6e8971021d2a12505cb2ad9ea677dfc8af676919 -Author: Janne Grunau -Date: Sun Jul 20 13:08:17 2014 +0200 - - arm: set .arch/.fpu from asm.S - -commit 716ee56d0b35e512e8e0ae1a3e71f26e65e86be3 -Author: Janne Grunau -Date: Sun Jul 20 12:55:53 2014 +0200 - - arm: do not append CFLAGS to ASFLAGS - -commit 021c0dc6c95c1bc239c9db78a80dd85fc856a4dd -Author: Tristan Matthews -Date: Thu Jul 17 00:03:50 2014 -0400 - - filters: fix sizeof mismatch - -commit 95beb822e61a8d84dba9743f4b20b4c303f26798 -Author: Anton Mitrofanov -Date: Thu Jul 31 16:17:32 2014 +0400 - - Fix memory leak when using select_every filter - -commit ea0ca51e94323318b95bd8b27b7f9438cdcf4d9e -Author: Tsukasa OMOTO -Date: Sun Jul 20 22:17:11 2014 +0900 - - Fix cltostr.sh on OS X - -commit 08d36b3fc975d049aa3786ca34fb0b2f2ba0007c -Author: Fiona Glaser -Date: Wed Jul 9 12:21:33 2014 -0700 - - Check pf_log is set in validate_parameters - - Help remind people to call x264_param_default in case they didn't read the - documentation. - -commit 9e93d18b7fe7668f8277b5f117d7e39be24c6070 -Author: Anton Mitrofanov -Date: Wed Jul 9 17:17:04 2014 +0400 - - Check malloc during frame dumping - -commit 8a85db879d57537f91a9908be3585512981c08b8 -Author: Yusuke Nakamura -Date: Thu Jun 19 05:21:29 2014 +0900 - - mp4_lsmash: Use new I/O API instead of deprecated one. 
- -commit f112c0e1cae71eb5b98b4f86f635f235cc7b81cb -Author: Anton Mitrofanov -Date: Sun Jun 8 22:19:46 2014 +0400 - - Remove meaningless use of abs() - -commit 6fbbb5b0c05a1d95cbd6efa7f01808ea87a39dc9 -Author: Steven Walters -Date: Sat May 31 10:31:16 2014 -0400 - - MSVS 2013 Update 2 support - - The first MSVS compiler C99 compliant enough to build x264. - Use `CC=cl ./configure` to compile with it. - -commit f53af048ed94551734265cf8f9dbe12d211a77fc -Author: Diego Biurrun -Date: Tue Apr 15 22:54:08 2014 +0200 - - configure: Add -Wno-maybe-uninitialized to CFLAGS - - The warnings generated by -Wmaybe-uninitialized are mostly spurious. - -commit cbd8d7b6db1f29929d1ad347e15afe7828ad7055 -Author: Diego Biurrun -Date: Wed May 7 13:20:43 2014 +0200 - - build: Replace cltostr.pl by a shell script - - This avoids a dependency on Perl to build OpenCL support. - -commit d8b6ce7f703d3c9d83dbd4e8ef44cfabc7e2f78e -Author: Diego Biurrun -Date: Tue Apr 15 23:02:39 2014 +0200 - - build: Simplify phony target declaration with wildcards - - Also add etags to list of phony targets. - -commit 2bd932fdf053faace84028a66d8ba9e17d526456 -Author: Diego Biurrun -Date: Wed May 7 12:47:37 2014 +0200 - - configure: Drop workaround for obsolete gcc 4.2 on ARM - -commit 31311f254971e1da51d817cb580fc4fe1f4d5f20 -Author: Diego Biurrun -Date: Wed May 7 21:43:15 2014 +0200 - - build: Add dependencies on x86inc.asm/x86util.asm for all .asm files - - This is a little bit overzealous, but errs on the side of caution. - Generating full dependency information is also possible, but slightly - slows down the build as YASM cannot do it as a side effect of compilation. - -commit 016831ec7b3a4a7062908243dbde62d7d89b334e -Author: Diego Biurrun -Date: Sun Apr 27 21:09:54 2014 +0200 - - Delete all SPARC optimizations - - SPARC has been obsolete for a long time and makes little sense as a - H.264 encoding platform. - - Also update authors file. 
- -commit c7c8eb15923d1888bb87e7642a66b417fab61e76 -Author: Diego Biurrun -Date: Wed May 7 12:46:42 2014 +0200 - - configure: Don't check for libavcore - - libavcore was a never-released bad idea with a short lifespan. - -commit dd5b5d3959e35c122c7709a9823a26b589c950da -Author: Diego Biurrun -Date: Sun Apr 27 23:19:04 2014 +0200 - - build: Set all ASFLAGS from within configure - - This is how all other toolchain flags are handled. - -commit c15f20bd772487d863f01a2813a3ab45b1f11a6b -Author: Diego Biurrun -Date: Sun Apr 27 23:23:49 2014 +0200 - - opencl: Check return value of fread() - - common/opencl.c:138:10: warning: ignoring return value of 'fread', declared with attribute warn_unused_result [-Wunused-result] - -commit af8e768e2bd3b4398bca033998f83b0eb8874914 -Author: Fiona Glaser -Date: Sat Jul 19 20:34:22 2014 -0700 - - Disable i8x8 in lossless - - x264's implementation was slightly incorrect due to a vague spec, so some - decoders decoded video incorrectly. - - Minimal impact on compression. - -commit 450cf7ae2592ee0cb474bcefedf90c9911605e26 -Author: Thomas Mundt -Date: Fri Jun 27 11:12:06 2014 -0700 - - AVC-Intra: fix compatibility with Avid Transfermanager - -commit 6eb483e4ca23f34a6a8fe09f3f2e9c9f192fd76b -Author: Henrik Gramner -Date: Tue Jul 8 21:15:32 2014 +0200 - - x86: Fix SIGILL in high bit-depth intra_sad_x3_4x4_sse2 - - An SSE3 instruction was used in an SSE2 function. - -commit 5e58ce7a8b39ab66c7d6420b85a8e09dd08dfaaf -Author: Anton Mitrofanov -Date: Wed Jul 9 17:01:54 2014 +0400 - - Fix incorrect row predictor addressing - - Somehow managed to not cause things to explode, but was clearly incorrect. - Might improve VBV in some cases to have this working right. 
- -commit 3fda920e6f1e4a8f76680c001962542866408114 -Author: Anton Mitrofanov -Date: Sat Jun 21 23:52:39 2014 +0400 - - Fix b-pyramid MMCO remove for frame-packing==5 - -commit 92fdb43dd47fbc3368d9d9c7ad940fbe03657bd3 -Author: Tal Aloni -Date: Tue Jun 17 15:10:56 2014 -0700 - - Fix frame-packing==5 with some decoders - - The spec mandates that frame-packing==5 requires the SEI on every frame that - begins a view sequence (i.e. the input frames L0-R0-L1-R1 have 4 view sequences, - but if reordered by the encoder to L0-L1-R0-R1 there are now 2 view sequences). - For simplicity, we write the SEI on every frame. - - This fixes frame-packing==5 3D playback on some decoders (PlayStation 3, Sony - W8 series, possibly others). - -commit 13d6dfd83af98e472a9e9a8b6abf5c971707a893 -Author: Anton Mitrofanov -Date: Thu May 22 13:27:00 2014 +0400 - - Fix pixel_ssim_end4 asm function for x86_64 systems - -commit a5831aa256b3161f898d2577d2eb8daa838d88d2 -Author: James Almer -Date: Wed Apr 9 03:33:06 2014 -0300 - - x86: XOP pixel_sad_{x3, x4} high bit-depth - -commit 0d989a4ff3298f9e495be452880b5f9bfb441e93 -Author: James Almer -Date: Wed Apr 9 03:33:05 2014 -0300 - - x86: XOP pixel_ssd_nv12_core - -commit 9b77dffab04e3ea242598454282b40800e720353 -Author: James Almer -Date: Wed Apr 9 03:33:04 2014 -0300 - - x86util: XOP optimized HADDD - -commit 1e517399f76b12fe2e73892970fe3aac01a178f8 -Author: James Almer -Date: Wed Apr 9 03:33:03 2014 -0300 - - x86: add missing initialization for high bit-depth sa8d_satd - -commit aa00925abd6f9ab4e20216ae5a5ad79b67756162 -Author: James Almer -Date: Sat Apr 5 23:46:31 2014 -0300 - - x86: add missing initializations for high bit-depth variance - -commit fadc4045f91ca78c046f301cba6065732b5d27ea -Author: Janne Grunau -Date: Tue Apr 1 22:11:45 2014 +0200 - - arm: use the weight_fn_t typedef for mc weight function arrays - -commit 644c396be97c1e6ace144f8be04afab19fb238af -Author: Janne Grunau -Date: Tue Apr 1 22:11:44 2014 +0200 - - arm: correct 
x264_mc_chroma_neon function declaration - -commit b2e9ca30f1e9ac25df1f592db04ff0d91faf42d4 -Author: Janne Grunau -Date: Tue Apr 1 22:11:43 2014 +0200 - - arm: do not export every asm function - - Based on Libav's libavutil/arm/asm.S. Also prevents having the same - label twice for every function on systems not defining EXTERN_ASM. - Clang's integrated assembler does not like it. - -commit ceb1484da34b7492f539b535a930652690372fe5 -Author: Janne Grunau -Date: Tue Apr 1 22:11:42 2014 +0200 - - arm: move all .macro/.endm to column 0 - -commit 24ab0e75db887c2b1a412d00878810ed6501061e -Author: William Grant -Date: Sun Mar 23 09:21:52 2014 -0700 - - aarch64: require PIC in shared mode - -commit 435722c9c1870cd54fdb89be39250d492aecb598 -Author: Janne Grunau -Date: Sun Mar 16 17:21:58 2014 +0100 - - arm: x264_coeff_last8_arm - - checkasm --bench on a cortex-a9: - coeff_last8_c: 173 - coeff_last8_armv6: 151 - - 60 instead of 73 cycles in ~130k runs on the same cpu while encoding. - -commit 2e96c571b8c324304b3d4fbb7914143518349213 -Author: Janne Grunau -Date: Sat Mar 15 20:09:18 2014 +0100 - - arm: x264_store_interleave_chroma_neon - - store_interleave_chroma_c: 4036 - store_interleave_chroma_neon: 1043 - -commit 1576e51e52148ad1e1d8b5e76562f9eae8d47e6e -Author: Janne Grunau -Date: Sat Mar 15 19:55:50 2014 +0100 - - arm: x264_plane_copy_interleave_neon - - plane_copy_interleave_c: 40285 - plane_copy_interleave_neon: 10137 - -commit 0016dec27080e53c794d7f919bd6df6b890d0128 -Author: Janne Grunau -Date: Sat Mar 15 19:21:12 2014 +0100 - - arm: x264_plane_copy_deinterleave_rgb_neon - - plane_copy_deinterleave_rgb_c: 31543 - plane_copy_deinterleave_rgb_neon: 8312 - -commit 5e0ca9aa4eab5e2cb4b124774c3ecebbc6f1ae35 -Author: Janne Grunau -Date: Sat Mar 15 18:22:49 2014 +0100 - - arm: load_deinterleave_chroma_f{dec,enc}_neon - - load_deinterleave_chroma_fdec_c: 4055 - load_deinterleave_chroma_fdec_neon: 995 - load_deinterleave_chroma_fenc_c: 4071 - load_deinterleave_chroma_fenc_neon: 
992 - -commit c9a5ae0d219b6a28adebdb83faf89f291611f57b -Author: Janne Grunau -Date: Sat Mar 15 17:22:08 2014 +0100 - - arm: x264_plane_copy_deinterleave_neon - - plane_copy_deinterleave_c: 42988 - plane_copy_deinterleave_neon: 10184 - -commit c570be3ea9f24942c362e1c2402ec7fccbb5c330 -Author: Janne Grunau -Date: Sat Mar 15 13:29:41 2014 +0100 - - arm: implement deblock_strength_neon - - Based on deblock_strength_avx. - - checkasm --bench on a cortex-a9: - deblock_strength_c: 14611 - deblock_strength_neon: 1848 - -commit 2794ba5bb0007e0edf32d5325ca82cbf654f79b0 -Author: Janne Grunau -Date: Sat Mar 15 10:51:11 2014 +0100 - - arm: add missing macro instantiation for x264_pixel_avg_4x16_neon - - checkasm --bench on a cortex-a9: - avg_4x16_c: 8910 - avg_4x16_neon: 2091 - -commit d6002ebace8194d17ee0ba607ff82c4f9075dd2d -Author: Janne Grunau -Date: Thu Mar 13 01:02:13 2014 +0100 - - arm: implement x264_predict_4x4_v_armv6 - - Alone probably not worth it but allows use of predict_4x4_dc|h_armv6 - in intra_sad|satd_x3_4x4_neon. - -commit d7e689680023e327de7e052e01e7faee30135799 -Author: Roland Stigge -Date: Sun Mar 23 09:29:37 2014 -0700 - - ppc: fix build on certain PowerPC variants without Altivec - -commit 863ea2a224cf7380c7a6ea9ae531e16b621cc0b7 -Author: Anton Mitrofanov -Date: Tue Apr 22 00:58:24 2014 +0400 - - Only add strip option '-s' for linker flags - - Fixes some build warnings with clang. - -commit 4102614df9a11d66b506fb435132ddd0f88c6f94 -Author: Tsukasa OMOTO -Date: Sat Mar 15 16:53:53 2014 +0900 - - configure: remove an unnecessary option from CFLAGS on OS X - - Fixes Clang 3.4 compilation on OS X. - -commit b3fb718404d6cce9c82987ea2909cda5072d040c -Author: Fiona Glaser -Date: Sun Feb 23 10:36:55 2014 -0800 - - Macroblock tree overhaul/optimization - - Move the second core part of macroblock tree into an assembly function; - SIMD-optimize roughly half of it (for x86). Roughly ~25-65% faster mbtree, - depending on content. 
- - Slightly change how mbtree handles the tradeoff between range and precision - for propagation. - - Overall a slight (but mostly negligible) effect on SSIM and ~2% faster. - -commit 00a00ccab316de3d50da6a82ba4af44dcb4655ec -Author: Janne Grunau -Date: Thu Mar 13 00:05:48 2014 +0100 - - arm: use available neon functions for intra_sa8d/sad/satd_x3 - - 4% faster on main/medium, 15% faster on baseline/superfast on a cortex-a9. - -commit ac8f2e8a4cf21b2026957509bea8865ff7879fb4 -Author: Janne Grunau -Date: Wed Mar 12 14:35:31 2014 +0100 - - arm: implement x264_pixel_var2_8x16_neon - - checkasm --bench on a cortex-a9: - var2_8x16_c: 5677 - var2_8x16_neon: 1421 - -commit 66836125beabdaff561da89ea1e18e566f5d202a -Author: Janne Grunau -Date: Wed Mar 12 13:16:00 2014 +0100 - - arm: implement x264_pixel_var_8x16_neon - - checkasm --bench on a cortex-a9: - var_8x16_c: 4306 - var_8x16_neon: 791 - -commit a90ea34cf264d6b7733c5ffbe6d46882c306b50f -Author: Henrik Gramner -Date: Sun Feb 23 15:33:48 2014 +0100 - - x86: SSE2 and SSSE3 plane_copy_deinterleave_rgb - - About 5.6x faster than C on Haswell. - -commit f032147ca69401165495a36cf7aba5b8c95ecb3b -Author: Henrik Gramner -Date: Sun Feb 16 21:24:54 2014 +0100 - - x86: Minor mbtree_propagate_cost improvements - - Reduce the number of registers used from 7 to 6. - Reduce the number of vector registers used by the AVX2 implementation from 8 to 7. - Multiply fps_factor by 1/256 once per frame instead of once per macroblock row. - Use mova instead of movu for dst since it's guaranteed to be aligned. - Some cosmetics. - -commit 7c860f075ccd14fb7891d5fc6c9eab1a37ea555d -Author: Henrik Gramner -Date: Sun Feb 9 23:58:04 2014 +0100 - - x86inc: Support arbitrary stack alignments - - If the stack is known to be at least 32-byte aligned we can safely store ymm - registers on the stack without doing manual alignment. - - Change ALLOC_STACK to always align the stack before allocating stack space for - consistency. 
Previously alignment would occur either before or after allocating - stack space depending on whether manual alignment was required or not. - -commit 039fab9203179f9e790abfd54ae5b2254ef803e7 -Author: Anton Mitrofanov -Date: Fri Feb 14 15:53:58 2014 +0400 - - x86inc: warn if XOP integer FMA instruction emulation is impossible - - Emulation requires a temporary register if arguments 1 and 4 are the same; this - doesn't obey the semantics of the original instruction, so we can't emulate - that in x86inc. - - ffmpeg has an x86util emulation for that case; I'll add it if x264's asm ever - needs it. - - Also add pmacsdql emulation. - -commit 974f2e78e0cb25e06fedbcfef70f80938f22988b -Author: Loren Merritt -Date: Sat Mar 1 02:57:56 2014 +0000 - - x86inc: free up variable name "n" in global namespace - -commit 8596dd36df38d33d402e848035b1bd31edc2c389 -Author: Henrik Gramner -Date: Wed Jan 22 19:09:12 2014 +0100 - - x86: Pass -Worphan-labels to yasm - - Makes it easier to detect typos. - -commit 0bb3b2edb866dd852bb1f5faed88df4bdcf0c16f -Author: Steve Lhomme -Date: Sun Feb 16 13:15:09 2014 +0100 - - Write 3D metadata when outputting Matroska - - For when --frame-packing is set. - -commit f35e3fc26b99e1b3c943c131100fdfa4733fc932 -Author: Anton Mitrofanov -Date: Sun Feb 23 16:56:03 2014 +0400 - - Don't set chroma_loc_info_present_flag for non-4:2:0 - - The H.264 spec says it shouldn't be set in these cases. - -commit b7a50c16414631c8ff5e417da51b190c8999027e -Author: Fiona Glaser -Date: Mon Mar 10 08:42:50 2014 -0700 - - x264.h: fix documentation - - The full details of the return values of encoder_encode and encoder_headers - were mistakenly removed a while ago; re-add them. 
- -commit de01d8821b59b85a01c8a89e544e0fed6488b958 -Author: Anton Mitrofanov -Date: Sun Feb 23 15:52:57 2014 +0400 - - Fix pointer cast warning for 64-bit builds - -commit 8b821ec19ba9425c120b8986a57ca7c6b9f088ed -Author: Anton Mitrofanov -Date: Mon Mar 10 16:48:02 2014 +0400 - - mbaff: fix mb_field_decoding_flag tracking and simplify allow skip check - - Fixes an issue with too many forced non-skips in mbaff+cavlc, as well as - non-deterministic output with mbaff+cavlc+sliced-threads. - -commit 850c8c5d6139df82e969d2174eebba69b479aa16 -Author: Anton Mitrofanov -Date: Mon Mar 10 03:22:57 2014 +0400 - - Fix memory overwrite in x264_deblock_h_chroma_mbaff_sse2 - - Fixes possible corruption with MBAFF+sliced threads. - -commit 19dddbcff73541ae15f8e57383ff1c6aa907d99d -Author: Fiona Glaser -Date: Sun Mar 2 10:09:01 2014 -0800 - - Fix corruption with CAVLC overflow handling in MBAFF+main profile - - Probably a regression in r2178. - -commit 48dbfa28201950f7e07e96a7d62b2951dd2dbe03 -Author: Anton Mitrofanov -Date: Mon Mar 10 21:17:19 2014 +0400 - - Fix checkasm --bench output when nop_cycles is too large - -commit ee8d5e4b51da99e576b5aea3008e70d1c7ed2372 -Author: Anton Mitrofanov -Date: Wed Jan 22 12:54:49 2014 +0400 - - Really fix quantization factor allocation - - Actually allocate less (instead of just initialize less) and fix comments. - -commit 0d668be8d7525992c1c163c97551ee897e43c177 -Author: Yu Xiaolei -Date: Sun Feb 23 04:12:51 2014 -0800 - - Fix build with Android NDK - - Android NDK does not expose sched_getaffinity. - -commit 42d25196d423626c12794db3f66322c7a3f4375e -Author: Loren Merritt -Date: Thu Jan 16 13:34:46 2014 -0800 - - x86inc: speed up compilation with yasm - - Work around yasm's inefficiency with handling large numbers of variables - in the global scope. 
- -commit dd6a303498d1f55c73037ed925a6ece8e28a95bc -Author: Kieran Kunhya -Date: Fri Jan 10 23:27:33 2014 +0000 - - Add support for AVC-Intra Class 200 - -commit 41227fa2531d9263e481b80237d2d9ef6f5a450f -Author: James Weaver -Date: Tue Jan 7 10:31:58 2014 +0000 - - v210 input support - - Assembly based on code by Henrik Gramner and Loren Merritt. - -commit e2a9662751180b7dd2fe538913282ee800445445 -Author: Fiona Glaser -Date: Tue Jan 21 13:39:33 2014 -0800 - - Fix quantization factor allocation - - We don't need to wastefully allocate quant tables above QP_MAX_SPEC; they're - never used. - -commit 8be6600d10a74ca241dbb27e096883ceed7b4082 -Author: Henrik Gramner -Date: Wed Jan 8 01:06:56 2014 +0100 - - Avoid some unnecessary memory loads in macroblock_encode - -commit 807aeaaae7351e4c2c536463e69dacaac218bccb -Author: Henrik Gramner -Date: Sun Jan 5 15:25:05 2014 +0100 - - Bump dates to 2014 - - Also update AUTHORS file and my e-mail address in the headers of various files. - -commit 02697d57d987f8d51a5c3ced5e5b81d7137012ee -Author: Henrik Gramner -Date: Mon Jan 6 00:18:31 2014 +0100 - - Remove tools/xyuv.c - - It's an old stand-alone application that isn't relevant to x264. - -commit 7664014b2b490d81a66f2a13138182dfaaf4be06 -Author: Anton Mitrofanov -Date: Thu Nov 7 02:37:23 2013 +0400 - - Use 8x16c wrappers with x86 asm functions for 4:2:2 with high bit depth - -commit 6bc63417e10e135d8cd881495c71be72d322e1d3 -Author: Henrik Gramner -Date: Fri Dec 20 22:44:28 2013 +0100 - - CLI: Avoid redundant 16-bit upconversions in piped raw input - - It's not possible to seek in pipes, so if we want to skip frames we have to read and - discard unused ones. It's pointless to do bit-depth upconversions in those frames. 
- -commit 008c56ec467736bc5d3130ff890c618d28aa7511 -Author: Anton Mitrofanov -Date: Fri Jan 3 20:06:06 2014 +0400 - - Fix input support from named pipes in Windows - -commit 91481419e3acc4bb601600cf32e46e7f93ae02ab -Author: Steve Clark -Date: Wed Nov 20 21:40:23 2013 +0400 - - Fix ARM asm compilation with Apple assembler - -commit a2f5d600bf866899db92e2dae40eb9fe46d44ade -Author: Anton Mitrofanov -Date: Wed Nov 13 19:24:48 2013 +0400 - - Fix uninitialized variable - - Caused if the timebase is not specified in stats file. Found by Clang. - -commit 95d196ef2edde109cfb32f4baa9b0adc67e842e1 -Author: Anton Mitrofanov -Date: Sun Oct 27 19:27:23 2013 +0400 - - Remove --visualize option. - - It probably wasn't used or maintained for last few years. - -commit 09c7010e3d13e66a241c0529b36ae3f7e1664ff4 -Author: Anton Mitrofanov -Date: Tue Oct 15 12:32:25 2013 +0400 - - Add L-SMASH support as preferable alternative for MP4-muxing - -commit c9f2bceb1f37aeaf6b7ed730f0fd210ef8725cab -Author: Kieran Kunhya -Date: Sat Sep 21 19:16:12 2013 +0100 - - Add AVC-Intra 1080p50/60 Class 100 parameters - - Also add some compatibility fixes. - -commit c084f6c029f016cf2024a2fc511825e82fb95865 -Author: Fiona Glaser -Date: Mon Sep 9 12:37:59 2013 -0700 - - Add --filler option - - Allows generation of hard-CBR streams without using NAL HRD. - Useful if you want to be able to reconfigure the bitrate (which you can't do - with NAL HRD on). - -commit 350b214c5abe7e82618ac46a14f23b7ab543045e -Author: Anton Mitrofanov -Date: Sun Oct 27 15:22:51 2013 +0400 - - Make x264_encoder_reconfig more threadsafe - - Do the reconfig when the next frame's encode begins. - Fixes some rare crashes with frame-threading and encoder_reconfig. 
- -commit 77cc44feea75106fae6d3113f6babbbe8cffba87 -Author: Fiona Glaser -Date: Thu Oct 24 17:19:00 2013 -0700 - - chroma-me: take shortcut in BI analysis - - ~100 cycles faster with subme>=9 - -commit 7634f8c6047e9e12036778a8dc8d4cd4b06eebcb -Author: Fiona Glaser -Date: Thu Oct 24 14:44:43 2013 -0700 - - CRF-max: don't warn if VBV underflow occurs - - Only warn if underflow occurs for reasons other than CRF-max, as CRF-max - implies that VBV underflow is desired by the user. - -commit 4b68633dc375fc372f160a3ae669a32e519b285a -Author: Henrik Gramner -Date: Fri Oct 18 22:43:36 2013 +0200 - - x86inc: Make ym# behave the same way as xm# - - This makes more sense for future implementations of templates with zmm registers. - -commit b54422a858809f39c00fac46207bfa8ad16cdb28 -Author: Henrik Gramner -Date: Fri Oct 18 22:21:38 2013 +0200 - - Use calloc instead of malloc + memset - -commit 8b58a4ce52047b00f5892a9cdd92f9695a50a933 -Author: Henrik Gramner -Date: Thu Oct 10 16:54:12 2013 +0200 - - Replace gf_malloc with regular malloc in mp4 muxer - - It was used as a workaround for a bug that only existed in the GPAC repository - for a few weeks back in 2010. There's no reason to keep it anymore. 
- -commit 05f04384a10cb673abea7749cd319971c0017769 -Author: Anton Mitrofanov -Date: Tue Oct 8 23:20:40 2013 +0400 - - Update to current libav/ffmpeg API - -commit b7b6029f0e121b87fd96595b15d0c40fcd1b3bf1 -Author: Rafaël Carré -Date: Fri Oct 25 07:12:24 2013 -0700 - - version.sh: change to use /bin/sh - -commit c3c73f13bb9ee60ccf40f85dbc11c91efac9d1e2 -Author: Sean McGovern -Date: Wed Sep 4 14:15:00 2013 -0700 - - configure: don't generate a git version number if .git isn't present - -commit 12f9d499905199427a0196743c2cde56642d6d99 -Author: Martin Storsjo -Date: Tue Sep 3 14:56:18 2013 -0700 - - configure: include dependency libs in the Libs pkg-config - - If only a static library is built, the user of the library that just - tries to link to the lib using the flags provided by pkg-config - might not know that only a static lib exists and that he'd have to - pass --static to pkg-config to get the internal dependencies to - be able to link the library. - - For a shared build, the internal dependencies are kept in Libs.private - as before. - - This matches how libav's pkg-config files are generated. 
- -commit 03450be799dea03a83dad4dc833ef8ddd7f36b62 -Author: Anton Mitrofanov -Date: Fri Oct 18 00:38:06 2013 +0400 - - Fix compilation in case of HAVE_LOG2F check fails spuriously - -commit 266fdfcd4809afb018e45ab959d4a56a42712c88 -Author: Anton Mitrofanov -Date: Sat Oct 12 12:01:57 2013 +0400 - - Fix compilation of shared library for Windows with original MinGW toolchain - -commit 50a0c33b9b5fa57d0a129b7441a6af55f7a08005 -Author: Anton Mitrofanov -Date: Tue Oct 8 23:32:37 2013 +0400 - - Fix possible crashes in resize and crop filters with high bitdepth input - -commit 5b272b22d8f7511a4abece5a23ad25282bedaea8 -Author: Tim Mooney -Date: Tue Sep 3 13:43:50 2013 -0700 - - Fix INSTALL in configure for Solaris systems - -commit 2fd292391a4d41b9fc65ee652b4663fdd9f8107e -Author: Henrik Gramner -Date: Wed Aug 28 00:50:31 2013 +0200 - - Workaround for FFMS indexing bug - - If FFMS_ReadIndex is used with an empty index file it gets stuck in an infinite loop instead of returning NULL - like it's supposed to do on failure. Explicitly check if the file is empty before calling it as a workaround. - -commit 5bcff2a62c050376ca54c5e5040d0529c89eb9f2 -Author: Anton Mitrofanov -Date: Mon Aug 26 21:20:31 2013 +0400 - - Fix masked access violation in KERNEL32 - - Caused crashes under gdb in Windows and might cause other unknown problems. - -commit 098b686e6397d5bb6b3a5c03cd918aa88216909f -Author: Hiroki Taniura -Date: Sun Aug 25 01:18:57 2013 +0900 - - Fix GPAC support on Windows - -commit fa3cac516cb71b8ece09cedbfd0ce631ca8a2a4c -Author: Henrik Gramner -Date: Sun Aug 11 19:50:42 2013 +0200 - - Windows Unicode support - - Windows, unlike most other operating systems, uses UTF-16 for Unicode strings while x264 is designed for UTF-8. - - This patch does the following in order to handle things like Unicode filenames: - * Keep strings internally as UTF-8. - * Retrieve the CLI command line as UTF-16 and convert it to UTF-8. 
- * Always use Unicode versions of Windows API functions and convert strings to UTF-16 when calling them. - * Attempt to use legacy 8.3 short filenames for external libraries without Unicode support. - -commit 9b94896b3735052cabb52d081de3b50020a077cb -Author: Kieran Kunhya -Date: Sat Jul 20 18:47:59 2013 +0100 - - AVC-Intra support - - This format has been reverse engineered and x264's output has almost exactly - the same bitstream as Panasonic cameras and encoders produce. It therefore does - not comply with SMPTE RP2027 since Panasonic themselves do not comply with - their own specification. It has been tested in Avid, Premiere, Edius and - Quantel. - - Parts of this patch were written by Fiona Glaser and some reverse - engineering was done by Joseph Artsimovich. - -commit fa1e2b746d95575b5c5b8e49fcfcad3ded9a5420 -Author: Henrik Gramner -Date: Mon Jul 8 12:06:42 2013 -0700 - - Transparent hugepage support - - Combine frame and mb data mallocs into a single large malloc. - Additionally, on Linux systems with hugepage support, ask for hugepages on - large mallocs. - - This gives a small performance improvement (~0.2-0.9%) on systems without - hugepage support, as well as a small memory footprint reduction. - - On recent Linux kernels with hugepage support enabled (set to madvise or - always), it improves performance up to 4% at the cost of about 7-12% more - memory usage on typical settings.. - - It may help even more on Haswell and other recent CPUs with improved 2MB page - support in hardware. 
- -commit e33aac9aba5c6b9c867b92f14c7722152680a61a -Author: Henrik Gramner -Date: Fri Jul 5 21:15:54 2013 +0200 - - x86: SSSE3 implementation of pixel_sad_x3 and pixel_sad_x4 - -commit 4becc3e9e031c4207698846369aac2bef1480d15 -Author: Henrik Gramner -Date: Fri Jul 5 21:15:49 2013 +0200 - - x86: Faster AVX2 pixel_sad_x3 and pixel_sad_x4 - -commit 401edc3ab08f95777d495b38030e2108d7d3f0b4 -Author: Diogo Franco -Date: Tue Jul 23 22:17:44 2013 -0300 - - configure: Support cygwin64 - -commit adc99d17d8c1fbc164fae8319b40d7c45f30314e -Author: Derek Buitenhuis -Date: Fri Aug 9 13:39:27 2013 -0400 - - x86inc: Check for __OUTPUT_FORMAT__ having a value of "x64" - - This is also a valid value for WIN64. - -commit 1430b04988c3bb344e104c974ed3aa825035c0ec -Author: Anton Mitrofanov -Date: Tue Jul 23 14:11:50 2013 -0700 - - Fix cases in which intra refresh allowed prediction from disallowed pixels - -commit a6c396f0fe01f453de115ba0d8c4aa26138aa6b4 -Author: Anton Mitrofanov -Date: Wed Aug 7 01:56:34 2013 +0400 - - Fix a few minor bugs found with a static analyzer - -commit 2d66c7c2471801aa946517226739e9150f6c1948 -Author: Fiona Glaser -Date: Fri Jul 12 16:07:35 2013 -0700 - - Fix AVX2 detection bug with "limit CPUID" enabled in BIOS - -commit ff41804efd4caec120fc9e1b90ad226035f75aaa -Author: Henrik Gramner -Date: Fri Jul 5 21:15:43 2013 +0200 - - x86: Remove X264_CPU_SSE_MISALIGN functions - - Prevents a crash if the misaligned exception mask bit is cleared for some reason. - - Misaligned SSE functions are only used on AMD Phenom CPUs and the benefit is miniscule. - They also require modifying the MXCSR control register and by removing those functions - we can get rid of that complexity altogether. - - VEX-encoded instructions also supports unaligned memory operands. I tried adding AVX - implementations of all removed functions but there were no performance improvements on - Ivy Bridge. 
pixel_sad_x3 and pixel_sad_x4 had significant code size reductions though - so I kept them and added some minor cosmetics fixes and tweaks. - -commit 01087fdbf2042095cb36458fd5c5efab3f4b3a37 -Author: Fiona Glaser -Date: Thu Jun 20 15:51:39 2013 -0700 - - Tweak i16x16-delta-quant-avoidance code - - Don't omit the delta quant if it'd raise the quantizer to do so; this fixes - a rare flickering issue caused by deblocking. - -commit bfa2f0c44cec2e41fbd7566edb55e405f6c5a49d -Author: Fiona Glaser -Date: Sun Jun 9 09:06:27 2013 -0700 - - x86: faster AVX2 iDCT, AVX deblock_luma_h, deblock_luma_h_intra - -commit 397f60e7f23e2c6ec2cb9b168ebb75cc42983dd7 -Author: Lucien -Date: Mon Jun 17 18:28:09 2013 +0000 - - Add new color primaries, transfer characteristics, matrix coefficients - -commit fa215fc9d77d131595e8b1eda0fc4e9da62c1f94 -Author: Fiona Glaser -Date: Fri May 31 17:01:29 2013 -0700 - - Add "--stitchable" option for segmented encoding - - Stops x264 from attempting to optimize global stream headers, ensuring that - different segments of a video will have identical headers when used with - identical encoding settings. - -commit 9143d5ad966a3864597009ba1f1befe87328ec61 -Author: Fiona Glaser -Date: Thu Jun 27 08:29:06 2013 -0700 - - Interface: if vbv-maxrate < bitrate, set bitrate = vbv-maxrate - - This probably makes more sense to the user than setting vbv-maxrate = bitrate, - as before. - -commit 83d35c7bc4332e4dd27ba7b8baf96f8743c52a8b -Author: Anton Mitrofanov -Date: Tue May 28 05:02:42 2013 -0700 - - OpenCL cosmetics - -commit ffc3ad4945da69f3caa2b40e4eed715a9a8d9526 -Author: Anton Mitrofanov -Date: Tue Jun 18 00:16:33 2013 +0400 - - Fix possible crash when writing very large filler NALUs - - Bitstream-reallocation function didn't handle the case of filler. 
- -commit 25ef3f5fdbfca0f9a5ff8a97b8475e7f8b4c9202 -Author: Loren Merritt -Date: Mon Jun 17 11:27:09 2013 -0700 - - Fix build with PIC on some systems - -commit c41b629d4831cde47a8c0cde435041cc3b996d85 -Author: Henrik Gramner -Date: Sun Jun 2 18:41:17 2013 +0200 - - Fix potential misaligment crash in AVX2 denoise_dct - -commit e32d9c21339cbb021d6c9ad5897bfde09dcdb63a -Author: Anton Mitrofanov -Date: Tue May 28 01:48:15 2013 +0400 - - Fix building with compilers without inline asm support - - Also fix crash in high bit depth builds compiled with unaligned stack. - -commit 3b8e924639ac67a4beb0ebe9b9663de03cdce84d -Author: Anton Mitrofanov -Date: Wed May 22 22:43:59 2013 +0400 - - Fix compilation with OpenCL on MacOS X - - Also fix crash in the case of OpenCL error during encoding. - -commit 3aa9a67b6d62bcf11ee69397647230700a32044b -Author: Anton Mitrofanov -Date: Mon May 6 22:51:11 2013 +0400 - - OpenCL support improvement/refactoring - - Autoload the OpenCL library so that it's not required to run an openCL-enabled - build of x264. - - Update X264_BUILD, which should have been changed with the first patch. - -commit 0b2c3d35c168011e73300da5fdc690e00a8238e0 -Author: Fiona Glaser -Date: Thu May 16 13:51:37 2013 -0700 - - x86: shave a few instructions off AVX deblock - -commit e7cb328580c3e1bd7604a64f40abf3e03c474771 -Author: Henrik Gramner -Date: Tue May 14 18:57:40 2013 +0200 - - x86: AVX2 dequant_4x4_dc - -commit edf31ed3577f35e7ed3934dd74be474f9d22384a -Author: Henrik Gramner -Date: Tue May 14 18:53:12 2013 +0200 - - x86: AVX2 high bit-depth dequant - -commit bc88d1bb331ee061c38bea80f7a54a76797c31d0 -Author: Fiona Glaser -Date: Thu May 9 17:20:05 2013 -0700 - - x86-64: 64-bit variant of AVX2 hpel_filter - - ~5% faster than 32-bit. 
- -commit 89f067b7cacecf413569e84c6c973c23f67b1ad3 -Author: Henrik Gramner -Date: Mon May 6 18:41:24 2013 +0200 - - x86: AVX2 high bit-depth denoise_dct - - 28->15 cycles - - Also reorder instructions to use fewer registers, 3 cycles faster on Ivy Bridge with 64-bit Windows. - -commit 481e4cdb52989e4b514a2f4345870a19c5c0ae92 -Author: Henrik Gramner -Date: Sat May 4 18:48:58 2013 +0200 - - x86: AVX2 high bit-depth quant - - quant_4x4: 13->6 cycles - quant_4x4_dc: 14->8 cycles - quant_8x8: 47->24 cycles - quant_4x4x4: 48->25 cycles - -commit 02aa1368da5c222c8833724abccddd8f02630fc6 -Author: Fiona Glaser -Date: Wed May 1 14:32:11 2013 -0700 - - x86: AVX2 add16x16_idct_dc - - 27 -> 19 cycles - -commit 0c00c2c7882de130184e02cf1861599aedb425e8 -Author: Fiona Glaser -Date: Mon Apr 29 16:16:54 2013 -0700 - - x86: faster AVX2 quant_4x4x4 - - 10->9 cycles - -commit af6647e0e7d647c660003f65b78b4f1a0b186ec2 -Author: Fiona Glaser -Date: Sat Apr 27 21:03:32 2013 -0700 - - x86: AVX2 intra_sad_x3_8x8c - - 30->22 cycles - -commit f114746df6ce6a1bcacf46c62b696cc309ab4527 -Author: Henrik Gramner -Date: Sun Apr 28 11:11:03 2013 +0200 - - x86: AVX2 high bit-depth intra_sad_x3_8x8 - - 43->24 cycles - -commit 8e4f045f815a59ca3d6398ff4ddae7af44766dc8 -Author: Fiona Glaser -Date: Wed Apr 24 14:22:15 2013 -0700 - - x86: AVX2 deblock strength - - 30->18 cycles - -commit 594dd84cb85e616f4e260f7fdef6ce5a34360ac7 -Author: Henrik Gramner -Date: Wed May 1 17:42:48 2013 +0200 - - x86: Faster high bit-depth intra_sad_x3_4x4 - - 20->16 cycles on Ivy Bridge - -commit a8384178bd917576469da040923976cb531be38c -Author: Fiona Glaser -Date: Tue Apr 30 17:36:46 2013 -0700 - - x86: faster SSSE3 hpel - - ~7% faster using the pmulhrsw trick from mc_chroma. - -commit 1f5a32c2459ed6f42d9c150d008e3471d61af3ee -Author: Fiona Glaser -Date: Mon Apr 29 14:22:23 2013 -0700 - - x86-64: faster SSSE3 trellis - - ~2% faster trellis. 
- -commit 7cbb27f0ce5ea3e756c628ac606f65d7de57f285 -Author: Fiona Glaser -Date: Thu May 2 17:10:26 2013 -0700 - - x86: 32-byte align the stack if possible - - Avoids the need for manual 32 byte array alignment on compilers that support - -mpreferred-stack-boundary. - -commit 30c91f62906ce08b5d227002b38ebd64f1291fae -Author: Henrik Gramner -Date: Sat May 11 23:39:09 2013 +0200 - - x86inc: Utilize the shadow space on 64-bit Windows - - Store XMM6 and XMM7 in the shadow space in functions that clobbers them. - This way we don't have to adjust the stack pointer as often, - reducing the number of instructions as well as code size. - -commit 33c352673900bd1b362bb2fe0284e999fccd633d -Author: Henrik Gramner -Date: Fri May 3 23:06:10 2013 +0200 - - x86: Don't use explicitly aligned versions of SAD on AVX CPUs - - On modern CPUs movdqu isn't slower than movdqa when used on aligned data and using the same code in both cases saves cache. - - This was already done for the high bit-depth AVX2 implementation but the aligned version still exists as dead code so remove that. - -commit 16d037211f1dd032288e25ab74d93a569fd93d6c -Author: Henrik Gramner -Date: Fri May 3 20:18:03 2013 +0200 - - x86: Add missing initializations for high bit-depth sad_aligned - -commit 25e219ad2565e52a6962eb1e16cf19f3482e655b -Author: Fiona Glaser -Date: Mon May 13 16:52:18 2013 -0700 - - x86: add Jaguar CPU detection - -commit c1e37099627b1dc2f15b295aa4c2eedd431a6dba -Author: Henrik Gramner -Date: Tue May 7 17:21:03 2013 +0200 - - x86inc: Remove .rodata kludges - - The Mach-O bug was fixed in yasm 0.8.0 and we don't support versions that old. - - a.out was superseded by ELF on sane systems a few decades ago. - -commit 5444e95a5c9ee866625b1122a19dbae6bf044008 -Author: Henrik Gramner -Date: Sat May 4 16:21:32 2013 +0200 - - checkasm: Use 64-bit cycle counters - - Prevents overflows that can occur in some cases. 
- -commit 0e000e7a763c9bb5c14257bad365144025013fc9 -Author: Henrik Gramner -Date: Fri May 10 13:55:32 2013 +0200 - - checkasm: Fix stack alignment bug - -commit 3ba0fb847b1a14f9db5f3dabe209eee2d4edc91d -Author: Fiona Glaser -Date: Wed May 8 10:48:41 2013 -0700 - - Fix invalid memcpy in sliced-threads - - Likely didn't actually break in practice, but memcpy with src==dst - is incorrect. - -commit 7f3606572957b63f1169bc793ed55bccdb549d56 -Author: Fiona Glaser -Date: Mon Apr 29 12:14:01 2013 -0700 - - Fix two bugs in slice-min-mbs and slices-max - - Slices-max broke slice-max-size when slice-max wasn't used. - Slice-min-mbs broke in rare cases near the end of a threadslice. - -commit 67d6f602018d0fc1cb05cd6240e4fe1c2646169f -Author: Fiona Glaser -Date: Thu Apr 4 18:00:23 2013 -0700 - - x86: SSSE3 LUT-based faster coeff_level_run - - ~2x faster coeff_level_run. - Faster CAVLC encoding: {1%,2%,7%} overall with {superfast,medium,slower}. - Uses the same pshufb LUT abuse trick as in the previous ads_mvs patch. - -commit c17d12f83381913650d84004815c20a1f7092144 -Author: Fiona Glaser -Date: Mon Mar 25 14:03:37 2013 -0700 - - x86-64: BMI2 cabac_residual functions - -commit 40316f836d42cb5aee8de5ae6b4a5e417d8446f8 -Author: Fiona Glaser -Date: Wed Mar 20 15:08:35 2013 -0700 - - x86: SSSE3 ads_mvs - - ~55% faster ads in benchasm, ~15-30% in real encoding. - ~4% faster "placebo" preset overall. - -commit 03396f82bd1a709aa83d15de0affd0c4c5bd621d -Author: Henrik Gramner -Date: Tue Apr 16 23:27:53 2013 +0200 - - x86: AVX2 pixel_ssd_nv12_core - -commit dc05aebbc51b64b6cf3cfa95a1fbb20f6ffe94c6 -Author: Henrik Gramner -Date: Tue Apr 16 23:27:50 2013 +0200 - - x86: AVX2 high bit-depth pixel_ssd - -commit f49c2eba352a9087301dfc3c3de902ab083bd9e9 -Author: Henrik Gramner -Date: Tue Apr 16 23:27:46 2013 +0200 - - x86: AVX2 high bit-depth pixel_sad_x3/pixel_sad_x4 - - Also reduce the number of xmm registers used by sse2/ssse3 pixel_sad_x3. 
- -commit 0e69048d4f9664f1293c5eed0604522c67adaff5 -Author: Henrik Gramner -Date: Tue Apr 16 23:27:43 2013 +0200 - - x86: AVX2 high bit-depth vsad - -commit 9f885c112d6566388d472da68ada0301ce330311 -Author: Henrik Gramner -Date: Tue Apr 16 23:27:39 2013 +0200 - - x86: AVX2 high bit-depth pixel_sad - - Also use loops instead of duplicating code; reduces code size by ~10kB with - negligible effect on performance. - -commit 295f83af2afa93073d7810ab96b1d8d889a53ed2 -Author: Henrik Gramner -Date: Tue Apr 16 23:27:35 2013 +0200 - - x86: AVX2 high_bit_depth pixel_avg2, get_ref, mc_copy_w16, mc_luma - - Also reduce the number of xmm registers used by mc_copy_* to avoid - saving and restoring xmm6 and xmm7 on 64-bit Windows. - -commit e7a46b6536ab3ea4806f585b771b6cbb261031d1 -Author: Henrik Gramner -Date: Tue Apr 16 23:27:32 2013 +0200 - - x86: AVX2 nal_escape - - Also rewrite the entire function to be faster and drop the AVX version which is no longer useful. - -commit 547a6573af56afe8d551201245775c6ba179e781 -Author: Henrik Gramner -Date: Tue Apr 16 23:27:29 2013 +0200 - - x86: AVX memzero_aligned - -commit 0f776f63daf47eac9b69ef77aaf7c9c16213cba9 -Author: Henrik Gramner -Date: Tue Apr 16 23:27:25 2013 +0200 - - x86: AVX2 predict_16x16_dc - -commit 97ad171ae33c51f48e6214abdf7c978e4dd5d2d1 -Author: Henrik Gramner -Date: Tue Apr 16 23:27:22 2013 +0200 - - x86: AVX2 predict_8x8c_p/predict_8x16c_p - -commit 8ecdeb2709b4b7095237330e68e9a76ea8060a2f -Author: Henrik Gramner -Date: Tue Apr 16 23:27:18 2013 +0200 - - x86: AVX2 predict_16x16_p - - Also fix the AVX implementation to correctly use the SSSE3 inline asm - instead of SSE2. - -commit f3d521da8163bb9a381284ef0b5c949b8a5c9f9c -Author: Henrik Gramner -Date: Tue Apr 16 23:27:14 2013 +0200 - - x86: AVX high bit-depth predict_16x16_v - - Also restructure some code to reduce code size of various functions, - especially in high bit-depth. 
- -commit fa40b44f339501917e7a7c003ab826bf3e7b6a10 -Author: Henrik Gramner -Date: Tue Apr 16 23:27:08 2013 +0200 - - x86: AVX2 high bit-depth predict_4x4_h - -commit 7908dc632330b6028ab7dae42834e2098e628b24 -Author: Henrik Gramner -Date: Tue Apr 16 23:27:04 2013 +0200 - - x86: AVX2 high bit-depth predict_16x16_h - -commit 51708c3e193438439aaeaf31c377b070ca403e0e -Author: Henrik Gramner -Date: Tue Apr 16 23:27:00 2013 +0200 - - x86: AVX2 high bit-depth predict_8x8c_h/predict_8x16c_h - -commit 184c50554ae95aa60edd3fa309ca8013e00a8648 -Author: Henrik Gramner -Date: Tue Apr 16 23:26:47 2013 +0200 - - x86util: Support ymm registers in HADD macros - -commit 0ea5be852e97d8cfdf04e384a8a78210f87c2dc0 -Author: Fiona Glaser -Date: Tue Feb 26 16:26:34 2013 -0800 - - x86: more AVX2 framework, AVX2 functions, plus some existing asm tweaks - - AVX2 functions: - mc_chroma - intra_sad_x3_16x16 - last64 - ads - hpel - dct4 - idct4 - sub16x16_dct8 - quant_4x4x4 - quant_4x4 - quant_4x4_dc - quant_8x8 - SAD_X3/X4 - SATD - var - var2 - SSD - zigzag interleave - weightp - weightb - intra_sad_8x8_x9 - decimate - integral - hadamard_ac - sa8d_satd - sa8d - lowres_init - denoise - -commit 19e1a2bbf2d1aaa15ea2d2c118b0236ff64b4bd1 -Author: Loren Merritt -Date: Mon Feb 25 21:16:45 2013 +0000 - - x86inc: create xm# and ym#, analagous to m# - - For when we want to mix simd sizes within one function. - -commit 3a8dfb2bc62be21215b6f7d47c53c5a912878656 -Author: Fiona Glaser -Date: Fri Apr 5 16:08:35 2013 -0700 - - x86inc: fix AVX emulation of cmp(p|s)(s|d) - -commit a3f5c7326c0aa707ccbd5a938a0b65581888b549 -Author: Fiona Glaser -Date: Tue Feb 5 17:15:00 2013 -0800 - - x86-64: cabac_block_residual assembly - - RDO: ~20% faster than C - Bitstream: ~50% faster than C - 1-2% faster overall, highest on preset superfast/fast/medium. 
- -commit f49a1b2ef6d95d8f0f186df0fc3bfe38414e264f -Author: Steve Borho -Date: Thu Feb 21 12:48:40 2013 -0600 - - OpenCL lookahead - - OpenCL support is compiled in by default, but must be enabled at runtime by an - --opencl command line flag. Compiling OpenCL support requires perl. To avoid - the perl requirement use: configure --disable-opencl. - - When enabled, the lookahead thread is mostly off-loaded to an OpenCL capable GPU - device. Lowres intra cost prediction, lowres motion search (including subpel) - and bidir cost predictions are all done on the GPU. MB-tree and final slice - decisions are still done by the CPU. Presets which do not use a threaded - lookahead will not use OpenCL at all (superfast, ultrafast). - - Because of data dependencies, the GPU must use an iterative motion search which - performs more total work than the CPU would do, so this is not work efficient - or power efficient. But if there are spare GPU cycles to spare, it can often - speed up the encode. Output quality when OpenCL lookahead is enabled is often - very slightly worse in quality than the CPU quality (because of the same data - dependencies). - - x264 must compile its OpenCL kernels for your device before running them, and in - order to avoid doing this every run it caches the compiled kernel binary in a - file named x264_lookahead.clbin (--opencl-clbin FNAME to override). The cache - file will be ignored if the device, driver, or OpenCL source are changed. - - x264 will use the first GPU device which supports the required cl_image - features required by its kernels. Most modern discrete GPUs and all AMD - integrated GPUs will work. Intel integrated GPUs (up to IvyBridge) do not - support those necessary features. Use --opencl-device N to specify a number of - capable GPUs to skip during device detection. - - Switchable graphics environments (e.g. AMD Enduro) are currently not supported, - as some have bugs in their OpenCL drivers that cause output to be silently - incorrect. 
- - Developed by MulticoreWare with support from AMD and Telestream. - -commit 2d0c47a50622ec59ade303cf150c21b8910a2bce -Author: Fiona Glaser -Date: Mon Mar 4 15:19:47 2013 -0800 - - weightp: improve scale/offset search, chroma - - Rescale the scale factor if the offset clips. This makes weightp more effective - in fades to/from white (and an other situation that requires big offsets). - - Search more than 1 scale factor and more than 1 offset, depending on --subme. - - Try to find the optimal chroma denominator instead of hardcoding it. - - Overall improvement: a few percent in fade-heavy clips, such as a sample from - Avatar: TLA. - -commit 732e4f7e8b9ab6d214cbcf059445b4712709faa4 -Author: Fiona Glaser -Date: Tue Feb 19 13:48:44 2013 -0800 - - Add slices-max feature - - The H.264 spec technically has limits on the number of slices per frame. x264 - normally ignores this, since most use-cases that require large numbers of - slices prefer it to. However, certain decoders may break with extremely large - numbers of slices, as can occur with some slice-max-size/mbs settings. - - When set, x264 will refuse to create any slices beyond the maximum number, - even if slice-max-size/mbs requires otherwise. - -commit fdfffa3058cb590765dbb34afa5706755dcb5319 -Author: Fiona Glaser -Date: Thu Feb 14 17:22:02 2013 -0800 - - Add slice-min-mbs feature - - Works in conjunction with slice-max-mbs and/or slice-max-size to avoid overly - small slices. - Useful with certain decoders that barf on extremely small slices. - - If slice-min-mbs would be violated as a result of slice-max-size, x264 will - exceed slice-max-size and print a warning. - -commit 8a3a41de9e5f54cb6e7b5c69486e50471a5c022d -Author: Anton Mitrofanov -Date: Tue Mar 26 18:56:21 2013 +0400 - - Disable mbtree asm with cpu-independent option - - Results vary between versions because of different rounding results. 
- -commit bf52bab4e5607d7f3d98b3999a13cb8149aeef1c -Author: Anton Mitrofanov -Date: Tue Mar 26 18:30:00 2013 +0400 - - Show "avs: no" --disable-avs option instead of empty string - -commit e74287e93b0ee7afb384624f60dc440b736fec6b -Author: Tim Walker -Date: Tue Mar 19 23:42:43 2013 +0100 - - lavf input: don't use deprecated AVStream fields - - Fixes building against newer libavcodecs from the Libav project. - -commit aa73459b710f4c08b654d69573c22fd2fc2a99f8 -Author: Anton Mitrofanov -Date: Tue Mar 26 19:54:36 2013 +0400 - - Fix y4m input with C420paldv colorspace - -commit 42c500af62fbe09e7a55ecd47fc72331fbe4ae02 -Author: Fiona Glaser -Date: Sat Mar 2 01:22:29 2013 -0800 - - x86: correctly check stack alignment for Atom hadamard_ac - - Regression in r2265 (only affected compilers with broken stack alignment, - like ICL on win32). - -commit bed18d0e4545e7528bf585a1a3c7fbc05ddbafa4 -Author: Loren Merritt -Date: Mon Feb 25 21:23:55 2013 +0000 - - x86inc: fix some corner cases of SWAP - - SWAP with >=3 named (rather than numbered) args - PERMUTE followed by SWAP with 2 named args - used to produce the wrong permutation - -commit 3cdaca1ac2f6022b1affcd24eff397a03b59fce3 -Author: Fiona Glaser -Date: Wed Feb 27 13:30:22 2013 -0800 - - Fix array overreads that caused miscompilation in gcc 4.8 - -commit 37033444036210ddab75c3ec5b9b5c2a5abb9d52 -Author: Fiona Glaser -Date: Thu Feb 28 13:32:37 2013 -0800 - - Fix undefined behavior in x264_ratecontrol_mb - -commit cb4547aefb624105b622368aad62c947f89cc4b1 -Author: Stefan Groenroos -Date: Fri Mar 1 22:35:34 2013 +0200 - - ARM: Fix bug in x264_quant_4x4x4_neon - - Regression in r2273. - -commit 3a8baa0ec68c50db3194ed778d0e744d6311cda3 -Author: Stefan Groenroos -Date: Mon Feb 25 23:43:09 2013 +0200 - - ARM: update NEON mc_chroma to work with NV12 and re-enable it - - Up to 10-15% faster overall. 
- -commit 215f2beeadb2ade3a318b397f25b8a6ad3a761d1 -Author: Fiona Glaser -Date: Thu Feb 14 15:00:48 2013 -0800 - - CABAC/CAVLC: use the new bit-iterating macro here too - -commit 993c81e94eebaacddbbfcec665831d07d89490b7 -Author: Fiona Glaser -Date: Fri Feb 8 15:34:38 2013 -0800 - - quant_4x4x4: quant one 8x8 block at a time - - This reduces overhead and lets us use less branchy code for zigzag, dequant, - decimate, and so on. - Reorganize and optimize a lot of macroblock_encode using this new function. - ~1-2% faster overall. - - Includes NEON and x86 versions of the new function. - Using larger merged functions like this will also make wider SIMD, like - AVX2, more effective. - -commit 5ee1d03a8b86915d98b165d067dce377df3a87ba -Author: Stephen Hutchinson -Date: Tue Feb 12 21:55:43 2013 -0500 - - Add AvxSynth support to the AviSynth input module. - - Uses dlopen to load AvxSynth on Linux and OS X. - - Allows the use of --demuxer avs for AvxSynth, though the only source filter it - can currently use is FFMS2. - - Add a local copy of avxsynth_c.h and its dependent headers in extras/ so that - users don't need to actually have AvxSynth development headers installed to - enable support for it (mirroring the AviSynth behavior). - - Based on a patch by 0x09 (tab@lavabit.com) - -commit 7b1301e946218cfe6e072fea03702754ee0cc8a6 -Author: Fiona Glaser -Date: Fri Feb 8 00:13:15 2013 -0800 - - Eliminate some branchiness in ME/analysis - - Faster, fewer branch mispredictions. - -commit 7de9a9aa4bc06843dd7d8afe6bc42c02e27b6b73 -Author: Fiona Glaser -Date: Wed Feb 6 16:55:39 2013 -0800 - - Fix some store forwarding stalls - There's quite a few others, but most of them don't help to fix or there's no - easy way to avoid them. 
- -commit 68a6268bae989c55a02b7e86b169bd1a02793a95 -Author: Fiona Glaser -Date: Tue Feb 5 01:23:23 2013 -0800 - - x86: faster AVX satd/sa8d/sa8d_satd/hadamard_ac - - Use Conroe-style movddup in AVX transforms; both Sandy Bridge and Bulldozer - do movddup in the load unit, so it's totally free this way. - - On Sandy Bridge: - ~6% faster sa8d_satd - ~5% faster hadamard_ac - ~9% faster 32-bit satd - ~2% faster sa8d - -commit 5d60b9c9ad794a666d0cfe0dd9d66d5b9f58e033 -Author: Fiona Glaser -Date: Sat Feb 2 12:37:08 2013 -0800 - - x86: detect Bobcat, improve Atom optimizations, reorganize flags - - The Bobcat has a 64-bit SIMD unit reminiscent of the Athlon 64; detect this - and apply the appropriate flags. - - It also has an extremely slow palignr instruction; create a flag for this to - avoid massive penalties on palignr-heavy functions. - - Improve Atom function selection and document exactly what the SLOW_ATOM flag - covers. - - Add Atom-optimized SATD/SA8D/hadamard_ac functions: simply combine the ssse3 - optimizations with the sse2 algorithm to avoid pmaddubsw, which is slow on - Atom along with other SIMD multiplies. - - Drop TBM detection; it'll probably never be useful for x264. - - Invert FastShuffle to SlowShuffle; it only ever applied to one CPU (Conroe). - - Detect CMOV, to fail more gracefully when run on a chip with MMX2 but no CMOV. - -commit 75d927053ef5546eb011ff5a5ac19152dd4e3c63 -Author: Oskar Arvidsson -Date: Sat Jan 19 01:47:09 2013 +0100 - - x86: combined SA8D/SATD dsp function - - Speedup is most apparent for 8-bit (~30%), but gives some improvements - for 10-bit too (~12%). - 64-bit only for now. - -commit 790c648d939240808659228f57a22633fc59d6d8 -Author: Oskar Arvidsson -Date: Tue Jan 29 23:44:32 2013 +0100 - - x86: port SSE2+ SATD functions to high bit depth - - Makes SATD 20-50% faster across all partition sizes but 4x4. 
- -commit 93bf1248f7409958818b281e3e6ecca75ddb8d86 -Author: Oskar Arvidsson -Date: Wed Feb 6 02:07:53 2013 +0100 - - x86: faster high bit depth ssd - - About 15% faster on average. - -commit 6371c3a527a337c7521912990c89d0474288e105 -Author: Fiona Glaser -Date: Fri Jan 18 22:55:46 2013 -0800 - - x86: optimize and clean up predictor checking - Branchlessly handle elimination of candidates in MMX roundclip asm. - Add a new asm function, similar to roundclip, except without the round part. - Optimize and organize the C code, and make both subme>=3 and subme<3 consistent. - Add lots of explanatory comments and try to make things a little more understandable. - ~5-10% faster with subme>=3, ~15-20% faster with subme<3. - -commit 004640653ded52f447ffdb71a45b334dc8e6f3d1 -Author: Fiona Glaser -Date: Tue Jan 22 12:31:55 2013 -0800 - - Fix two bugs in predictor checking - pmv wasn't checked properly in some cases, as well as zero vector. - Output-changing portion of the following patch. - -commit d2a9d25429b6843874865a37a5b4f6b401d89abc -Author: Fiona Glaser -Date: Thu Jan 10 13:15:52 2013 -0800 - - Improve lookahead-threads auto selection - Smarter decision to improve fast-first-pass performance in 2-pass encodes. - Dramatically improves CPU utilization on multi-core systems. - - Tested on a quad-core Ivy Bridge (12 threads, 1080p): - Fast first pass: - veryfast: ~7% faster - faster: ~11% faster - fast/medium: ~15% faster - slow/slower: ~42% faster - veryslow: ~55% faster - CRF/1-pass: - veryfast: ~9% faster - (all others remained the same) - -commit 5a764328bdeba650d99fc8db47275708cce79521 -Author: Henrik Gramner -Date: Sun Jan 27 23:01:59 2013 +0100 - - x86: Use SSE instead of SSE2 for copying data - - Reduces code size because movaps/movups is one byte shorter than movdqa/movdqu. - Also merge MMX and SSE versions of memcpy_aligned into a single macro. 
- -commit c3983b811f42ae5e4bc4f9c1c919f8e548fc76e3 -Author: Henrik Gramner -Date: Sun Jan 13 18:27:08 2013 +0100 - - 64-bit cabac optimizations - - ~4% faster PIC - - WIN64: - ~3% faster and 16 byte shorter cabac_encode_bypass - ~8% faster cabac_encode_terminal - Benchmarked on Ivy Bridge - - UNIX64: - One instruction less in cabac_encode_bypass - -commit f6e0d28ae1bccbda43d95200162f7035661fe1e4 -Author: Mike Gorchak -Date: Sat Feb 2 23:35:00 2013 -0800 - - configure: add QNX support - -commit 5e0fca86444840752eaedbdc5ebfe4ac0b3a0053 -Author: Henrik Gramner -Date: Sun Jan 20 19:35:06 2013 +0100 - - Windows: Enable DEP and ASLR - -commit 5ec5c78920914a88da415c57904fa01c99deeb7b -Author: Henrik Gramner -Date: Thu Jan 17 19:17:24 2013 +0100 - - x86inc: Set ELF hidden visibility for global constants - -commit fd2c4a06c3a4eb02fc1375de782bc5d36eb1d744 -Author: Diego Biurrun -Date: Thu Jan 17 11:18:31 2013 +0100 - - x86inc: Add cvisible macro for C functions with public prefix - - This allows defining externally visible library symbols. - - Signed-off-by: Diego Biurrun - -commit faf3dbe616c8339590409c9aa25777fa76c987a6 -Author: Diego Biurrun -Date: Thu Jan 17 11:30:37 2013 -0800 - - x86inc: rename program_name to private_prefix - Synced from libav. - The new name is more descriptive and will allow defining a separate public - prefix for externally visible library symbols. 
- -commit 32695340b0e93e3cc7edd1b5e7db064d94cd3701 -Author: Fiona Glaser -Date: Mon Jan 14 05:35:30 2013 -0800 - - x264.h: improve x264_encoder_reconfig documentation - -commit 6a82e49370e46914ab479d57548508ccf29da6e5 -Author: Henrik Gramner -Date: Sat Feb 16 19:36:50 2013 +0100 - - Cosmetics: stricter definition of parameterless functions - -commit b671762973a162705ceacf924a29999cdc6d35d2 -Author: Neil -Date: Mon Jan 28 10:47:38 2013 +0800 - - Update "Install and compile x264" in doc/regression_test.txt - -commit 43ff8f1681c1cca997ca916508723abea85d0fa2 -Author: Anton Mitrofanov -Date: Thu Jan 24 12:11:26 2013 +0400 - - Fix possible non-determinism with mbtree + open-gop + sync-lookahead - - Code assumed keyframe analysis would only pull one frame off the list; this - isn't true with open-gop. - -commit c2c2a95708685156a643e920b497d48597e0267c -Author: Anton Mitrofanov -Date: Mon Feb 25 19:28:19 2013 +0400 - - x86: don't use the red zone on win64 - -commit 5743b19a8264415ab3ed443abd2fefd81a038d6a -Author: Fiona Glaser -Date: Sun Feb 10 16:12:34 2013 -0800 - - x86-64: fix trellis asm with interlacing - - Regression in r2145. - Assembly assumed array was [2][64] when it was actually [2][63]. - Tiny (~0.1%) compression improvement. - -commit 9475e6ac48af90e526611b5f11a2690fa077b0ba -Author: Ronald S. Bultje -Date: Wed Jan 30 09:48:14 2013 -0800 - - x86-32: use simple nop codes for <= sse - - The "CentaurHauls family 6 model 9 stepping 8" family of CPUs (flags: - fpu vme de pse tsc msr cx8 sep mtrr pge mov pat mmx fxsr sse up rng - rng_en ace ace_en) SIGILLs on long nop codes. - -commit 732b072ae236b57cabdbc3b31cd7b482d1f9f9ff -Author: Loren Merritt -Date: Tue Jan 8 21:30:57 2013 +0000 - - Bump dates to 2013 - -commit f2b4f29c636d5e5c223650c5b22bd8089adfcab9 -Author: Henrik Gramner -Date: Mon Dec 17 21:54:00 2012 +0100 - - x86inc: Drop tzcnt workaround - - It is no longer needed now that we've bumped the version requirement of yasm to 1.2.0. 
- -commit ccda1ba4d8d902945c68aa25ec20867055d1b079 -Author: Fiona Glaser -Date: Mon Nov 12 10:28:53 2012 -0800 - - AVX2/FMA3 version of mbtree_propagate - First AVX2 function for testing. - Bump yasm version to 1.2.0 for AVX2 support. - -commit 8a9608bbbdf77ceb3ee537271549111468175a2b -Author: Henrik Gramner -Date: Tue Dec 11 16:05:34 2012 +0100 - - x86inc: Use VEX-encoded instructions in AVX functions - Automatically use VEX-encoding in AVX/AVX2/XOP/FMA3/FMA4 functions for all instructions that exists in a VEX-encoded version. - This change makes it easier to extend existing code to use AVX2. - Also add support for AVX emulation of a few instructions that were missing before. - -commit 4cf272851a9d24aacdf664f27a87ebdbfb50e6c2 -Author: Loren Merritt -Date: Sun Dec 2 15:56:30 2012 +0000 - - x86inc: activate REP_RET automatically - Now RET checks whether it immediately follows a branch, so the programmer dosen't have to keep track of that condition. - REP_RET is still needed manually when it's a branch target, but that's much rarer. - The implementation involves lots of spurious labels, but that's ok because we strip them. - -commit b073e870d135ac27cd97d624330abf0f1fb1ed41 -Author: Ronald S. Bultje -Date: Thu Dec 6 15:40:13 2012 -0800 - - x86inc: support stack mem allocation and re-alignment in PROLOGUE - Use this in 8-bit loopfilter functions so they can be used if - there is no aligned stack (e.g. x86-32 MSVC or ICC 10.x). - -commit 9d5ec55b34a4d4f2e044fbc67e2e12a59ea27d2a -Author: Henrik Gramner -Date: Mon Dec 17 22:15:02 2012 +0100 - - Update config.guess and config.sub - -commit 8eddd52b6d5d638709c5c8278c420eac68a8dde1 -Author: Anton Mitrofanov -Date: Tue Jan 8 13:29:49 2013 -0800 - - Fix crash if the first frame is forced to a non-keyframe - This is obviously bad user input, but x264 shouldn't crash if it happens. 
- -commit 05c1646333f567aa3de5f7669693b15ee667825d -Author: Bernhard Rosenkränzer -Date: Sun Dec 30 12:18:00 2012 -0800 - - Fix build on ARM with binutils >= 2.23.51.0.6 - GAS doesn't seem to like spaces in vld1 anymore, so remove those. - -commit 23829dd2b2c909855481f46cc884b3c25d92c2d7 -Author: Anton Mitrofanov -Date: Fri Nov 23 18:26:53 2012 +0400 - - Fix pthread_join emulation on win32 and BeOS - Doesn't actually affect x264, but it's more correct. - -commit 042fdd3e6a0e271f62a108da2a1a244dee936045 -Author: Fiona Glaser -Date: Tue Nov 27 07:50:51 2012 -0800 - - Fix typo in r2222 - Slightly wrong numbers in level table. - -commit cd71765c0ba574bb573e75396ef3c6a5c4c00469 -Author: Sergio Basto -Date: Thu Nov 22 18:02:50 2012 -0800 - - configure: fix gpac detection with -Wp,-D_FORTIFY_SOURCE=2 - -commit 12458a23d1374836fecbed381dfe55513b5ba119 -Author: Sean McGovern -Date: Thu Nov 22 18:01:16 2012 -0800 - - Solaris: use sysconf to get processor count - Solaris responds correctly to the same value as Cygwin, so let's use that. - -commit 0db80bee2765676c2e0e4be21afc2ace900a606c -Author: Anton Khirnov -Date: Tue Nov 13 21:01:24 2012 +0100 - - lavf input: allocate AVFrame correctly - Allocate AVFrames correctly with avcodec_alloc_frame(). - This caused crashes with newer libavcodecs that try to free frame extradata. - -commit 144b79159ad20954a7faec1023451a630a65aea1 -Author: Anton Mitrofanov -Date: Sun Nov 11 03:44:02 2012 +0400 - - Fix crash when using libx264.dll compiled with ICL for X86_64 - -commit bfed708c5358a2b4ef65923fb0683cefa9184e6f -Author: Anton Mitrofanov -Date: Fri Nov 9 02:31:10 2012 +0400 - - Fix possible issues with out-of-spec QP values - Fixes a possible regression in r2228. - -commit 1580a74e339c59cd856100076d8cf46f2d7247b0 -Author: Fiona Glaser -Date: Wed Sep 26 13:49:02 2012 -0700 - - Attempt to optimize PPS pic_init_qp in 2-pass mode - Small compression improvement; up to ~0.5% in extreme cases. 
- Helps more with small slice sizes (tiny resolutions or slice-max-size). - Note that this changes the 2-pass stats file format. - -commit b304a7cad10a85d487fa09e7c33e81c6945186b2 -Author: Fiona Glaser -Date: Wed Sep 26 13:05:00 2012 -0700 - - Improve slice header QP selection - Use the first macroblock of each slice instead of the last of the previous. - Lets us pick a reasonable initial QP for the first slice too. - Slightly improved compression. - -commit 0d5f6fbae9f6c4dbba25571a5d8c643b192606b1 -Author: Fiona Glaser -Date: Thu Oct 11 13:27:48 2012 -0700 - - Update level dpb size calculation to match newer H.264 spec - Doesn't actually change encoding behavior, but makes it more correct. - Warning messages should now be accurate at higher bit depths and non-4:2:0. - Technically, since it redefines x264_level_t, this is an API version increment. - -commit cc61a4b4d0838b6d5f4cdaf88a0b6d06a12b6d3e -Author: Jan Ekström -Date: Sun Oct 7 21:12:05 2012 +0300 - - Add support for the ffmpeg/vapoursynth high bit depth y4m extensions - -commit 5d85879921481ef104766657deda4ef8ea4351ec -Author: Diego Biurrun -Date: Tue Nov 6 14:48:56 2012 +0100 - - x86inc: Rename 3dnow2 to 3dnowext - The name "3dnowext" is more common than "3dnow2". Doesn't affect x264. - -commit 00cc16001b35a71ce2329e02bff6e316201cf700 -Author: Diego Biurrun -Date: Wed Oct 31 12:23:54 2012 -0700 - - x86inc: only define program_name if the macro is unset. - This allows overriding the value from outside the file. - This can be useful if x86inc.asm is used outside of x264. - -commit 3f516c5238d0c536ea03c8e5334d231facf9f31b -Author: David Wolstencroft -Date: Mon Oct 29 09:07:39 2012 -0700 - - Disable ARM NEON MRC CPU test for Apple devices - The Apple A6 CPU doesn't support performance counters, so this test caused a crash. 
- -commit ac2d7c08452186703424dcc6933524e95b652479 -Author: Fiona Glaser -Date: Tue Nov 6 12:03:20 2012 -0800 - - Fix crash with no-scenecut + mbtree - -commit 480bbc9067da7cce3400cf3988bf5fdfa4d9fa3f -Author: Anton Mitrofanov -Date: Fri Oct 12 23:43:40 2012 +0400 - - Fix reconfiguring to crf=0 - Lossless mode can't currently be enabled mid-stream. - -commit 21ba91ae6c361e4ce49ee65e61cc582b1af648ba -Author: Derek Buitenhuis -Date: Mon Sep 17 11:09:20 2012 -0700 - - Fix ALIGNED_ARRAY_EMU macros on ICL - ICL's preprocessor doesn't handle it correctly. - This fix is similar to libav's fix in 0db2d9. - -commit 96577475981d979d151626aae61ef317dc54df67 -Author: Jason Martens -Date: Thu Sep 13 11:20:40 2012 -0700 - - Fix use of deprecated av_close_input_file call - -commit 02217bd2c31feda7aaca813f104c155fe09428b8 -Author: Brad Smith -Date: Wed Sep 26 14:13:27 2012 -0700 - - Fix pkg-config for dynamic vs static linking - -commit e8e8b9a44ffa9b5f585582375515140ea22985d3 -Author: Brad Smith -Date: Mon Sep 10 17:52:04 2012 -0700 - - Set libm in the configure script if the OS has libm - Prerequisite for another configure patch after this. - Idea copied from libpthread. - -commit 8980dd8afbfeeb6bcaa17b97aad0b3c24207665e -Author: Fiona Glaser -Date: Thu Aug 16 13:40:32 2012 -0700 - - Enhance mb_info: add mb_info_update - This feature lets the callee know which decoded macroblocks have changed. - -commit 033df0a8c719f991ab0e0bb0788bd4f08e8b91d7 -Author: Fiona Glaser -Date: Thu Aug 16 13:01:17 2012 -0700 - - Fix mb_info_free with sliced threads - x264 would free mb_info before it was completely done using it. - -commit f93b7865a96248621af078363d5b59691cbcd8aa -Author: Fiona Glaser -Date: Tue Aug 7 12:43:26 2012 -0700 - - Enhance nalu_process - Add the input frame opaque pointer to the arguments. - This makes it easier to use with multiple simultaneous x264 encodes. 
- -commit 05089a37bf55a4134d9ffd014fdae729804a4e7a -Author: Fiona Glaser -Date: Mon Aug 6 14:55:35 2012 -0700 - - Improve mb_info constant mb optimization - Allow fast skipping even if the pskip MV isn't zero. - -commit cc5dcedc3b45d8e7390e2e914bb37f3fa92f6acd -Author: Fiona Glaser -Date: Mon Jul 30 12:58:34 2012 -0700 - - Export the average effective CRF of each frame - Useful to judge the resulting quality of a frame when VBV is enabled. - -commit f8fd6412a94f5f4f0eb5f8a6c0fb2062daebfab8 -Author: Brad Smith -Date: Mon Aug 20 23:58:19 2012 -0700 - - Remove special-casing for OpenBSD pthread handling - Previously it was policy to use -pthread, but OpenBSD now recommends -lpthread. - its been libpthread anyway and policy has changed to stop using -pthread. - -commit ed56837e3c56bfb880fac2e4e0025d81d6a7186b -Author: Ronald S. Bultje -Date: Thu Jul 26 18:01:49 2012 -0700 - - x86inc: automatically insert vzeroupper for YMM functions - Backported from libav. - -commit cbb90707e443f0da2521bda1b98cab5705451b8f -Author: Kieran Kunhya -Date: Tue Jul 24 08:47:45 2012 -0700 - - Free user supplied data when deleting a frame - This eliminates a memory leak when calling x264_encoder_close. - -commit 3d03b6190c7af7b941fa746c3dff3b17e5115380 -Author: Fiona Glaser -Date: Wed Jul 18 08:33:41 2012 -0700 - - Revert r2204 - People don't seem to like this so I'm just going to get rid of it. - -commit 2ec694181f8ba3eb1c4153e6b955d399d6448c25 -Author: Fiona Glaser -Date: Tue Jul 10 14:10:44 2012 -0700 - - Faster predictor checking with subme<3 - Fix a typo that made an early-skip less effective. - Avoid a relatively unpredictable branch. - Slightly changed output due to the typo-fix. - ~50 cycles faster on Core i7. - -commit d026397b0bf4c87e96b19c9fff7f43be6c4d9def -Author: Fiona Glaser -Date: Mon Jun 25 18:01:29 2012 -0700 - - Try 8x8 transform analysis even when sub8x8 partitions are present - Turn off the sub8x8 partitions, try it, and turn them back on if it didn't help. 
- Small compression improvement with p4x4 on (~0.1-0.5%). - Also update related comments. - -commit dea5d7a54b5ba948ed71d74e0264a2191bcd9815 -Author: Fiona Glaser -Date: Fri Jun 8 18:19:59 2012 -0700 - - Support changing resolutions between passes with macroblock-tree - Implement a basic separable bilinear filter to rescale the quantizer offsets. - Structure inspired by swscale, but floating-point instead of fixed-point. - Not as optimized as it could be, but it's quite fast already. - - Example compression penalties on a 720p video game recording: - First pass with 720p and second as 480p: ~-1.5% (vs. same res) - First pass with 480p and second as 720p: ~-3% (vs. same res) - -commit 498af9c559b8da986544e93f898df02fc9e224b3 -Author: Alexander Prikhodko -Date: Tue Jun 12 20:21:35 2012 +0300 - - Print elapsed time in encoding progress indicator - -commit bcd1a7070dc5224d591731dfdbabcbbaee0bb984 -Author: Anton Mitrofanov -Date: Sat Jun 2 21:27:50 2012 +0400 - - Cap ratecontrol predictor parameters - Limits VBV mispredictions after long periods of relatively constant video. - -commit 5754ea2db5223b458bd48f0130c13000e3dec15c -Author: Loren Merritt -Date: Tue Jul 3 14:38:04 2012 -0700 - - x86inc: import patches from libav - Allow manual invocation of WIN64_SPILL_XMM even under INIT_MMX - SSE version of mova is movaps rather than movdqa. - YMM version of movnta. - Add mp size for named arguments. - Fix DEFINE_ARGS when used outside of a cglobal. - Define a few more cpuflags. - 3-argument wrappers for a few more instructions. - -commit 5e3aaf1a49b173df916a384942c8089dd5bd8a22 -Author: Anton Mitrofanov -Date: Fri Jun 22 22:02:24 2012 +0400 - - Fix crash with --fps 0 - Fix some integer overflows and check input parameters better. - Also fix incorrect type specifiers for demuxer info printing. 
- -commit df700eae5d5ce5732f80df9ce81e6d3fe99ef56a -Author: Fiona Glaser -Date: Tue May 8 15:42:56 2012 -0700 - - Threaded lookahead - - Split each lookahead frame analysis call into multiple threads. Has a small - impact on quality, but does not seem to be consistently any worse. - - This helps alleviate bottlenecks with many cores and frame threads. In many - case, this massively increases performance on many-core systems. For example, - over 100% faster 1080p encoding with --preset veryfast on a 12-core i7 system. - Realtime 1080p30 at --preset slow should now be feasible on real systems. - - For sliced-threads, this patch should be faster regardless of settings (~10%). - - By default, lookahead threads are 1/6 of regular threads. This isn't exacting, - but it seems to work well for all presets on real systems. With sliced-threads, - it's the same as the number of encoding threads. - -commit 7cfe43cc7fb5474a87f02da96ebb850cdf83d73b -Author: Anton Mitrofanov -Date: Fri May 4 17:18:12 2012 +0400 - - Add support for RGB formats in bit-depth conversion filter - -commit 44d2f0885cd95201b67ed54bab88e91f4ba1556e -Author: Anton Mitrofanov -Date: Sat May 12 13:57:49 2012 +0400 - - Fix some bugs in mb_info code - -commit 8e57a9a0b5bddfecea5e45345c8c50efb0bac10d -Author: Fiona Glaser -Date: Thu Mar 29 14:14:07 2012 -0700 - - Add mb_info API for signalling constant macroblocks - Some use-cases of x264 involve encoding video with large constant areas of the frame. - Sometimes, the caller knows which areas these are, and can tell x264. - This API lets the caller do this and adds internal tracking of modifications to macroblocks to avoid problems. - This is really only suitable without B-frames. - An example use-case would be using x264 for VNC. 
- -commit 4442eaceb4992098e1e4e30aa13e70bb35d2cae6 -Author: Henrik Gramner -Date: Sat Apr 7 00:40:09 2012 +0200 - - Faster chroma weight cost calculation - - New assembly function with SSE2, SSSE3 and XOP implementations for calculating absolute sum of differences. - -commit e8952dffa3b09700e5b7c5e56edd196f0b80a248 -Author: Lucien -Date: Sat Mar 31 13:42:49 2012 +0100 - - Add Level 5.2 support - -commit 66acbbf6ce6b143cd164d251ceb160870e4ee720 -Author: Henrik Gramner -Date: Thu Apr 12 19:14:43 2012 +0200 - - Eradicate all mention of Extended Profile - x264 never supported it and never will because nobody uses it. - -commit b0f44f9e106afadaded17009079c2281cb18eb56 -Author: Anton Mitrofanov -Date: Tue Apr 3 21:46:52 2012 +0400 - - Fix disabling of mbtree when using 2pass encoding and zones - -commit ffea9f51f7f7e0a550c9326631a9c6f8c5c885be -Author: Alexander Prikhodko -Date: Sat Mar 31 12:06:21 2012 +0300 - - configure: force select -mXX gcc option for i386/x86-64 - Makes multilib compilation more convenient. - -commit f4aefb3853819adf633c56062d1be77db90819b6 -Author: Rafaël Carré -Date: Sun Apr 15 21:20:14 2012 -0400 - - Update config.guess and config.sub - Adds support for a bunch of targets, including: - aarch64 (armv8) - arm-linux-androideabi - -commit 62d7007d35c5f0829d96b6ecf459f21d27210ef3 -Author: Alexander Prikhodko -Date: Sat Mar 31 11:33:41 2012 +0300 - - configure: correct use of RC variable and add --extra-rcflags - -commit 70877e39a4abb4c24d1978a28202c9bf0dce8b47 -Author: Steven Walters -Date: Wed Mar 28 21:15:04 2012 -0400 - - ICL/MSVS: Fix shared library generation and usage - MSVS requires exported variables to be declared with the DATA keyword, and requires that imported variables be declared with dllimport. - This does not fix x264 cli being unable to use a shared library built by ICL however. 
- -commit 52f7a149ef6c39eb0d7eec7884362ba31a4b05ba -Author: Kieran Kunhya -Date: Tue Mar 27 17:38:56 2012 +0100 - - Fix intra-refresh + hrd - -commit fff12b1b7d8ce5cc9cfcfac09f089bae06cac6d5 -Author: Anton Mitrofanov -Date: Sun Mar 25 17:34:24 2012 +0400 - - Fix frame input colorspace check - -commit 065fec2704f3c8c6f3f3f5b0fad6870a078ba48c -Author: Fiona Glaser -Date: Thu Mar 22 13:56:50 2012 -0700 - - Fix comment in deblock.c - The code does, in fact, handle CAVLC+8x8dct correctly already. - -commit bca412764eb198433ca45abd097368e5154c7fbb -Author: Fiona Glaser -Date: Tue Mar 13 14:37:26 2012 -0700 - - Fix sliced-threads ratecontrol bug - Was using qp instead of qscale; could cause NANs (not to mention less accurate results). - -commit e046ba72a4230fdd6c7907ebf7ae235edb98faf2 -Author: Anton Mitrofanov -Date: Sun Mar 11 23:08:18 2012 -0700 - - Fix clobbering of mutex/cvs - Regression in r2183. - Bizarrely seemed to work on many platforms, but crashed on win64 and may have been slower. - Only affected sliced threads during encoding, but could cause crashes on x264 encoder close even without sliced threads. - -commit a155572ed547a3627ef00ca70ab804ff452147cd -Author: Fiona Glaser -Date: Fri Feb 24 13:34:39 2012 -0800 - - Sliced-threads: do hpel and deblock after returning - Lowers encoding latency around 14% in sliced threads mode with preset superfast. - Additionally, even if there is no waiting time between frames, this improves parallelism, because hpel+deblock are done during the (singlethreaded) lookahead. - For ease of debugging, dump-yuv forces all of the threads to wait and finish instead of setting b_full_recon. - -commit 90408ecab16a06ceaa181ff2e495b8f1a9d170fa -Author: Fiona Glaser -Date: Fri Feb 24 13:16:52 2012 -0800 - - Add full-recon API option - Fully reconstruct frames even without dump-yuv. 
- -commit 5b2c62aec269be7d0b1ff62df09660369f4e20e0 -Author: Fiona Glaser -Date: Wed Feb 22 13:33:36 2012 -0800 - - x86inc: switch to amdnops - Recent AMD CPUs' instruction decoders choke horribly on extremely long nops (i.e. with 4 prefixes). - Won't affect much, since we don't use ALIGN much. - -commit 42db5e6f8f704a2b0a9edf5d9cd4a17d80e5b816 -Author: Fiona Glaser -Date: Tue Feb 14 16:54:03 2012 -0800 - - BMI1 decimate functions - Intel was nice enough to make tzcnt equal to "rep bsf", which is backwards-compatible. - This means we don't actually have to add new functions to make it work. - -commit 92b0bd9665860d7b48f313d6fd72a583ecb01ddf -Author: Fiona Glaser -Date: Tue Feb 14 15:07:10 2012 -0800 - - Minor asm changes - -commit 2535ba17b2598f4155955857c12d52a377a75517 -Author: Fiona Glaser -Date: Thu Feb 9 14:23:52 2012 -0800 - - Add row-reencoding support to VBV for improved accuracy - Extremely accurate, possibly 100% so (I can't get it to fail even with difficult VBVs). - Does not yet support rows split on slice boundaries (occurs often with slice-max-size/mbs). - Still inaccurate with sliced threads, but better than before. - -commit bc473ddfd2f5925715d2895da666e214ebf04c84 -Author: Fiona Glaser -Date: Thu Feb 9 12:38:44 2012 -0800 - - Abstract bitstream backup/restore functions - Required for row re-encoding. - -commit 48e8e52e740fdc7ddca792d4afe240a213f66df5 -Author: Anton Mitrofanov -Date: Thu Feb 9 15:27:53 2012 -0800 - - Add an small per-MB cost penalty for lowres - Helps avoid VBV predictors going nuts with very low-cost MBs. - One particular case this fixes is zero-cost MBs: adaptive quantization decreases the QP a lot, but (before this patch), no cost penalty gets factored in for this, because anything times zero is zero. - -commit 1b31a10c7c3210d5eb14d522aaa0cfbe0e7a25e8 -Author: Fiona Glaser -Date: Mon Feb 13 18:31:51 2012 -0800 - - Remove explicit run calculation from coeff_level_run - Not necessary with the CAVLC lookup table for zero run codes. 
- -commit 9da19fbee621ca5b052891b3c010f8bc89b2ba93 -Author: Fiona Glaser -Date: Mon Feb 13 13:20:06 2012 -0800 - - Export PSNR/SSIM in x264 API - -commit 3a5f2fe30aeb5314b74f83b1960e9a40776347e9 -Author: Ronald S. Bultje -Date: Wed Feb 8 13:10:31 2012 -0800 - - x86inc: support yasm -f win64 - Not necessary for x264, as -m amd64 already does the right thing, but used by external users of x86inc. - -commit 3131a19cabcdca221ce4cd61a3cff68d99f1a517 -Author: Henrik Gramner -Date: Wed Feb 1 23:52:48 2012 +0100 - - Fix incorrect zero-extension assumptions in x86_64 asm - Some x264 asm assumed that the high 32 bits of registers containing "int" values would be zero. - This is almost always the case, and it seems to work with gcc, but it is *not* guaranteed by the ABI. - As a result, it breaks with some other compilers, like Clang, that take advantage of this in optimizations. - Accordingly, fix all x86 code by using intptr_t instead of int or using movsxd where neccessary. - Also add checkasm hack to detect when assembly functions incorrectly assumes that 32-bit integers are zero-extended to 64-bit. - -commit d52d0b1e6a9323911818c2a89764f6827974e0f7 -Author: Fiona Glaser -Date: Thu Feb 23 09:11:23 2012 -0800 - - Fix possible alignment crash when linking from MSVC - x264_cavlc_init needs to be stack-aligned now. 
- -commit 0a369502ab83c32ccebdb1888e6981ef872baaf0 -Author: Anton Mitrofanov -Date: Tue Feb 21 12:58:22 2012 -0800 - - Fix rare overflow in 10-bit intra_satd_x3_16x16 asm - -commit 38a26cdfc54ffd60c90651f3b96490d772e6dd73 -Author: Steven Walters -Date: Sat Feb 11 22:56:43 2012 -0500 - - ICL: fix out of tree building and resource file usage on Windows - -commit 10e1ba55803970ecd240f2057e7dfe0c22fc8efb -Author: Oka Motofumi -Date: Mon Feb 6 06:07:34 2012 +0900 - - Add error handling for out-of-tree build - -commit 0fc5acc6e6c038f6380f614e4dc4e1893b716b7e -Author: Anton Mitrofanov -Date: Tue Mar 6 17:34:02 2012 +0400 - - Fix RGB colorspace input - BGR/BGRA input was correct. - -commit 282c3cfb22f4ab526d96678249ccdc7f16531811 -Author: Fiona Glaser -Date: Mon Feb 13 16:40:32 2012 -0800 - - Fix interlaced + extremal slice-max-size - Broke if the first macroblock in the slice exceeded the set slice-max-size. - -commit a37a42450cdc31393dae56aed5a726a42fd540d6 -Author: Henrik Gramner -Date: Sun Feb 5 20:43:09 2012 +0100 - - Fix regression in r2141 - Broke register preservation in x264_cpu_cpuid and x264_cpu_xgetbv. - Did not cause any problems. - -commit ae289e6f03b76afa8736806e683349e8e59fcc93 -Author: Fiona Glaser -Date: Thu Jan 19 14:56:54 2012 -0800 - - TBM, AVX2, FMA3, BMI1, and BMI2 CPU detection support - TBM and BMI1 are supported by Trinity/Piledriver. - The others (and BMI1) will probably appear in Intel's upcoming Haswell. - Also update x86inc with AVX2 stuff. 
- -commit e0581e0878c1995b215c51691af6bdf7a386946f -Author: Loren Merritt -Date: Fri Feb 3 06:27:18 2012 +0000 - - x86inc: add TAIL_CALL macro to abstract a common asm idiom - -commit 04c38190c60658d544801718fc38fa3f745381d9 -Author: Fiona Glaser -Date: Wed Jan 25 16:44:38 2012 -0800 - - Minor asm optimizations/cleanup - -commit 6d7c5efcf6f8751f768177bf828973a7bd4fdcf6 -Author: Fiona Glaser -Date: Tue Jan 24 19:03:58 2012 -0800 - - Clean up and optimize weightp, plus enable SSSE3 weight on SB/BDZ - Also remove unused AVX cruft. - -commit 047175e610d3d5360f69e4f8168ff6fbafda2465 -Author: Fiona Glaser -Date: Mon Jan 23 18:57:58 2012 -0800 - - XOP frame_init_lowres - Covers both 8-bit and 16-bit, ~5-10% faster on Bulldozer. - -commit abc88d60b5e0d803d6d4f0a5d9ece7dd0bdde0f1 -Author: Fiona Glaser -Date: Tue Jan 17 15:25:10 2012 -0800 - - XOP 8x8 zigzags - Field: 35(mmx) ->16(xop) cycles - Frame: 32(ssse3)->20(xop) cycles - -commit aa47955a0ec65218e8bb967d36689069baca5fd1 -Author: Fiona Glaser -Date: Mon Jan 23 15:09:38 2012 -0800 - - AVX 32-bit hpel_filter_h - Faster on Sandy Bridge. - Also add details on unsuccessful optimizations in these functions. - -commit d7407cf81816fff7ab32ceb2398575724e8cc737 -Author: Fiona Glaser -Date: Fri Jan 27 16:29:30 2012 -0800 - - x86inc: add high halfword register support - Might be useful in a few cases. - -commit acabceb6530d1858bcd009b055e217c75344c442 -Author: Ronald S. Bultje -Date: Wed Jan 25 13:53:59 2012 +0800 - - Change %ifdef directives to %if directives in *.asm files - This allows combining multiple conditionals in a single statement. - -commit 82d8cdde567b1c1e8d2046bbb831d0daafe8213b -Author: Anton Mitrofanov -Date: Sun Jan 22 22:13:52 2012 +0400 - - Use TV range algorithm for bit-depth conversions - Such sources are more common, so better to be correct for the common case. - This also produces less error for the case of full range than the previous algorithm produced for the case of TV range. 
- -commit 27a7b05b8330d0756e5e3f6669282561030f54fa -Author: Hii -Date: Wed Jan 25 16:29:22 2012 +0800 - - Bump dates to 2012 - -commit 762f677e095a40e1927086bb08799c01e05c2ee4 -Author: Henrik Gramner -Date: Sat Jan 28 21:38:27 2012 +0100 - - Add Windows resource file - Displays version info in Windows Explorer. - -commit 545b41caa5903ebcb5d9336a59f9bf5a50a45037 -Author: Sergey Radionov -Date: Mon Jan 16 13:22:44 2012 -0800 - - Fix win32 pthread_cond_signal - Isn't used by x264 currently, so didn't cause a problem. - Fix backported from libav. - -commit 697a11e8ecb1376cddd4a8d4f4fa693e41c1987e -Author: Mans Rullgard -Date: Wed Feb 1 15:55:25 2012 -0800 - - ARM: align asm functions to 4 bytes. - Some linkers apparently fail to correctly align ARM functions when mixing with Thumb code. - -commit f59b310fd87b643b59d6e109e49fdf9d0a04ce91 -Author: Anton Mitrofanov -Date: Sun Jan 22 13:00:23 2012 +0400 - - Fix normalization of colorspace when input is packed YUV 4:2:2 - -commit 9fb055856a617f5ddca15a0c5745ff1c1486ad9a -Author: Fiona Glaser -Date: Sat Jan 21 12:54:40 2012 -0800 - - Force keyint-min 1 with Blu-ray - Fixes an issue with referencing across I-frames that's prohibited in Blu-ray for some godforsaken reason. - -commit 77cfcb6acf648da00eb4ddb52bcb7006bc64a61a -Author: Oka Motofumi -Date: Sun Jan 29 20:34:41 2012 +0900 - - Fix crash in --demuxer y4m with unsupported colorspace - -commit 30829c0c7e6bbf40d1b3ed5fcb5a45d85407978f -Author: Anton Mitrofanov -Date: Mon Jan 16 14:02:53 2012 -0800 - - Fix overread/possible crash with intra refresh + VBV - -commit 26c8303472b837e301d789ba569eae01955cf7f6 -Author: Loren Merritt -Date: Wed Jan 18 15:47:07 2012 -0800 - - Fix trellis 2 + subme >= 8 - Trellis didn't return a boolean value as it was supposed to. - Regression in r2143-5. - -commit 7d804baf3bca6ad33e18ccd0a838274214a8a7a0 -Author: Loren Merritt -Date: Fri Jan 6 15:53:29 2012 +0000 - - CABAC trellis opts part 4: x86_64 asm - Another 20% faster. - 18k->12k codesize. 
- - This patch series may have a large impact on encoding speed. - For example, 24% faster at --preset slower --crf 23 with 720p parkjoy. - Overall speed increase is proportional to the cost of trellis (which is proportional to bitrate, and much more with --trellis 2). - -commit dd354db4db2f26e63ed36eb790052c6794e5a684 -Author: Loren Merritt -Date: Fri Jan 6 15:53:04 2012 +0000 - - CABAC trellis opts part 3: make some arrays non-static - -commit 4abcf60a04e358b87da284f3a5fac3e2949b6de1 -Author: Loren Merritt -Date: Thu Dec 22 17:56:06 2011 +0000 - - CABAC trellis opts part 2: C optimizations - - Hoist the branch on coef value out of the loop over node contexts. - Special cases for each possible coef value (0,1,n). - Special case for dc-only blocks. - Template the main loop for two common subsets of nodes, to avoid a bunch of branches about which nodes are live. - Use the nonupdating version of cabac_size_decision in more cases, and omit those bins from the node struct. - CABAC offsets are now compile-time constants. - Change TRELLIS_SCORE_MAX from a specific constant to anything negative, which is cheaper to test. - Remove dct_weight2_zigzag[], since trellis has to lookup zigzag[] anyway. - - 60% faster on x86_64. - 25k->18k codesize. - -commit 253cd7baefb7f5d101725034b2c37afacc012305 -Author: Loren Merritt -Date: Thu Dec 22 17:55:06 2011 +0000 - - CABAC trellis opts part 1: minor change in output - Due to different tie-break order. - -commit 0d7a9100d12c618acea3f01b8bb9cc306f475b47 -Author: Henrik Gramner -Date: Sun Jan 8 04:14:10 2012 +0100 - - x86inc improvements for 64-bit - - Add support for all x86-64 registers - Prefer caller-saved register over callee-saved on WIN64 - Support up to 15 function arguments - -commit 8a6a062e11d4074c081d076408cb0bd6def1af8e -Author: Ilia Valiakhmetov -Date: Sun Jan 15 04:47:58 2012 -0600 - - High bit depth SSE2/AVX add8x8_idct8 and add16x16_idct8 - From Google Code-In. 
- -commit a35fd4194dd7004abe6f66679496beded405515a -Author: Edward Wang -Date: Wed Jan 4 15:35:54 2012 -0800 - - MMX/SSE2/AVX predict_8x16_p, high bit depth fdct8 - From Google Code-In. - -commit 9301bbd39fb0a49b1e986f9a7c29685439686de4 -Author: Fiona Glaser -Date: Thu Dec 22 14:03:15 2011 -0800 - - XOP 8-bit fDCT - Use integer MAC for one of the SUMSUB passes. About a dozen cycles faster for 16x16. - -commit c83edc0427e78c58683af99b80e0234c77b3e41a -Author: Cristian Militaru -Date: Wed Jan 4 12:38:08 2012 -0800 - - High bit depth intra_sad_x3_4x4 - From Google Code-In. - -commit 9c0fa2d63f549a44f869562cffa9c041a32ae41d -Author: Fiona Glaser -Date: Thu Dec 8 13:45:41 2011 -0800 - - Use a large LUT for CAVLC zero-run bit codes - Helps the most with trellis and RD, but also helps with bitstream writing. - Seems at worst neutral even in the extreme case of a CPU with small L2 cache (e.g. ARM Cortex A8). - -commit de7aed78cd2f70017f3c479d8f8dc32d52cee607 -Author: Matt Habel -Date: Fri Dec 16 23:16:09 2011 -0800 - - High bit depth intra_sad_x3_8x8, intra_satd_x3_4x4/8x8c/16x16 - Also add an ACCUM macro to handle accumulator-induced add-or-swap more concisely. - -commit d9dee734a9af1788461def43321f19be6a3d2d72 -Author: Shitiz Garg -Date: Sat Dec 3 15:34:57 2011 -0800 - - MMX 10-bit predict_8x8c_h and predict_8x16c_h - From Google Code-In. - -commit 7496fc4aeaaaf5b470b1eb0f73ce8ea71d0116f2 -Author: Aaron Schmitz -Date: Wed Nov 30 00:15:45 2011 -0600 - - Some MBAFF x86 assembly functions. - deblock_chroma_420_mbaff, plus 422/422_intra_mbaff implemented using existing functions. - From Google Code-In. - -commit b8d7b8acb48b45afbfd7efb5baac79475682684a -Author: George Stephanos -Date: Thu Dec 1 16:53:45 2011 -0800 - - More ARM NEON assembly functions - predict_8x8_v, predict_4x4_dc_top, predict_8x8_ddl, predict_8x8_ddr, predict_8x8_vl, predict_8x8_vr, predict_8x8_hd, predict_8x8_hu. - From Google Code-In. 
- -commit e269ca55e5244280afd0347c1088083cf7043d48 -Author: Ilia -Date: Mon Nov 28 05:20:09 2011 -0800 - - More 4:2:2 asm functions - High bit depth version of deblock_h_chroma_422. - Regular and high bit depth versions of deblock_h_chroma_intra_422. - High bit depth pixel_vsad. - SSE2 high bit depth and MMX 8-bit predict_8x8_vl. - Our first GCI patch this year! - -commit 5d66c5011488539f99ceafdb47b0856a8e9dae0b -Author: Henrik Gramner -Date: Thu Dec 8 16:14:35 2011 +0100 - - SSE2 and SSSE3 versions of sub8x16_dct_dc - Also slightly faster sub8x8_dct_dc - -commit 3ea6a8b22e0aa89e3749e9c95edfeaad9d341b7e -Author: Steven Walters -Date: Mon Dec 5 08:46:34 2011 -0500 - - Resize filter updates - Use AVPixFmtDescriptors to pick the most compatible x264 csp for any pixel format. - Fix deprecated use of av_set_int. - Now requires libavutil >= 51.19.0 - -commit f71d047d0bc129eb9f4724e023bf888a9124338b -Author: Oka Motofumi -Date: Thu Jan 5 14:23:50 2012 -0800 - - Add out-of-tree build support - -commit 5539220e5afc641a6747c6d95f41e5efbe5858e1 -Author: Anton Mitrofanov -Date: Fri Dec 16 18:17:00 2011 +0400 - - Limit SSIM to 100db - Avoids floating point error for infinite SSIM (lossless). - -commit 13c236172f0ff40ca149a2e862498457cd32ccb9 -Author: Reynaldo H. Verdejo Pinochet -Date: Wed Jan 4 13:16:12 2012 -0300 - - Fix wrong conditional inclusion of inttypes.h - inttypes.h is required by encoder/ratecontrol.c for SCNxxx macros, and HAVE_STDINT_H does not imply having inttypes.h. - stdint.h is a subset of inttypes.h, but this isn't enough for x264. - This change fixes building x264 with Android's toolchain. 
- -commit 2df9d45db64110854e6da6a2037d6c432c5463fe -Author: Anton Mitrofanov -Date: Wed Dec 21 11:08:56 2011 +0400 - - Fix crash with sliced threads and input height <= 112 - -commit e3d311813f3931133962f7ab8ee2305d231df83d -Author: Phillip Blucas -Date: Mon Dec 19 17:43:41 2011 -0600 - - Fix loading custom 8x8 chroma quant matrices in 4:4:4 - -commit 9fd7ccb2b635276d019e137844c693b525f92244 -Author: Anton Mitrofanov -Date: Fri Dec 16 01:48:07 2011 +0400 - - Fix PCM cost overflow - -commit 1d70d0e56003b762439ad4b5d8e72729b51516ae -Author: Anton Mitrofanov -Date: Fri Dec 9 01:54:22 2011 +0400 - - Fix overflow in 8-bit x86 vsad asm function - -commit b6ce6c64c17071804676435da9b1c07b902857e3 -Author: Anton Mitrofanov -Date: Wed Dec 7 19:14:52 2011 +0400 - - Fix crash in --fullhelp when compiled against recent ffmpeg - Don't assume all pixel formats have a description. - -commit c3ba63bbe83bd20d06a64cfecd6b878e8f49bc13 -Author: Fiona Glaser -Date: Tue Dec 6 14:39:21 2011 -0800 - - Fix regression in r2118 - Broke trellis with i16x16 macroblocks. - -commit 9dc2391576b35acb55c04773049a0b817f306969 -Author: Fiona Glaser -Date: Wed Nov 30 13:02:12 2011 -0800 - - Modify MBAFF chroma deblock functions to handle U/V at the same time - Allows for more convenient asm implementations. - -commit d0bf649fcc1a79da12e220c4364aeca6045dfbed -Author: Fiona Glaser -Date: Thu Nov 10 16:16:13 2011 -0800 - - CABAC trellis optimizations: use SIMD quant - Significant speed increase, minor change in output due to rounding. - -commit 6767f967831048669e45e65681f37011483b4fa0 -Author: Steven Walters -Date: Sun Nov 6 09:48:30 2011 -0800 - - YUV range detection and support for x264CLI - Two new options: --input-range and --range. - --input-range forces the range of the input in case of misdetection; auto by default. - -- range sets the range of the output; x264cli will convert if necessary, TV by default. - --fullrange is now removed as a CLI option (but the libx264 API is unchanged). 
- -commit f9a4c4d9828c1cc60135d0301981ea71fd90f6ca -Author: Kieran Kunhya -Date: Fri Nov 4 20:09:13 2011 +0000 - - Pass through user data - -commit 1c774e936a315fdfb92a35c402b351a1c542a13a -Author: Fiona Glaser -Date: Thu Oct 27 14:05:56 2011 -0700 - - Remove unpredictable branch in CABAC dqp - -commit f3a7517cb9b06a580623cbea0f140be534b99877 -Author: Loren Merritt -Date: Sun Oct 23 23:15:11 2011 +0000 - - x86inc: AVX symmetry optimization - 3-arg AVX ops with a memory arg can only have it in src2, - whereas SSE emulation of 3-arg prefers to have it in src1 (i.e. the move). - So, if the op is symmetric and the wrong one is memory, swap them. - Eliminates redundant moves in some cases when using 3-operand without AVX with memory arguments. - Also fix movss and movsd in some cases, and flag shufps correctly as float. - -commit 5ebbcd8748ae8d8b184db5a8f9b46a9ad865f0ae -Author: Anton Mitrofanov -Date: Tue Nov 29 13:45:13 2011 -0800 - - checkasm: shut up gcc warnings, fix some naming of functions in results - -commit 561f71ebf741370075b970fb9d31a593cf47782f -Author: Mans Rullgard -Date: Mon Nov 28 16:29:12 2011 -0800 - - checkasm: fix build on ARM - Because of how ALIGNED_ARRAY_16 is defined on ARM, array initialisers cannot be used here. Use memset() instead. - -commit 24bf90abde21e77c574f2bd43e38a3222c3183ef -Author: Anton Mitrofanov -Date: Sat Nov 12 01:31:49 2011 +0400 - - Improve makefile rules - Remove the need for "make clean" after most reconfigures. 
- -commit 87b23e25eee0c04bb47957445e7cf941a7d8b980 -Author: Anton Mitrofanov -Date: Sat Nov 12 00:47:48 2011 +0400 - - Mark some local functions as static, cosmetics - -commit 2ecbcd73d60d2f749696b39627c91e28a396538b -Author: Anton Mitrofanov -Date: Fri Nov 11 23:19:02 2011 +0400 - - Fix crash if timecode file opening fails - -commit f1387840b98560ae34aea9ca09d55984812ad50b -Author: Fabian Greffrath -Date: Fri Nov 11 13:25:43 2011 -0800 - - Configure: force PIC for shared build on PARISC and MIPS - -commit e5063ab30bcb79f94774b6d9ce91b098ade01d6d -Author: Anton Mitrofanov -Date: Sat Oct 22 19:41:07 2011 +0400 - - Improve yasm version check - Previous check allowed certain earlier versions that weren't fully compatible. - -commit 12104b22820b38b4976e83a6ee00dcb59ed959f1 -Author: Fiona Glaser -Date: Tue Oct 18 14:30:26 2011 -0700 - - Add fenc prefetching to adaptive quant - Many fewer cache misses, faster adaptive quant. - -commit 9bbfc30284469a70374a75fecfa322c4740dc2b7 -Author: Fiona Glaser -Date: Tue Oct 18 14:14:03 2011 -0700 - - Split prefetch_fenc between colorspaces - Add 4:2:2 version. - -commit b63a73da3add660358a4bad1a590c2d4ed466dc4 -Author: Fiona Glaser -Date: Tue Oct 11 17:04:32 2011 -0700 - - Some more 4:2:2 x86 asm - coeff_last8, coeff_level_run8, var2_8x16, predict_8x16c_dc, satd_4x16, intra_mbcmp_8x16c_x3, deblock_h_chroma_422 - -commit 50aaf8d84ac6fc78794b98cfe6a25440a09fbb82 -Author: Loren Merritt -Date: Tue Oct 11 18:12:43 2011 +0000 - - Remove obsolete versions of intra_mbcmp_x3 - intra_mbcmp_x3 is unnecessary if x9 exists (SSSE3 and onwards). - -commit 1111780d8e392455870898bacae30a413ae98464 -Author: Loren Merritt -Date: Mon Oct 10 05:42:36 2011 +0000 - - SSSE3/SSE4/AVX 9-way fully merged i8x8 analysis (sa8d_x9) - x86_64 only for now, due to register requirements (like sa8d_x3). 
- - i8x8 analysis cycles (per partition): - penryn sandybridge bulldozer - 616->600 482->374 418->356 preset=faster - 892->632 725->387 598->373 preset=medium - 948->650 789->409 673->383 preset=slower - -commit 422979198e492d5068034a3a5b1e4991af2b63a1 -Author: Fiona Glaser -Date: Fri Sep 30 19:09:19 2011 -0700 - - SSSE3/SSE4/AVX 9-way fully merged i8x8 analysis (sad_x9) - ~3 times faster than current analysis, plus (like intra_sad_x9_4x4) analyzes all modes without shortcuts. - -commit da66eef02e8d9cb57c52aeecb7371b9968747c2b -Author: Loren Merritt -Date: Wed Oct 5 13:29:21 2011 -0700 - - Merge i4x4 prediction with intra_mbcmp_x9_4x4 - Avoids a redundant prediction after analysis. - -commit 9f027f4f3f9b03b5dabe081a12ca1b80c20ffc18 -Author: Fiona Glaser -Date: Wed Oct 5 13:17:31 2011 -0700 - - Inline i4x4/i8x8 encode into intra analysis - Larger code size, but faster. - -commit a5a6d0eeadbba6ae3232f620345762aebca240ab -Author: Fiona Glaser -Date: Wed Sep 21 17:12:10 2011 -0700 - - Initial XOP and FMA4 support on AMD Bulldozer - ~10% faster Hadamard functions (SATD/SA8D/hadamard_ac) plus other improvements. - -commit e73b85b56437827f881d1406e11d2cca4bbe5583 -Author: Mans Rullgard -Date: Tue Sep 27 21:14:14 2011 +0400 - - ARM: update NEON chroma deblock functions to NV12 pixel format - -commit 9c356e2558948714bdbb991a9f9cb9a3e1f0121b -Author: Sean McGovern -Date: Mon Oct 17 12:45:15 2011 -0700 - - Add /usr/lib/{64/}values-xpg6.o to $LDFLAGS on Solaris - This is required for POSIX.1-2001 compliance. - -commit 6c50ab569d95ebb07e5fb437a38d646bf607c74b -Author: Sean McGovern -Date: Mon Oct 17 12:44:03 2011 -0700 - - Fix linker test for -Bsymbolic - The Solaris linker only accepts -Bsymbolic for objects compiled in dynamic mode (i.e. shared objects), so pass -shared to gcc. - Additionally, for x86_32 unresolved textrels cause a linker error so mark the .text section as 'impure'. 
- -commit 421c38f22c7bdaf2981b2ffb72332c40cadd7332 -Author: Sean McGovern -Date: Mon Oct 17 12:43:28 2011 -0700 - - Add $SOFLAGS to exported SOFLAGS make variable - -commit dd713cae59c062440b046fe75d60af83d049de3c -Author: Henrik Gramner -Date: Sat Sep 24 15:56:08 2011 +0200 - - Allow setting a chroma format at compile time - Gives a slight speed increase and significant binary size reduction when only one chroma format is needed. - -commit 68f6db44035e8f9d4d00a73e5703eb1d7ff8d619 -Author: Harfe Leier -Date: Fri Sep 30 12:49:33 2011 -0700 - - Improve profile help - List high422/high444 profiles, and don't show non-high-bit-depth profiles in high bit depth builds. - -commit 675110a687459cc03685489470bbc730580a793b -Author: Yusuke Nakamura -Date: Thu Oct 20 03:09:51 2011 +0900 - - Fix infinite loop parsing TDecimate Mode 3 timecode v1 files - -commit 2ec99b3b94f986b456de1525087ee85b6fa79091 -Author: Fiona Glaser -Date: Mon Oct 10 17:44:31 2011 -0700 - - Fix some integer overflows/signedness errors found by IOC - The only real bug here is in slicetype.c, which may or may not affect real encodes. - -commit ae1288c43780ed9be60b59dd556d5f85df7498e2 -Author: Fiona Glaser -Date: Wed Oct 12 09:16:32 2011 -0700 - - Fix pixel_var2 with 4:2:2 encoding - Might have caused artifacts or suboptimal chroma compression. 
- -commit 9ac39f6078659f4f5cf548460dec940a04fd52c8 -Author: Anton Mitrofanov -Date: Sun Oct 9 19:14:16 2011 +0400 - - Fix chroma intra analysis in 4:4:4 lossless mode - -commit 294df95060118de1d605ce20fcf490cdb4f4d14c -Author: Anton Mitrofanov -Date: Sun Oct 9 01:13:29 2011 +0400 - - Fix use of uninitialized MVs in sub8x8 RDO - -commit 3ff2feee5a176ec8012c313e4a9e2b3611f29614 -Author: Fabian Greffrath -Date: Fri Oct 7 19:04:17 2011 -0700 - - Fix detection of Alpha CPU arch on alphaev67 - -commit 2701440c515a9a8aee1c87d7c06c98e43c3d813f -Author: Fiona Glaser -Date: Wed Sep 14 14:53:04 2011 -0700 - - Optimize x86 asm for Intel macro-op fusion - That is, place all loop counter tests right before their conditional jumps. - -commit 2d481bc0ee053634fe46c0df2cbc646733dd137d -Author: Fiona Glaser -Date: Mon Sep 12 11:51:23 2011 -0700 - - CAVLC: clean up and restructure - Somewhat faster CAVLC and RD bit-counting. - -commit da768d95d5d63f1eac77a35731079ce02aaa125c -Author: Fiona Glaser -Date: Thu Sep 8 17:27:02 2011 -0700 - - CABAC: clean up and restructure - Somewhat faster CABAC and RD bit-counting. 
- -commit 389b401a99f2f33b41db7d74904b3ff7509d79e5 -Author: Fiona Glaser -Date: Sun Sep 4 11:31:29 2011 +0200 - - Some initial 4:2:2 x86 asm - -commit 5b0cb86f27ba0c5433c404bed51c06a5124dfb49 -Author: Henrik Gramner -Date: Fri Aug 26 15:57:04 2011 +0200 - - 4:2:2 encoding support - -commit 3d82e875d06b9d1e15ca2baa16b1bd9640500972 -Author: Loren Merritt -Date: Mon Aug 15 18:18:55 2011 +0000 - - SSSE3/SSE4 9-way fully merged i4x4 analysis (sad/satd_x9) - - i4x4 analysis cycles (per partition): - penryn sandybridge - 184-> 75 157-> 54 preset=superfast (sad) - 281->165 225->124 preset=faster (satd with early termination) - 332->165 263->124 preset=medium - 379->165 297->124 preset=slower (satd without early termination) - - This is the first code in x264 that intentionally produces different behavior - on different cpus: satd_x9 is implemented only on ssse3+ and checks all intra - directions, whereas the old code (on fast presets) may early terminate after - checking only some of them. There is no systematic difference on slow presets, - though they still occasionally disagree about tiebreaks. - - For ease of debugging, add an option "--cpu-independent" to disable satd_x9 - and any analogous future code. - -commit e184ff26233198932d9b77aa7feed6a49095f136 -Author: Loren Merritt -Date: Mon Aug 15 17:43:42 2011 +0000 - - Faster intra_mbcmp_x3 for versions without dedicated asm - Select asm subroutines more intelligently in the wrapper functions. 
- -commit d94edd734304c16265f28dd11e8a2029cbdc5b7f -Author: Loren Merritt -Date: Sat Aug 13 19:01:22 2011 +0000 - - Optimize x86 intra_predict_4x4 and 8x8 - - High bit depth Penryn, Sandybridge cycles: - 4x4_ddl: 11->10, 9-> 8 - 4x4_ddr: 15->13, 12->11 - 4x4_hd: , 15->12 - 4x4_hu: , 14->13 - 4x4_vr: 15->14, 14->12 - 8x8_ddl: 32->19, 19->14 - 8x8_ddr: 42->19, 21->14 - 8x8_hd: , 15->13 - 8x8_hu: 21->17, 16->12 - 8x8_vr: 33->19, - - 8-bit Penryn, Sandybridge cycles: - 4x4_ddr: 24->15, - 4x4_hd: 24->16, - 4x4_hu: 23->15, - 4x4_vr: 23->16, - 4x4_vl: 10-> 9, - 8x8_ddl: 23->15, - 8x8_hd: , 17->14 - 8x8_hu: , 15->14 - 8x8_vr: 20->16, 17->13 - -commit 37b2d963b262d2880271f313a17fceeee27a3de8 -Author: Loren Merritt -Date: Sat Aug 13 06:44:28 2011 +0000 - - Use realistic alignment for intra pred benchmarks in checkasm - -commit 10ef9590e33d209a937fcb3f5ca1be66fb481a17 -Author: Yusuke Nakamura -Date: Wed Sep 21 01:15:38 2011 +0900 - - Fix frame packing SEI with --frame-packing 0 - According to the spec, when frame_packing_arrangement_type is equal to 0, quincunx_sampling_flag shall be equal to 1. - -commit cb648060484f081eba39480a26791a8e0d605989 -Author: Oka Motofumi -Date: Mon Sep 5 11:50:37 2011 +0900 - - Fix install/uninstall shared libs if SYS is WINDOWS/CYGWIN - -commit d2452266ccf4bd9552d7ac94b5600b416d757d34 -Author: Reinhard Tartler -Date: Wed Aug 10 00:16:46 2011 -0700 - - Add Hurd support to configure - -commit 75de7be6d5e7b0e1fc0febf087be65e91c00b80b -Author: Loren Merritt -Date: Sat Aug 13 00:39:35 2011 +0000 - - Optimize x86 intra_satd_x3_* - ~7% faster. - -commit b597966bfa8a481489e5af93eb25988456c51a5d -Author: Loren Merritt -Date: Fri Aug 12 19:13:07 2011 +0000 - - Optimize x86 intra_sa8d_x3_8x8 - ~40% faster. - Also some other minor asm cosmetics. 
- -commit f3fc0c4485aa3ed1774bce462ad3fb92faec114b -Author: Loren Merritt -Date: Fri Aug 12 02:15:46 2011 +0000 - - Scale interlaced refs/mvs for mvr predictors - Slightly improves compression and fixes a Valgrind error. - -commit ebc334f8d1d2752b9bc2c56e457fffc123ffddee -Author: Loren Merritt -Date: Thu Aug 11 15:03:12 2011 +0000 - - Optimize predict_8x8_filter and incidentally remove a valgrind false-positive - -commit 94493149bbc251d0ce4ceee85a9faa5ad8837a04 -Author: Anton Mitrofanov -Date: Mon Aug 15 12:22:18 2011 +0400 - - Don't override flat SSE2 dequant functions with non-flat AVX ones - Slightly faster. - -commit 25a8bb9461bf7b0c75e7fadc8d104dbdc61bed5c -Author: Loren Merritt -Date: Mon Aug 8 13:40:53 2011 +0000 - - Shut up some valgrind false-positives - -commit ede9651875846116bdb2d3d84e3630beada7e21d -Author: Fiona Glaser -Date: Tue Aug 16 13:02:24 2011 -0700 - - Avoid some unnecessary allocations with B-frames/CABAC off - -commit 17f16d161e386457f7eaa96866550c497af681d5 -Author: Fiona Glaser -Date: Mon Aug 22 17:07:03 2011 -0700 - - Fix typo in p8x8 RD analysis - Passed wrong idx to trellis. 
- -commit 5a22495a2a857b9fcd5825595422c78f0223a417 -Author: Anton Mitrofanov -Date: Sun Aug 21 02:44:45 2011 +0400 - - Fix invalid memory accesses in x86 lowres_init when width <= 16 - -commit 8b72a9e4c87bbdfa1b87609fa9cde9bf61440383 -Author: Anton Mitrofanov -Date: Mon Aug 15 12:03:09 2011 +0400 - - Fix intermediate conversion for YUVJ* pixfmts with 4:4:4 encoding - -commit cec1f4039fb6f4bf1c5ef97648b94e489400e5bc -Author: Henrik Gramner -Date: Sun Aug 14 13:39:29 2011 +0200 - - Fix pic_out returned by x264_encoder_encode with 4:4:4 - -commit eaa858d33b9dcb6e526b01cc39d0268d4ae6d8c0 -Author: Loren Merritt -Date: Thu Aug 11 22:12:26 2011 +0000 - - Fix zeroing of mvr predictors in bskip blocks - -commit 29e318fd26bd3a2e689801aeb9ff84d9e6c1d25f -Author: Loren Merritt -Date: Thu Aug 11 01:33:13 2011 +0000 - - Fix: chroma planes for weightp analysis were not initted if U early-terminates and V doesn't. - -commit af0d8d8588e9eed4c4895747fcb7485dd0210bcf -Author: Henrik Gramner -Date: Wed Aug 10 20:25:07 2011 +0200 - - Expand borders before chroma weightp analysis - Prevents mc from using uninitialized source pixels. - -commit cfcce49df42848f601cb05086d1ef89c23675397 -Author: Henrik Gramner -Date: Wed Aug 10 19:29:14 2011 +0200 - - Another 4:4:4 chroma weightp bug fix - -commit 51821635e8dccf877c3521830a8a5598c2bc408b -Author: Fiona Glaser -Date: Wed Aug 10 00:17:26 2011 -0700 - - Fix typo in help - -commit 3817e54a3aeaa387206f78d5eaee98339dd7d93b -Author: Fiona Glaser -Date: Sat Aug 6 10:45:47 2011 -0700 - - Improve support for varying resolution between passes - Should give much better quality, but still doesn't support MB-tree yet. - Also check for the same interlaced options between passes. - Various minor ratecontrol cosmetics. 
- -commit 9b9a13a98b98385884b7ac25710305ad431c62e4 -Author: Loren Merritt -Date: Sun Aug 7 22:57:27 2011 +0000 - - asm cosmetics: base-4 constants for shuffles - -commit 7e60fcd7af513e48d912dfce21026420698ed6ba -Author: Loren Merritt -Date: Wed Aug 3 14:58:50 2011 +0000 - - Enable some existing asm functions that were missing function pointers - pixel_ads1_avx, predict_8x8_hd_avxx - High bit depth mc_copy_w8_sse2, denoise_dct_avx, prefetch_fenc/ref, and several pixel*sse4. - -commit 52f287e84a9965f652221f535a3298c7ce0846b9 -Author: Loren Merritt -Date: Wed Aug 3 14:57:06 2011 +0000 - - Remove some unused, broken, and/or useless functions - Unused frame_sort. - Unused x86_64 dequant_4x4dc_mmx2, predict_8x8_vr_mmx2. - Unused and broken high_depth integral_init*h_sse4, optimize_chroma_*, dequant_flat_*, sub8x8_dct_dc_*, zigzag_sub_*. - Useless high_depth dequant_sse4, dequant_dc_sse4. - -commit 309ddabbb3fba9ba0a2ae4c23470ec539d052374 -Author: Loren Merritt -Date: Wed Aug 3 14:56:27 2011 +0000 - - asm cosmetics: merge all the variants of ABS macros - -commit 1921c6824e37bdf5a8436a6cbe36b0d3a8c376b3 -Author: Loren Merritt -Date: Wed Aug 3 14:53:29 2011 +0000 - - asm cosmetics part 2 - These changes were split out of the cpuflags commit because they change the output executable. - -commit f85be1cdbe8d9244c0465df13ed58215a8c673cc -Author: Loren Merritt -Date: Wed Aug 3 14:46:41 2011 +0000 - - asm cosmetics: INIT_MMX/XMM/YMM now support a cpuflags argument - - Reduces the number of macro args that need to be passed around. - Allows multiple implementations of a given macro (e.g. PALIGNR) to check - cpuflags at the location where the macro is defined, instead of having - to select implementations by %define at toplevel. - Remove INIT_AVX, as it's replaced by "INIT_XMM avx". - - This commit does not change the stripped executable. 
- -commit 67336688cdc0c47468cef4e6f8cf57ffd010b56e -Author: Loren Merritt -Date: Wed Aug 3 14:43:34 2011 +0000 - - Import x86inc.asm patches from libav - -commit 189c30d390d08b2b3d3007acd0a106a4e0cd17b2 -Author: Loren Merritt -Date: Wed Aug 3 14:42:12 2011 +0000 - - Cosmetics: s/mmxext/mmx2/ - -commit b37de18947348199bdc392b38e979f619978126e -Author: Henrik Gramner -Date: Sun Aug 7 11:58:36 2011 +0200 - - Fix two bugs in 4:4:4 chroma weightp analysis - Caused slightly worse compression. - -commit 014f9c8e3fa202f13f926ac037c3a8db494522ea -Author: Loren Merritt -Date: Wed Aug 3 14:40:01 2011 +0000 - - Fix "--asm avx" - Previously required "--asm sse2fast,fastshuffle,sse4.2,avx". - -commit 3674cf4fd338a7894883a0172ec6bde61eac6c25 -Author: Anton Mitrofanov -Date: Fri Aug 5 15:59:20 2011 +0400 - - Re-add support for glibc <2.6, which doesn't have CPU_COUNT - -commit 1dd4b85fc700db5ec4380e20c2d73882808b3763 -Author: Yasuhiro Ikeda -Date: Tue Aug 2 08:59:15 2011 +0900 - - Avoid using deprecated libavformat functions - Replace av_find_stream_info with avformat_find_stream_info. - Now requires libavformat 53.3.0 or newer. - -commit 191b68df93e7ad4096c6aa4df4120dcb0e83dded -Author: Henrik Gramner -Date: Wed Jul 27 02:23:12 2011 +0200 - - Use assembly versions of some deblocking functions in MBAFF - -commit 459ac481e85833550470d231ae4749a138146614 -Author: Anton Mitrofanov -Date: Thu Jul 28 00:26:27 2011 +0400 - - Move X264_VERSION / X264_POINTVER from config.h to x264_config.h - This makes them available to external programs as part of the public API. - -commit 95f03f9e89c04b29aa4b5ad57fa4869899eedb4c -Author: Henrik Gramner -Date: Fri Jul 29 20:15:52 2011 +0200 - - Fix padding bug in x264_expand_border_mbpair - -commit eee242c1a64db0c4975eaf9add82565502882293 -Author: Yusuke Nakamura -Date: Fri Jul 29 23:39:26 2011 +0900 - - Timecode parsing: Add missing initialization - Fix crash when failed to parse timecode file before malloc pts. 
- Fix detection of user timebase considered to be exceeding H.264 maximum. - -commit e1ec7c8ae8d865165c802a69387e4d41cb004e43 -Author: Anton Mitrofanov -Date: Thu Jul 28 13:37:24 2011 +0400 - - Fix crash with high bitdepth 4:2:0 input - -commit 10dc5bb27739fd112f5b94ffb9419fa8781c5bbe -Author: Daniel Kang -Date: Tue Jul 26 21:57:39 2011 -0400 - - x86 asm cosmetics - Use FDEC_STRIDEB where appropriate. - -commit bbfbacc9d3fa89cd922f33feb3924b67fdf31f7b -Author: Fiona Glaser -Date: Tue Jul 26 07:40:23 2011 -0700 - - Fix a bug in lossless sub-8x8 RD - Caused crashes in rare cases with lossless encoding. Regression in 4:4:4. - -commit 10474f5af22f3b2444a4301252175657b6fb1514 -Author: Fiona Glaser -Date: Mon Jul 18 23:10:30 2011 -0700 - - Improved p8x4/4x8 search decision - Use the same thresholding as for p16x8/8x16. - Does p8x4/4x8 search more often, for a small compression improvement. - -commit 4a88ee1c649d92bbdbbf128e22d547e9b833f00c -Author: Dan Larkin -Date: Wed Jul 13 12:45:23 2011 -0500 - - Add --subme 11, which disables all early terminations in analysis - Necessary for a future trellis mode decision/motion estimation patch. - Also add the slowest presets to the regression test. - -commit 330c8fdaccd63383ba6f7f1ccf787a5f1b89d09b -Author: Dan Larkin -Date: Wed Jul 13 11:33:48 2011 -0500 - - Some trivial changes to RD thresholds - The output-changing portion of the next patch. - -commit b5e21b60fe4422c85b9f27eda6f45d7a5517e160 -Author: Anton Mitrofanov -Date: Wed Jul 20 22:54:43 2011 +0400 - - Allow setting a wider range of chroma QP offsets - This allows use of the full range of chroma QP offsets, even in combination with the automatic psy-based adjustments. 
- -commit 1f285bd40b45dfa97fadc86f912a19c54563fa77 -Author: Fiona Glaser -Date: Fri Jul 15 13:24:38 2011 -0700 - - Optimize macroblock_deblock_strength, add more early terminations - -commit 695bac1d7e66ead90952e333abeab0176ea7221d -Author: Fiona Glaser -Date: Thu Jul 14 18:23:44 2011 -0700 - - Function-pointerify MBAFF deblocking functions - -commit 75466d2e4fff1aeba7e64a1947e8beea3f1235ff -Author: Fiona Glaser -Date: Thu Jul 14 14:04:11 2011 -0700 - - Clean up MBAFF deblocking code - -commit 8ae69dbc7ec37e157a3890c21ec4904973e800f9 -Author: Fiona Glaser -Date: Tue Jul 12 17:27:18 2011 -0700 - - Optimize frame_deblock_row - -commit 44269ed290f1a5457c24b6e2992bc65e92a70ac4 -Author: Henrik Gramner -Date: Wed Jul 20 22:30:59 2011 +0200 - - Shrink two arrays - -commit aea1565f5f5d793935b10cd6081bf8dbe9513db5 -Author: Anton Mitrofanov -Date: Mon Jul 18 15:20:05 2011 +0400 - - Add support for the new (4:4:4) colorspaces to x264_picture_alloc - -commit e93cfd6adcdd246372a38f2598590c0ab7c69b7d -Author: Anton Mitrofanov -Date: Wed Jul 20 18:06:41 2011 +0400 - - Various cosmetics - -commit 3ef68d34b477bfd7410267eecbeaa8ebb44bccc4 -Author: Yasuhiro Ikeda -Date: Tue Jul 12 23:41:42 2011 +0900 - - Improve configure help - -commit 9dd3e96e9420fac3cb00d44eab75450c630fe231 -Author: Yasuhiro Ikeda -Date: Tue Jul 12 14:46:29 2011 +0900 - - Use $optarg for some configure options - -commit f7e6610ba12319d68833526676b16879aaff415c -Author: Rafaël Carré -Date: Thu Jul 14 18:51:43 2011 -0700 - - Linux x264_cpu_num_processors(): use glibc macros - The cpu_set_t structure is considered opaque. - Also handle sched_getaffinity() error case if "cpusetsize is smaller than the size of the affinity mask used by the kernel." 
- -commit 670d81811866e9e5045d25c5def5ba2b9f06d2ac -Author: Anton Mitrofanov -Date: Thu Jul 14 17:02:43 2011 +0400 - - Fix spurious "stream properties changed" with --seek option on some inputs - -commit aa50e72e7c723927325d031ab47b24e069dde4e3 -Author: Anton Mitrofanov -Date: Fri Jul 15 15:06:37 2011 +0400 - - Fix use of deprecated libavcodec functions - Replace avcodec_open with avcodec_open2. Now requires libavcodec 53.6.0 or newer. - -commit 67c796a37233e66239226bacd74f038281d43095 -Author: Kieran Kunhya -Date: Wed Jul 13 20:25:40 2011 +0100 - - Fix nalu_process callback with HRD - -commit bb784df93d92fb28f67a7998faed0da425b25623 -Author: Anton Mitrofanov -Date: Wed Jul 13 15:55:38 2011 +0400 - - Fix incorrect chroma swap for some input pixfmts - - Problem occurred if pixfmt of lavf/ffms input was PIX_FMT_RGB24 or PIX_FMT_YUV444P. - -commit ad1c2c8e383cb0f23ba8a0ba2ae211ad9f5eba62 -Author: Anton Mitrofanov -Date: Tue Jun 28 21:39:09 2011 +0400 - - Fix resize filter crash with YUVJ* input pixfmt - -commit ce55ae08a6aad516e6aa2ed58fd93a2adf39a997 -Author: xvidfan -Date: Wed Jun 22 18:46:14 2011 -0700 - - RGB encoding support - Much less efficient than YUV444, but easy to support using the YUV444 framework. - -commit a93e4c4a75d05e7bf379cb9a39caad57f615eeb0 -Author: Fiona Glaser -Date: Wed Jun 22 03:32:53 2011 -0700 - - 4:4:4 encoding support - -commit 323469e393af71dedd357763883232a293c3ab02 -Author: Fiona Glaser -Date: Mon Jun 20 16:20:21 2011 -0700 - - Properly weight slice header lambda in chroma weightp analysis - -commit ae61d0c3c236140b6a7ee4ae5f691cf8191b2282 -Author: Daniel Kang -Date: Sun Jul 3 17:32:00 2011 -0400 - - Better x86 high bit depth predict_8x8c_p - Avoid the need to check for corner cases by reordering arithmetic. - Also make a minor optimization to high bit depth predict_16x16_p. 
- -commit a8e1be77d59ff3e5ba565b6ee133a1b2364a2dfa -Author: Fiona Glaser -Date: Thu Jun 23 11:54:42 2011 -0700 - - Eliminate extra layer of indirection for sps/pps references - Also remove poc type 1 support (it didn't work anyways) to reduce sps size. - -commit 8ade503619aff45e5be0ee544d8ab8c867eb5720 -Author: Fiona Glaser -Date: Sat Jul 9 19:21:00 2011 -0700 - - Fix SSIM calculation with sliced threads - -commit 03bf7da697967bb8ed0b014e8623532b58051240 -Author: Anton Mitrofanov -Date: Sat Jul 9 23:57:44 2011 +0400 - - Avoid possible NaNs in B-frame output stats - -commit defbf3f4d26d348bf07ec91588a304b59588d96e -Author: Rémi Denis-Courmont -Date: Thu Jun 30 14:07:43 2011 -0700 - - ARM: do not override the toolchain default for FPU ABI - -commit fb629fcf1d280778f50db39f6c1038158321cc3c -Author: Steven Walters -Date: Thu Jun 23 20:29:01 2011 -0400 - - Fix link errors with libswscale/libavutil as shared libraries - -commit e825c625999ddc0a27fc6c5cc0b39f198c22b021 -Author: Steven Walters -Date: Sat Jun 18 14:12:34 2011 -0400 - - Fix deprecation in libavformat usage - Replace av_open_input_file with avformat_open_input. Now requires libavformat 53.2.0 or newer. - -commit d89c1b43816f05e43a836d38764d74d499e82a80 -Author: Anton Mitrofanov -Date: Thu Jun 9 01:34:14 2011 +0400 - - Fix various issues with VBV+threads - Eliminate the race condition with interframe row predictors and threads. - Recalculate frame_size_estimated at the end of a frame, for improved update_vbv_plan. - Some cosmetics. - -commit ed3b10eb9cffcc346b5a070ce47f5a2beaf9efb6 -Author: Anton Mitrofanov -Date: Mon Jun 6 13:54:44 2011 +0400 - - Fix MBAFF row VBV ratecontrol - Reverts most of r1984 and implements a much simpler solution. 
- -commit d091d0e6038e770ada1a856c601c401ba729d083 -Author: Fiona Glaser -Date: Mon May 23 17:01:02 2011 -0700 - - Make ratecontrol_mb less slow - -commit 63eb8bc9b48564f777e98dd2528c07cff09184b1 -Author: Steven Walters -Date: Thu Jun 2 21:23:04 2011 -0400 - - Resize filter updates - Fix use of deprecated sws_getContext. - Fix uses of sws_format_name. - Fix stream change warning not occurring on the first resolution change. - Drop cpu detection, as it is now performed internally by swscale. - Update swscale version requirements. - -commit d2e8686121a0418f466a0d79ef6a5367e944f940 -Author: Fiona Glaser -Date: Tue May 17 14:50:51 2011 -0700 - - AVX mbtree_propagate - Up to ~20-30% faster than SSE2 on Sandy Bridge. - -commit 6d2b51a32bbaabee1a8762adb204d035d590331b -Author: Fiona Glaser -Date: Tue Jun 14 10:26:56 2011 -0700 - - Use -vsync 0 with ffmpeg regression test - -commit 06fbd9df654cd2b7a025c12b3a7d4b3fb3386e23 -Author: Henrik Gramner -Date: Sat May 21 19:04:46 2011 +0200 - - Inline emms instructions on x86 if possible - -commit f7c6d308f38b3193dbb7bd9f427252e296dfcbfe -Author: Fiona Glaser -Date: Tue Jun 14 09:35:03 2011 -0700 - - Make left_index_table const - Should allow for some missed compiler optimizations in macroblock_cache_load. 
- -commit ca7852e211b5a270a8e4752526378a898f669017 -Author: Hii -Date: Tue May 24 08:31:17 2011 +0800 - - Make --profile main/baseline force off CQMfile - -commit ae2a6d80432fe5fa024227742043aca976795d38 -Author: Anton Mitrofanov -Date: Wed Jun 1 02:11:56 2011 +0400 - - Fix VBV bug caused by zero i_row_satd value for first and last row - -commit 0ce601d591e6dd029c4ae05e02f0d01bcbdcca14 -Author: Anton Mitrofanov -Date: Tue May 31 00:13:22 2011 +0400 - - Fix crash with VBV + forced QP - -commit 6633bb9e35880c59cc23f176954f10c36db85a2b -Author: Anton Mitrofanov -Date: Mon May 30 02:36:31 2011 +0400 - - Fix VBV bug with MinCR limit - -commit 0279e564a419353917e8fffd42e9ef737b25d740 -Author: Fiona Glaser -Date: Fri May 20 10:43:28 2011 -0700 - - Fix bitstream reallocation with slice-max-size + MBAFF - -commit 5a37283db5c7c6b39d7ce7dc69a19480aff3c320 -Author: Nikoli -Date: Fri Apr 29 14:19:22 2011 +0400 - - Improve build system capabilities - Make static lib and CLI optional. - Support linking CLI to system libx264. - Don't strip by default, to match GNU packaging guidelines. - -commit 6c54a135f5d552cbed4d3067aae2621ffb4f73af -Author: Fiona Glaser -Date: Tue Apr 26 05:12:26 2011 -0700 - - Slightly speed up x86 CABAC asm - Also make some various cleanups. - -commit bee57e6df38792a01053a96cecc3ecd30a2df434 -Author: Fiona Glaser -Date: Wed May 4 23:26:19 2011 -0700 - - Faster pixel_memset - ~4x faster. - Also inline plane_expand_border for improved constant propagation. - -commit 11130b0cf9192a296ba8a1521b5f80219294a6d7 -Author: Fiona Glaser -Date: Thu May 5 00:42:43 2011 -0700 - - Add checkasm tests for memcpy_aligned, memzero_aligned - Also make memcpy_aligned support sizes smaller than 64. - -commit 7ad06554bb2fbec2b543417bdaab15e0ac4bc366 -Author: Fiona Glaser -Date: Sun May 8 18:46:52 2011 -0700 - - MBAFF: Add regularization to VSAD metric - Bias towards the MBAFF decisions made in neighboring mb pairs. - ~2% better compression on a random 1080i HDTV source. 
- -commit 1c7caa534ad1f61cd20587626e46275d2a8c7731 -Author: Fiona Glaser -Date: Sun May 8 18:46:39 2011 -0700 - - MBAFF: Improve handling of bottom row mod32 padding - Force skip on any MBs entirely outside the frame - If an mb pair in the bottom row is chosen to be progressive, re-pad the bottom rows progressively. - -commit 52b3d8031b82e7672033bdc60899c1a5acf0e3b3 -Author: Fiona Glaser -Date: Sun May 8 19:17:36 2011 -0700 - - MBAFF: Add frame/field MB stats - -commit a313dc97952e6f004475f044c181d0df3b7430af -Author: Simon Horlick -Date: Wed Apr 27 12:49:25 2011 +0100 - - MBAFF: Template direct spatial - -commit 73dec30bc846277a64667f41b8a09e295273e896 -Author: Simon Horlick -Date: Mon Apr 25 21:22:59 2011 +0100 - - MBAFF: Template cache_load and cache_load_neighbours - -commit 08502a7c8d6e1bad719d958a546816c55791676e -Author: Simon Horlick -Date: Mon Apr 25 09:06:24 2011 +0100 - - MBAFF: Make interlaced support a compile time option - -commit 8029e6640967ee71b4ff94233615a5e291da62f4 -Author: Simon Horlick -Date: Sun Apr 17 10:05:51 2011 +0100 - - MBAFF: Don't call zigzag_init for every mb - -commit 874f9b5c23dbec867b5db2a29e466487d180f9d6 -Author: Simon Horlick -Date: Fri Mar 25 13:36:21 2011 +0000 - - MBAFF: Modify ratecontrol to update every two rows - -commit a5fa92aab92d4d3bceb1147a3055dc1d63409d9c -Author: Simon Horlick -Date: Wed Mar 23 21:55:03 2011 +0000 - - MBAFF: Add support for slice-max-size - - Also add slice-max-size to the regression tests. - -commit 63b0255d6e991343d5afbe241d9be85be501584e -Author: Simon Horlick -Date: Wed Mar 23 21:54:21 2011 +0000 - - MBAFF: Add support for slice-max-mbs - -commit 002695f017a507253c58ac9b453dc6e69d769dc6 -Author: Simon Horlick -Date: Thu Mar 17 17:39:18 2011 +0000 - - MBAFF: Adaptive quantization - - Compute energy for interlaced and progressive choices and pick the least. 
- -commit ff33967025a683098e5008d9a0684ec068e04a85 -Author: Simon Horlick -Date: Mon Mar 14 02:54:30 2011 +0000 - - MBAFF: Enable adaptive MBAFF with VSAD decision - -commit fea488715ec64542ce87a3312644bcfda994d6d9 -Author: Simon Horlick -Date: Sat Apr 23 10:44:04 2011 +0100 - - MBAFF: Create a VSAD DSP function - - x86 assembly by Fiona Glaser. This gives roughly 30x speed - increase over the C version. - -commit 3a7194f124fbf99cf3a3ca7aef45790196546f88 -Author: Simon Horlick -Date: Tue Mar 15 01:17:01 2011 +0000 - - MBAFF: Direct spatial - -commit 27c5c5d090ca5f64ae2998e1de70480c46bce87b -Author: Simon Horlick -Date: Tue Mar 15 01:16:20 2011 +0000 - - MBAFF: Direct temporal - -commit 638092d32aafe22201bae8a63fa5e7a005e4a7de -Author: Simon Horlick -Date: Tue Mar 15 01:15:06 2011 +0000 - - MBAFF: Calculate bipred POCs - - Need to calculate two tables for the cases where the current macroblock is - progressive or interlaced as refs are calculated differently for each. - -commit d5c57af4055d4177ca1c67fe7e3ac36e07dca179 -Author: Simon Horlick -Date: Tue Mar 15 01:39:49 2011 +0000 - - MBAFF: Use both left macroblocks for ref_idx calculation - -commit 5eccc7d2263ffd63921b95b9e95152ffb6b3645c -Author: Simon Horlick -Date: Sun Apr 3 15:23:35 2011 +0100 - - MBAFF: First edge deblocking - -commit a71cd871325158577a3be8ed96e8abfe22645042 -Author: Simon Horlick -Date: Mon Mar 21 11:03:23 2011 +0000 - - MBAFF: Implement left edge deblocking functions - -commit 94b9141609d17ebbeb3184a8a5fc0660725a4cf2 -Author: Simon Horlick -Date: Sat Apr 2 18:27:13 2011 +0100 - - MBAFF: Add extra data to the deblock strength structure - -commit 8c2114db1938e04b4122fcb96b6380329bf1cf31 -Author: Simon Horlick -Date: Wed Mar 16 21:27:07 2011 +0000 - - MBAFF: Deblocking support - -commit efcfead77b71e65eaf3412b02c525ba6b1f59c90 -Author: Simon Horlick -Date: Mon Mar 21 11:02:27 2011 +0000 - - MBAFF: Move common code from deblock functions - -commit e30bb5318d32ae107560c7242b2f361abed0c6a6 -Author: 
Simon Horlick -Date: Wed Mar 16 21:18:59 2011 +0000 - - MBAFF: Add mbaff deblock strength calculation - - Move call to deblock_strength to x264_macroblock_deblock_strength to - keep deblock strength calculation in one place. - -commit be87b09f435052a22f7ac15a469d75964d572b2f -Author: Simon Horlick -Date: Thu Apr 21 01:47:53 2011 +0100 - - MBAFF: Update x264_cabac_mvd_sum_mmxext to work with larger MVDs. - - Author: Loren Merritt - -commit 7e081bd859a7ca89e702e80a9f8997064d93b196 -Author: Simon Horlick -Date: Tue Mar 29 15:47:04 2011 +0100 - - MBAFF: Clamp MVDs to 66 instead of 33 - -commit 02d7ef5f1696396892a60d6ce6f6cec2408df92b -Author: Simon Horlick -Date: Tue Mar 29 15:46:34 2011 +0100 - - MBAFF: CABAC encoding of skips - -commit 532bb60a6695ba1ceb698747e57107de422491f2 -Author: Simon Horlick -Date: Sun Feb 20 15:31:55 2011 +0000 - - MBAFF: Track what interlace decision the decoder is using - -commit 313d3770baa69915f7f5e7ba87c7aa79110b1ab4 -Author: Simon Horlick -Date: Sun Feb 6 22:58:39 2011 +0000 - - MBAFF: Fix mvy bounds - - Fix MV clipping - -commit 134ed96db6397bb7b8f1a5ff792ef1a0908d8cf1 -Author: Simon Horlick -Date: Wed Mar 16 21:34:51 2011 +0000 - - MBAFF: Copy deblocked pixels to other plane - -commit 7055d13f2f55ad7e78c1c12aad3a7024ac0be7f1 -Author: Simon Horlick -Date: Tue Mar 29 20:26:33 2011 +0100 - - MBAFF: Disallow skip where predicted interlace flag would be wrong - -commit a1974d1cb8ad9e1c6e3a7eedf09eac7c5ce6b162 -Author: Simon Horlick -Date: Tue Mar 29 20:25:23 2011 +0100 - - MBAFF: Inter support - -commit 2e5fc7235679bc968e3d4ac73ad1f39fa68b9987 -Author: Simon Horlick -Date: Fri Jan 14 21:18:14 2011 +0000 - - MBAFF: Neighbour calculation - - Back up intra borders correctly and make neighbour calculation several times longer. 
- -commit 689a8258fd9a805cc46ae16fc2a0a22c31f8b76f -Author: Simon Horlick -Date: Tue Jan 11 20:21:26 2011 +0000 - - MBAFF: Store references to the two left macroblocks - -commit a13ba181b7e1bcc42d6c8855de2c20ce1b652591 -Author: Simon Horlick -Date: Tue Jan 11 20:16:18 2011 +0000 - - MBAFF: Store left references in a table - -commit 740f203f8248450430eca5aeac74bcb0f3269c64 -Author: Simon Horlick -Date: Tue Jan 11 20:09:00 2011 +0000 - - MBAFF: Disable adaptive MBAFF when subme 0 is used - -commit a11114e6da54932f59afb02ea13ae41aaf4f3f98 -Author: Simon Horlick -Date: Tue Jan 11 20:05:54 2011 +0000 - - MBAFF: Save interlace decision for all macroblocks - -commit e64d7a9fdb92005399de1146177f155760897049 -Author: Fiona Glaser -Date: Thu May 12 10:21:16 2011 +0800 - - Fix bug in NAL buffer resizing - Also properly terminate if NAL buffer resizing fails. - -commit c4c995a8838f673ded01cf85ab023c2b7578106d -Author: Anton Mitrofanov -Date: Thu May 5 16:27:49 2011 +0400 - - Fix zone bitrate multiplier and QP forcing in 2-pass mode - Previously zone changes could affect frames outside of the given frame range (around 20 neighboring frames). - -commit 4a9ead8bfa95d7c93ca022242ac7ae4912deb776 -Author: Fiona Glaser -Date: Thu May 5 03:24:38 2011 -0700 - - Use float constants in qp rounding - Slight performance improvement and fixes slight difference in output between gcc 3.4 and 4.5. - -commit 97400fceb9388dda330ca05221b01989097e9496 -Author: Anton Mitrofanov -Date: Wed May 4 11:49:06 2011 +0400 - - Fix bugs with ratecontrol reconfiguration - Initialization of some parameters was missed or wasn't synchronized with other threads - -commit 41c56f5eebaade1c46a9124195c751d4d3d24daa -Author: Anton Mitrofanov -Date: Wed May 4 11:45:58 2011 +0400 - - More validation of input parameters - This fixes a crash with --me umh and insane values of --me-range. 
- -commit 91965e48fa97c194ab0c661b3d6d41e949426097 -Author: Anton Mitrofanov -Date: Sun May 1 17:28:56 2011 +0400 - - Fix bug in --b-adapt 2 with --rc-lookahead >248 - Problem caused by buffer overflow in strcpy. - -commit 788c2881c09795dbe2c00c8e73b0bfb4664c90d5 -Author: Oka Motofumi -Date: Thu Apr 28 13:13:49 2011 +0900 - - Check for invalid pixfmts in lavf demuxer - -commit 80a661e2364ede32e5797eeb5e7bfec452016082 -Author: Fiona Glaser -Date: Tue May 10 01:58:21 2011 -0700 - - Fix regression in r1944 - Broke sliced-threads + slice-max-size/slice-max-mbs. - -commit 29a58f4a1d148667eb0bd8eca07189f5d30d1142 -Author: Fiona Glaser -Date: Sun Apr 24 18:36:26 2011 -0700 - - Precalculate CABAC initialization contexts - Slightly faster encoding with lots of slices. - -commit 866bf26018c8d76c475d23e7fe028774e8ec9814 -Author: Fiona Glaser -Date: Sat Apr 23 21:22:13 2011 -0700 - - Avoid redundant log2f calls in mv cost initialization - Saves around 100 million clock cycles on x264 init. - -commit 040d45415d25547033f99ae059dbcf055583d8d2 -Author: Fiona Glaser -Date: Thu Apr 21 14:19:05 2011 -0700 - - CABAC residual: cleanup and optimizations - Also kill all Hungarian notation while we're at it. - Trim an instruction off cabac_encode_bypass. - -commit 773d969b848af3440735e05cd06c14026232a0cf -Author: Anton Mitrofanov -Date: Wed Apr 20 02:54:49 2011 +0400 - - Validate input parameters more carefully - Get rid of redundant warnings upon encoder_reconfig calls. - Also avoid encoder_reconfig turning off psy_rd/trellis. - -commit 7b3d7364e006f9e240c44ba9c5a43094c68e0892 -Author: Anton Mitrofanov -Date: Fri Apr 22 01:13:28 2011 +0400 - - Fix VFR MB-tree to work as intended - Should improve quality with FPSs much larger or smaller than 25. 
- -commit 303449825e3424d8661fd43dac170e5d85a09d4c -Author: Yasuhiro Ikeda -Date: Sun Apr 24 15:33:45 2011 +0900 - - Support more recent GPAC versions - -commit aa4e80ddbbca8a9ab217dfc8686efea5ebbf4315 -Author: Anton Mitrofanov -Date: Sat Apr 23 15:19:40 2011 +0400 - - Fix decoder desync with positive --chroma-qp-offset and zones - -commit e9389034a5e4812a185f4b66654925d8adf4c437 -Author: Anton Khirnov -Date: Wed Apr 20 10:53:44 2011 +0200 - - Use AVMEDIA_TYPE_VIDEO instead of deprecated CODEC_TYPE_VIDEO - - Fixes build with lavf/lavc 53. - -commit 039675b4d28731181bd49a0a076fb72148d8e962 -Author: Fiona Glaser -Date: Fri Apr 15 16:33:27 2011 -0700 - - Force pic-struct for Blu-ray compat + fake-interlaced - -commit 24db70d508d5afdc4e9f5ba017aa875d80fc1487 -Author: Anton Mitrofanov -Date: Thu Apr 14 12:14:52 2011 -0700 - - Fix open-gop with no-psy - -commit 0a937473a99701d986bb285056438355b0998a96 -Author: Steven Walters -Date: Thu Apr 14 11:09:45 2011 -0700 - - Fix build with disabled asm - -commit 42693c88c7c10f6156a4bb3a980a04eb23f02276 -Author: Fiona Glaser -Date: Wed Apr 6 02:16:42 2011 -0700 - - Improve Blu-ray compliance - Use dec_ref_pic_marking SEIs to repeat B-ref referencing information. - Don't allow B-frames to reference frames outside their minigop. - -commit e54ea0c803b63af5c473a6218ee466d5b34e5d5c -Author: Fiona Glaser -Date: Wed Apr 6 17:15:50 2011 -0700 - - Consolidate Blu-ray hacks into --bluray-compat - This option is now required for Blu-ray compatibility. - --open-gop bluray is now gone (using bluray-compat and open-gop implies a Blu-ray compatible open-gop). - This option doesn't automatically enforce every aspect of Blu-ray compatibility (e.g. resolution, framerate, level, etc). - -commit bdb88277a5080cd120df699373431cee95d57bc8 -Author: Fiona Glaser -Date: Tue Mar 29 05:33:44 2011 -0700 - - Add SSE support to rectangle.h for 16-byte stores - Uses GCC vector intrinsics; may be suboptimal on particularly old GCC versions. 
- -commit c52f879268118212ac12d8edd7943210726855fb -Author: Steven Walters -Date: Tue Apr 12 19:22:56 2011 -0400 - - Do not force Intel Compiler to target pre-mmx architecture for x86 - Caused a speed penalty against gcc equivalents. - -commit 97797d2dd4b41e22af651accd41c29e2a469decb -Author: Fiona Glaser -Date: Tue Apr 12 01:16:48 2011 -0700 - - Warn users when using --(psnr|ssim) without --tune (psnr|ssim) - This is a counter to the proliferation of incredibly stupid psnr/ssim "benchmarks" of x264 in which the benchmarker conveniently "forgot" --tune psnr/ssim, crippling x264 in the test. - -commit 59ce517a0213bd8505bb4e6315e2970df04dae6e -Author: Dylan Yudaken -Date: Thu Apr 7 16:06:19 2011 -0700 - - Remove redundant mbcmp calls in weightp analysis - -commit 7b4a60338d8e1465d1f617eaa326289c16b427e8 -Author: Anton Mitrofanov -Date: Wed Apr 6 22:48:57 2011 +0400 - - Use integer math for filler size calculation - -commit 0c3054f0dfc84b99b8305ebbeb647533a741994d -Author: Anton Mitrofanov -Date: Tue Apr 5 14:06:54 2011 +0400 - - Disable progress for FFMS input with --no-progress - -commit 9f38a5bf62ee6bd16444243066dd4f01aceace16 -Author: Michael Stuurman -Date: Thu Mar 31 13:45:22 2011 -0700 - - Fix bug in intra-refresh ratecontrol - Row SATDs were slightly incorrect. - -commit d6daf2b914a658ecc57346a7348f5f8400b003d2 -Author: Fiona Glaser -Date: Sun Apr 10 04:39:51 2011 -0700 - - Cosmetics: fix some signedness issues found by -Wsign-compare - -commit 2246e451e0545a534144f04ef5f2b5d23c2f1a38 -Author: Fiona Glaser -Date: Sun Apr 3 16:31:52 2011 -0700 - - Minor fixes - Fix a comment typo. - Align an array properly. - Make x264_scan8 unsigned: saves a bunch of movsxd instructions on x86_64. - -commit 1d9b1bc7bc75c35aee7c8f6e0a2ef80bfefc57ec -Author: Anton Mitrofanov -Date: Fri Mar 25 00:08:40 2011 +0300 - - Improve C99 support checks in configure - Fixes configuration with Intel compiler in some cases. 
- -commit 6cbc47d476f610218c7e973d2c806b24bb4dd1b9 -Author: Fiona Glaser -Date: Fri Mar 18 18:24:33 2011 -0700 - - Eliminate the possibility of CAVLC level code overflow - Instead, if it happens, just re-encode the MB at higher QPs until it fits. - -commit 34e3a69755995a23c1f10f34321521af4182e559 -Author: Henrik Gramner -Date: Sat Mar 12 23:21:09 2011 -0800 - - x86 SIMD versions of optimize_chroma_dc - SSE2/SSSE3/SSE4/AVX implementations. - About 3x faster. - -commit 49a32b91eda5afc05e8a4a22577f6182987205c6 -Author: Manuel Rommel -Date: Sun Dec 26 18:52:49 2010 +0100 - - Add Altivec version of mc_weight - -commit bd38b231d12f4deb9d0d43b1f5f22c157e1b115c -Author: Manuel Rommel -Date: Sun Dec 26 21:41:33 2010 +0100 - - Add Altivec versions of mbcmp_x functions - These aren't merged versions, they just call the existing asm code. - A merged implementation would of course be faster. - -commit 591d45ee98b29e92d14f1fff06f50c24d9f9262a -Author: Steven Walters -Date: Wed Mar 2 21:31:27 2011 -0500 - - Recognize cygwin as itself when not targeting mingw - Also fix broken thread detection on cygwin. - -commit e19e206cbf0547ebf0394d9542c429c55bd5409a -Author: Steven Walters -Date: Wed Mar 2 20:39:25 2011 -0500 - - Patch Intel's CPU dispatcher - Reduces Intel Compiler's bias against non-Intel CPUs. - - Big thanks to Agner for the original information on how to do this. - -commit 4c624dccf4d1e13653be90c26dac49664c0f8241 -Author: Steven Walters -Date: Mon Feb 28 19:07:40 2011 -0500 - - Intel Compiler support - - Big thanks to David Rudie, the original author of this patch. 
- -commit 2e66a20694b51a36246d04008aa526cba48d625c -Author: Kieran Kunhya -Date: Tue Mar 8 09:41:46 2011 +0000 - - Cosmetics: make struct definition braces consistent - -commit f2b079718b8658bf453f0276d28503984a6dcff1 -Author: Fiona Glaser -Date: Wed Mar 23 20:59:41 2011 -0700 - - Fix restoring of console title on Windows with ffms indexing - -commit 86f8a74a117fc697c58822c0dd7d9d841959151c -Author: Anton Mitrofanov -Date: Thu Mar 10 00:31:26 2011 +0300 - - Fix possible buffer overflow in mp4 muxer - -commit d78a296270c3d8bbff6d81176eb510c1a75d23c9 -Author: Nick Lewycky -Date: Mon Mar 7 18:10:36 2011 -0800 - - Remove inline asm syntax not supported by LLVM's assembler - Doesn't affect compiled output outside of LLVM. - -commit 937cae7115f4ef42ecc285c639b533456226b0c1 -Author: Fiona Glaser -Date: Fri Feb 18 17:50:42 2011 -0800 - - Fix 10L in r1912 - SSSE3 code got used in MMX/SSE2 and vice versa (in hpel). - -commit abc2283e9abc6254744bf6dd148ac25433cdf80e -Author: Daniel Kang -Date: Sat Jan 15 13:44:45 2011 -0500 - - Add AVX functions where 3+ arg commands are useful - -commit 7f918d15fbd8d6c65ae1548c058765ebc4b83203 -Author: Fiona Glaser -Date: Mon Feb 7 03:15:03 2011 -0800 - - Frame-packing 3D: don't place scenecuts on right views - Caused problems for some players. - -commit 3202f34117d0850eec9ec937cbb5fa72f89b849b -Author: Fiona Glaser -Date: Fri Feb 11 00:54:51 2011 -0800 - - Improve slice-max-size handling of escape bytes - More accurate but a bit slower. Helps deal with a few obnoxious corner cases where the current algorithm failed. 
- -commit afd969a67bc6f69dccd71e5e7e68755c49212cac -Author: Nathan Caldwell -Date: Thu Feb 10 21:25:00 2011 -0700 - - Use bs_write1 wherever possible in header writing - -commit c2659d26be6c20727aa78b699b1d282b3a3f2718 -Author: Fiona Glaser -Date: Tue Feb 8 14:48:18 2011 -0800 - - Remove obsolete mvcost init code - -commit 03a8f4c8e32bf03096344244271ade318e252ce1 -Author: Anton Mitrofanov -Date: Sun Feb 13 12:19:13 2011 -0800 - - Fix memory leak on encoder close if not all frames are flushed - -commit 228f57c2121b8473001bb58b13a075cedca033e7 -Author: Anton Mitrofanov -Date: Sat Feb 12 05:19:55 2011 -0800 - - Fix signedness bug in CPU detection - Luckily didn't affect anything due to C signedness rules. - -commit 33a44b55b815f135bdb46d77e660eaef56dc42b6 -Author: Fiona Glaser -Date: Fri Feb 11 13:47:27 2011 -0800 - - Fix dumb bug caused by stray semicolon - Caused noise reduction to run incorrectly in part of RD, but probably had no effect. - -commit 4a3e072ecd0e335c444dc80e49db0e6eaf59cef2 -Author: Anton Mitrofanov -Date: Thu Feb 10 05:05:53 2011 -0800 - - Fix malloc of zero size - Caused x264 to fail with some settings on systems that return a NULL pointer for malloc(0), like Solaris. - -commit f0b8dd33c0aa2a3487ca567d1f5207c90b1e314c -Author: Anton Mitrofanov -Date: Wed Feb 9 23:01:07 2011 +0300 - - Fix crash in mp4 muxer after failure of x264_encoder_open - -commit 24caade52252a0a41c4869525dde8e5a47c55063 -Author: Fiona Glaser -Date: Wed Feb 9 11:36:02 2011 -0800 - - Fix shadowed variable warning in ffms.c - -commit eaf5ce20cfd35c9fbb37e64e066ec61bd4bc5fcf -Author: Anton Mitrofanov -Date: Wed Feb 9 11:29:23 2011 -0800 - - Fix some Intel compiler warnings - -commit d147eea3cde028059f8c3ed65c49ad6692ecd629 -Author: Fiona Glaser -Date: Sun Feb 6 23:12:09 2011 -0800 - - Fix 10L in r1886 - Aspect ratio can't be set before SPS is initted. 
- -commit f1ae384f3f1987a389b1226150700bc83824c10e -Author: Fiona Glaser -Date: Fri Feb 4 20:49:45 2011 -0800 - - Improve update interval of x264cli progress information - Now updates every 0.25s instead of every N frames. - -commit 3fa4f5d25ae66ce7fd151c729ceceae13ec364b7 -Author: Fiona Glaser -Date: Sat Feb 5 01:16:49 2011 -0800 - - Windows: restore previous console title after encoding - MSDN docs claim that SetConsoleTitle's effect is reverted when the process terminates, but this doesn't always work properly. - Accordingly, manually revert the console title at the end of encoding. - -commit 7e288fcf3e5bd19afcfa8790976c75c7f6682731 -Author: Fiona Glaser -Date: Sat Feb 5 15:02:34 2011 -0800 - - Allow WEIGHTP_FAKE in interlaced mode - It seems to work fine as-is even though real weightp doesn't support interlacing yet. - -commit c1212c02dfb59118ac4363f61bbf3464042c250e -Author: Kieran Kunhya -Date: Wed Feb 2 11:01:13 2011 +0000 - - Output pic struct information in libx264 API - -commit 3240ec6c5284214c7af9f02dffd285014b3dae5d -Author: Fiona Glaser -Date: Sun Jan 30 00:00:09 2011 -0500 - - Enable FastShuffle on Penryn and Nehalem CPUs without SSE4 - -commit 12a37e22cf8c236433ccc8f105a85cd631fff685 -Author: Yasuhiro Ikeda -Date: Thu Feb 3 10:54:44 2011 +0900 - - Minor cosmetics in configure - -commit 6e57cced1034afa104358f6f12a70197181ad006 -Author: Anton Mitrofanov -Date: Fri Jan 28 18:44:24 2011 -0800 - - Various --help cosmetics - -commit c00e15b76b35beb95e66d39cf67828c36191d6e1 -Author: Loren Merritt -Date: Sun Jan 30 02:27:32 2011 -0800 - - x86inc.asm: error on duplicate functions - Compile error if there's two functions of the same name, instead of silently renaming one of them. 
- -commit ff3c1be48a673c479ad5d51ef1e97f59a369a035 -Author: Fiona Glaser -Date: Mon Jan 31 13:56:23 2011 -0800 - - Bump yasm version requirement to handle AVX - -commit b7c745c4a747629daba4dc6f765d32293cb4f3d6 -Author: Fiona Glaser -Date: Fri Feb 4 20:48:37 2011 -0800 - - Fix rare corruption with subme=10 - Also fix dumb bug in VBV emergency AQ scaling. - - Regression in r1881. - -commit f2ced3ff5f42784efe1b1d37738a645aad3fd52a -Author: Mans Rullgard -Date: Thu Feb 3 13:32:06 2011 -0800 - - Fix overflow in ARM NEON i16x16 planar pred - Patch backported from ffmpeg. - -commit 716cf882d7b31e6ffd9b00658a53227900e56cad -Author: Anton Mitrofanov -Date: Wed Feb 2 22:51:45 2011 +0300 - - Fix incorrect frame duration for VFR input for some frames - -commit d7c05794b6645de764ca9f9a0b71b15f9761eeda -Author: Anton Mitrofanov -Date: Tue Feb 1 00:43:03 2011 +0300 - - Fix possible division by zero in mkv and flv muxers on close - This could crash if anything failed before output.set_param (for example, incorrect params refused by x264_encoder_open). - Bug introduced in r1873. - -commit 8c881320850542a496be53a107d1a13290a03785 -Author: Fiona Glaser -Date: Fri Jan 28 15:19:06 2011 -0800 - - Fix reconfiguration of parameters that modify the SPS - For now, this is only aspect ratio. - -commit 49638791347bf895cfa6ce1d3985947fb905659e -Author: Fiona Glaser -Date: Fri Jan 28 14:03:08 2011 -0800 - - Fix possible crash on Phenom with lookahead thread disabled - Misalign mask needs to be set for the main thread on entry, too. 
- -commit 2ae5b902b8a1a3275d31586841d12dc3191b1389 -Author: Anton Mitrofanov -Date: Sat Jan 29 12:43:34 2011 -0800 - - Hotfix for some bugs in VBV emergency - -commit 2f676f6f7966a536536e4d33829a8030a0694259 -Author: Fiona Glaser -Date: Thu Jan 27 13:59:20 2011 -0800 - - Fix warnings in cpu.c - -commit ce7ee9d2eeed6a81ab9b2a7d8d9f4bc5674c18c7 -Author: Fiona Glaser -Date: Thu Jan 27 05:33:25 2011 -0800 - - Check for OS AVX support in addition to CPUID - Even if not using ymm registers, AVX operations will cause SIGILLs on unsupported OSs. - On Windows, AVX is only available on Windows 7 SP1 or later. - -commit e6025413ea3e4d9ee0e4b1e1b4d38a9eeb949d49 -Author: Fiona Glaser -Date: Tue Jan 18 00:52:03 2011 -0800 - - VBV emergency mode - Allow ratecontrol to select "quantizers" above the maximum. - These "quantizers" progressively decimate the source to avoid VBV underflow. - x264 is now VBV compliant even with input as evil as /dev/random. - -commit 68cda11b73471d090776cdbe5dbff7f8563fadb5 -Author: Fiona Glaser -Date: Wed Jan 12 09:54:33 2011 -0800 - - Initial AVX support - Automatically handle 3-operand instructions and abstraction between SSE and AVX. - Implement one function with this (denoise_dct) as an initial test. - x264 can't make much use of the 256-bit support of AVX (as it's float-only), but 3-operand could give some small benefits. - -commit 8fb87147d3152fb37724d7c2996ade9263ddd90e -Author: Fiona Glaser -Date: Tue Jan 11 11:04:52 2011 -0800 - - Double the base framerate for frame-sequential 3D files - A 60fps frame-sequential 3D file is really only 30 FPS, just alternating between eyes. - Accordingly, ratecontrol should treat it as if it was really 30 FPS. - This will increase the bitrate at the same CRF level for such videos when --frame-packing 5 is used. - -commit b2bf3f99c060fdbd930e9ed5500a05da1344c229 -Author: Yasuhiro Ikeda -Date: Thu Jan 20 23:12:01 2011 +0900 - - Add --input-fmt option to lavf input - Conforms to ffmpeg's `-f` option. 
- Use this when lavf fails to guess the input format. - -commit 240c82e70f68c430f459b067a811de8918ca7d79 -Author: Tony Young -Date: Fri Jan 21 13:06:28 2011 -0800 - - Two improvements to regression test script - Use SHA-1 hashes for temporary file names to avoid exceeding OS filename length limits. - Correctly return to the original branch after testing if you were on a branch. - -commit 123b298d98ea67f80a039d9a0b3b2519247e4922 -Author: Vittorio Giovara -Date: Fri Jan 14 10:02:33 2011 -0800 - - Add some missing values to the non-extended SAR table - -commit ee9bc136e9e6f0875308c9505a08360294e7cd4a -Author: Sean McGovern -Date: Fri Jan 14 02:10:12 2011 -0500 - - Bump dates to 2011 - -commit e0b101821f3a900fa2958194cb316a3440455d60 -Author: Fiona Glaser -Date: Tue Jan 18 12:31:26 2011 -0800 - - More correctly write frame-packing SEI flags - - Bug reported by Nero. - -commit 6d995330cc86f4d914ee718492121618bb0f37b6 -Author: Fiona Glaser -Date: Thu Jan 20 14:45:57 2011 -0800 - - Don't die in x264_encoder_close if an error occurred in x264_encoder_encode - Also clean up properly in x264.c (mostly useful for finding bugs in cleanup). - -commit 5696ec3b8c69b57bd7bf0692c31578439fce3b5d -Author: Fiona Glaser -Date: Sun Jan 23 21:03:14 2011 -0800 - - Fix reconfiguration of b_tff - Attempting to change field order during encoding could cause slight corruption. - - Also fix delta_poc_bottom to be correctly set if interlaced mode is used without B-frames. - -commit d4fbb266a0077e1c90e4f3baf19610db1565ecba -Author: Steven Walters -Date: Sun Jan 23 15:19:11 2011 -0500 - - Fix x264 CPU detection with >=64 CPUs on Windows - x264 won't actually use more than one processor group's worth of CPUs, however. - This isn't a problem, as a single x264 instance can't effectively use a full 64 cores anyways. 
- -commit 31467f3270e791d1fd4728abd6f84c35819f757e -Author: Holger Lubitz -Date: Fri Jan 21 19:13:57 2011 +0100 - - Remove high bit depth mmx quant - It was using pmuludq which is sse2, and the function isn't really possible without pmuludq. - -commit fb223b970976fab3edab500b112f670d6ec8dd2d -Author: Holger Lubitz -Date: Sat Jan 22 16:49:23 2011 +0100 - - Fix cacheline check in avg2 w20 cache32 - Didn't result in incorrect output, only slightly decreased speed on a few obsolete systems. - -commit bdf0ac7468b5543a259fd5dcf6f17474ead4fb05 -Author: Holger Lubitz -Date: Fri Jan 21 17:17:29 2011 +0100 - - Fix illegal instruction in high bit depth ssd_nv12_mmxext - Unfortunately paddq isn't available in mmxext, only in sse2 and up. - Also fixes to actually allow widths up to 16416/32832 without overflow. - -commit c583687fab832ba7eaf8626048f05ad1f861a855 -Author: Fiona Glaser -Date: Thu Dec 23 19:33:01 2010 -0500 - - VFR/framerate-aware ratecontrol, part 2 - MB-tree and qcomp complexity estimation now consider the duration of a frame in their calculations. - This is very important for visual optimizations, as frames that last longer are inherently more important quality-wise. - Improves VFR-aware PSNR as much as 1-2db on extreme test cases, ~0.5db on more ordinary VFR clips (e.g. deduped anime episodes). - - WARNING: This change redefines x264's internal quality measurement. - x264 will now scale its quality based on the framerate of the video due to the aforementioned frame duration logic. - That is, --crf X will give lower quality per frame for a 60fps video than for a 30fps one. - This will make --crf closer to constant perceptual quality than previously. - The "center" for this change is 25fps: that is, videos lower than 25fps will go up in quality at the same CRF and videos above will go down. - This choice is completely arbitrary. - - Note that to take full advantage of this, x264 must encode your video at the correct framerate, with the correct timestamps. 
- -commit 247f504d3c7ac64a87ed5a12bab0f6b99af5959c -Author: Fiona Glaser -Date: Fri Dec 31 22:54:16 2010 -0500 - - Improve reference ordering in interleaved 3D video - Provides a decent compression improvement when encoding interleaved 3D content (--frame-packing 5). - Helps more without B-frames and at lower bitrates. - Note that x264 will not do this optimization unless --frame-packing 5 is used to tell x264 that the source is interleaved 3D. - - Tests consistently show that interleaved frame packing is by far the best way to compress 3D content. - It gives a ~35-50% compression benefit over separate streams or top/bottom or left/right coding. - - Also finally add support for L1 reference reordering (in B-frames). - Also add support for reordered ref0 in L0 and L1 lists; could be useful in the future for other things. - -commit c081c854524099e65a2273bdbd67ac344b01ae03 -Author: Fiona Glaser -Date: Tue Dec 21 20:58:10 2010 -0500 - - Cosmetics: fref0/1 -> fref[2] and i_ref0/1 -> i_ref[2] - A much-needed refactoring, plus makes the next patch easier. - -commit e373f64359ec14cf4744a3d5a50eb3e00289805d -Author: Alex Wright -Date: Sat Dec 25 19:31:00 2010 +1000 - - Check an extra offset during weightp analysis - Up to 0.1 - 0.6 dB gain on some fade-ins with --weightp 1, less with --weightp 2. - -commit 8e3212863cd22b2c6f71acd61d575b7b25a7f1c1 -Author: Daniel Kang -Date: Tue Jan 4 15:27:38 2011 -0500 - - SSE2 high bit depth SSIM functions - - Patch from Google Code-In. - -commit 770718bc498bbc215c3f0876013de2b2b3c1db32 -Author: George Stephanos -Date: Sun Jan 2 11:26:10 2011 -0500 - - SSE2 high bit depth intra_predict_(8x8c|16x16)_p - - Patch from Google Code-In. - -commit bc8948fc0aa57bb9099dcd647fe5775322580e0a -Author: Joe Cortes -Date: Fri Dec 24 21:33:57 2010 -0600 - - MMX high bit depth coeff_last4 - - Patch from Google Code-In. 
- -commit af617efc12d39396e758adaa2b7b0447aed683c3 -Author: Daniel Kang -Date: Thu Dec 23 12:15:03 2010 -0500 - - SSE2 high bit depth zigzag_interleave_cavlc - - Patch from Google Code-In. - -commit 648147bbc16722e67173c588c662098267294d93 -Author: Daniel Kang -Date: Wed Dec 22 17:53:08 2010 -0500 - - MMX/SSE2/SSSE3 high bit depth frame_init_lowres functions - - Patch from Google Code-In. - -commit 6b04221c78325a91e2b9b7a3e6deba86d4d23ed6 -Author: Daniel Kang -Date: Thu Dec 23 23:19:39 2010 -0500 - - MMX high bit depth 4x4 intra predict functions - DDR and HD directions, as well as making HU faster. - Also enable some SSE2 versions of high bit depth functions that were added but not properly enabled. - - Patch from Google Code-In. - -commit f0f76f991280c3b90450095e7880b3791fa6a746 -Author: Daniel Kang -Date: Wed Dec 22 16:51:22 2010 -0500 - - SSE2 high bit depth 8x8 intra predict functions - DDL, DDR, VR, HU, and HD directions, as well as the 8x8 filter. - Also make 8-bit MMX VR faster, by backporting the optimizations from the high bit depth version. - - Patch from Google Code-In. - -commit df5d19b45cd364b8015f09cf2eeb2c3cd7739039 -Author: George Stephanos -Date: Wed Dec 22 15:44:03 2010 -0500 - - MMX/SSE2 high bit depth 8x8c intra predict functions - - Patch from Google Code-In. - -commit 1d22dd50b5792746ff28b2b4815c17c82bec5af3 -Author: Daniel Kang -Date: Sun Dec 19 16:31:59 2010 -0500 - - MMX version of high bit depth plane_copy - And various cosmetics. 
- - Patch from Google Code-In - -commit 341b61474a9bb29d9a1c1a007b7d0d1b0a10e117 -Author: Fiona Glaser -Date: Sat Dec 18 12:40:13 2010 -0800 - - Faster x86 predict_8x8c_dc, MMX/SSE2 high bit depth versions - -commit a36face6a7d9669be6a6e40626d530ef9ff31f30 -Author: Fiona Glaser -Date: Sat Dec 18 05:40:49 2010 -0800 - - SSSE3 high bit depth sad_aligned functions - -commit 6ecfa83c34b665ca9e98814babf4bd3e09ac6706 -Author: Daniel Kang -Date: Thu Dec 16 04:41:17 2010 -0800 - - MMX/SSE2 high bit depth interleave functions - - Patch from Google Code-In. - -commit 15595e6d94940064046c61e64ef9cea993f3e05c -Author: Joey Geralnik -Date: Wed Dec 15 09:14:56 2010 +0200 - - MMX/SSE2 high bit depth avg functions - - Patch from Google Code-In. - -commit c3937a516e9d865315662435fc03c42c31276b7e -Author: Daniel Kang -Date: Tue Dec 14 22:47:51 2010 -0500 - - MMX/SSE2 high bit depth deinterleave functions - - Patch from Google Code-In - -commit 8bed3a1418edf4b146d84445e692b17cf854bbe5 -Author: Fiona Glaser -Date: Wed Jan 5 23:29:36 2011 -0500 - - Shut up some incorrect gcc uninitialized variable warnings - -commit 116ff56c7f2b6b47fb73ae9fc30590caf038dc09 -Author: Anton Mitrofanov -Date: Sat Dec 25 00:55:14 2010 +0300 - - Write --crop-rect and --frame-packing options to x264 SEI - -commit 3f700a324bc445bce02433ce2a1444501f4c929a -Author: Fiona Glaser -Date: Wed Dec 15 13:00:14 2010 -0800 - - Add missing space to parameter SEI - -commit bb2d6b69e85797367fa29071c91c91a03a2daff2 -Author: Kieran Kunhya -Date: Tue Dec 28 00:54:28 2010 +0000 - - Fix typo in documentation - -commit 7882a05dfe0c3de9f2e26dcf93f321ce65a3e82d -Author: Anton Mitrofanov -Date: Sat Dec 18 08:29:18 2010 -0800 - - Fix redundant linebreaks in statsfile with weightp - -commit f5d4ca6a38ef6748dde02fe0603c1fa67fbd982f -Author: Ramiro Polla -Date: Wed Dec 15 14:35:02 2010 -0200 - - Use cross_prefix for strings in endian test and as test - -commit 5f7967020eed7cb99dafe1366af9642d05cea3cd -Author: Anton Mitrofanov -Date: 
Sun Jan 2 14:36:53 2011 -0500 - - Fix checkasm test for quant in high bit depth - Eliminate some spurious failures. - -commit 0c4fa824ffacadf226cf68cedcf78602769d15d4 -Author: Steven Walters -Date: Thu Dec 30 20:35:10 2010 -0500 - - Fix broken YV12 handling in the resize filter - -commit 712f6dff3ab96abeee4c7440302d81e245451d2c -Author: Fiona Glaser -Date: Wed Jan 5 22:21:18 2011 -0500 - - Fix bug with negative lookahead mb costs in high bit depth - -commit 0aa25f66eaafecbc0b6eb86d24c04119e0454e76 -Author: Daniel Kang -Date: Tue Jan 4 14:33:05 2011 -0500 - - Fix overflow in SSIM calculation in 10-bit - -commit 3c50b9b4cc52b91dcb71bfe2a542aee9fb9a9a97 -Author: Anton Mitrofanov -Date: Fri Dec 24 14:52:57 2010 -0500 - - Fix some possible overflows in VFR ratecontrol with extreme timebases - -commit 5b91a48c7b88d27201800dc204e743bd2e76051a -Author: Steven Walters -Date: Sun Jan 9 16:01:04 2011 -0500 - - Fix memory leak in lavf demuxer. - Leak only occurred with input files that have more than one video stream. - -commit 50cae3cf1db1065a3570bd9ef29059c8ab49979e -Author: Fiona Glaser -Date: Fri Dec 24 17:28:42 2010 -0500 - - Fix satd predictors with high bit depth - Resulted in odd CRF-mode results with --no-mbtree, as well as suboptimal VBV handling. - -commit d50760c144dc0ee5023e166c7dd35dccd00a32b3 -Author: Daniel Kang -Date: Fri Jan 7 23:05:50 2011 -0500 - - Fix compile error with high bit depth and disable-asm - -commit bab4eadd11ca59745dfce369c9fea427c73317a0 -Author: Fiona Glaser -Date: Sat Dec 18 08:22:34 2010 -0800 - - Really fix gcc win32 misalignment crash - gcc's -fno-zero-initialized-in-bss only works if an explicit initializer (e.g. = {0}) is used. - -commit 74ee50e539dc06bea6b4bbd2f674c21248c05970 -Author: Steven Walters -Date: Sat Dec 11 20:30:29 2010 -0500 - - Support for native Windows threads - - Patch originally by Pegasys Inc. 
- -commit 25a1ffb266f409fe657d834b87c47e63cdaded3b -Author: Daniel Kang -Date: Mon Dec 13 17:15:12 2010 -0500 - - MMX/SSE2 high bit depth weight_cache/offset(sub|add) functions - - Patch from Google Code-In. - -commit fd8cfd445016db99a99b7a4d3769e52599aeda0e -Author: Daniel Kang -Date: Wed Dec 8 17:56:22 2010 -0500 - - SSE2 high bit depth dequant functions - - Patch from Google Code-In. - -commit 7271fc01d55944eb91ac7fdf2d4c96952bd609b2 -Author: Daniel Kang -Date: Tue Dec 7 22:48:15 2010 -0500 - - SSE2 high bit depth zigzag functions - - Patch from Google Code-In. - -commit 6f4d6fe9b2abe7755d0e8f16375e790b82174c3b -Author: Daniel Alexandru Morie -Date: Tue Dec 7 06:11:02 2010 -0800 - - MMX/SSE2 versions of high bit depth store_interleave - - Patch from Google Code-In. - -commit 898579cca8b0d2f7a63a4c3f0534226529e6e933 -Author: Vittorio Giovara -Date: Fri Dec 10 20:43:00 2010 -0800 - - Add frame-packing SEI support for signalling 3D video - -commit dde9e9bc90fbd9e2553a04a5586085ba1500394e -Author: Fiona Glaser -Date: Sat Dec 11 03:48:59 2010 -0800 - - Allow 8x8dct+cavlc+lossless with subme>=6 - -commit 7281537747fb52efec3272fd5a155bf7339d3e7a -Author: Yasuhiro Ikeda -Date: Thu Dec 9 12:00:24 2010 +0900 - - Add interlaced/no-interlaced case to regression test script - -commit adb5d4bffb8bd9cf5cb170356bcdc931550da904 -Author: Yasuhiro Ikeda -Date: Thu Dec 9 11:59:49 2010 +0900 - - Save more memory with weightp in >8-bit - -commit 031b37d34b6b74f6806e8602b57fbb4f325c33b2 -Author: Yasuhiro Ikeda -Date: Thu Dec 9 11:57:38 2010 +0900 - - .gitignore more untracked file types - -commit 9e79fa38c4c0a98ae99b5d1d2e2e4480ed0c67d5 -Author: Anton Mitrofanov -Date: Tue Dec 7 17:49:21 2010 +0300 - - Work around gcc/ld alignment bug on win32 - Fixes problems due to misalignment of static zero arrays (win32 ld can't align .bss properly). 
- -commit 5043c17357f44bc875e4f90e586e236948826c72 -Author: Daniel Kang -Date: Tue Dec 7 15:19:46 2010 -0500 - - Fix high bit depth intra pred functions - And re-enable them accordingly. - - Patch from Google Code-In. - -commit 4fc1c711dc89541d251700b89d5e462a08a9f467 -Author: Fiona Glaser -Date: Sat Dec 11 13:37:09 2010 -0800 - - Fix weightp analysis with high bit depth - -commit 2a3023965940f638aaeff35bab16651682af07c0 -Author: Oskar Arvidsson -Date: Thu Dec 9 12:19:57 2010 +0100 - - Fix build error in high depth - Caused by multiple definitions of x264_add8x8_idct_sse2. - -commit 972de279b363210fdf4858e3efe7203e979b6e36 -Author: Fiona Glaser -Date: Tue Dec 7 03:15:46 2010 -0800 - - Hotfix for high bit depth - Temporary fix for some unaligned access crashes. - -commit f68e1f8d5d9c9d9d7f0b2f91b587254d0bbec3da -Author: Anton Mitrofanov -Date: Tue Dec 7 13:44:55 2010 +0300 - - Delete x264_config.h on distclean - -commit ef4a8d2e79049c8311a3ab78860557496688db93 -Author: Daniel Kang -Date: Thu Nov 25 19:44:56 2010 -0500 - - Tons of high bit depth intra predict asm - - Patch from Google Code-In. - -commit c801fc6c2a5d0804c41ea77205fa049f10452dfe -Author: David Czech -Date: Sat Nov 27 17:34:32 2010 -0800 - - SSE2 high bit depth 8x8/16x16 idct/idct_dc - - Patch from Google Code-In. - -commit abe11eaba564cfe564245dcac1f5e439a800ff1f -Author: Ramiro Polla -Date: Tue Nov 30 02:17:23 2010 -0200 - - Create and install x264_config.h - This header can be used to determine the bit-depth and license of libx264. - -commit ee6b482234b840e9dbce892b5b13f18f66d6fe54 -Author: Steven Walters -Date: Wed Oct 13 21:53:50 2010 -0400 - - Detect Avisynth initialization failures - Detect if there is a critical Avisynth initialization failure and print the associated error. - This, however, requires a feature present in the latest version of Avisynth alpha (2.6). - Previous versions are unaffected. 
- -commit 5b5b746834be4cb5c5d6ce275aed38e90d39cbd0 -Author: Fiona Glaser -Date: Thu Nov 25 22:12:07 2010 -0800 - - Automatically restrict QPs to avoid quantization (under|over)flow - --cqm jvt and similar should now work "out of the box" instead of requiring futzing with --qpmin. - -commit 90fa32a09d2f126c8be53cd2331c8bbd3f44fcce -Author: Anton Mitrofanov -Date: Sat Dec 4 23:29:08 2010 +0300 - - Don't try to get timecodes if reading frame failed - This fixes "input timecode file missing data for frame" warning with piped input where we don't know total number of frames. - -commit 8245feb264ce7b0ea75a654af9f90d74e45391e8 -Author: Oskar Arvidsson -Date: Thu Nov 25 23:05:21 2010 +0100 - - Fix possible overflow in sub4x4_dct in 10-bit builds - -commit 4c9fe3fbedf99a8e4920233ee7df7d4e4fe27f0e -Author: Fiona Glaser -Date: Mon Dec 6 14:19:09 2010 -0800 - - Fix bug in intra-refresh + threads - Intra refresh bar quality increase wasn't correctly applied. - -commit 7f3fed96bf012921cf71e877423f4afaf6ffeb2b -Author: Fiona Glaser -Date: Mon Dec 6 12:00:13 2010 -0800 - - Fix file handle leak in libx264 on error - -commit b15e52142bd25436dfb578fbee35eb89d337c015 -Author: Steven Walters -Date: Sun Oct 10 18:17:35 2010 -0400 - - Fix incompatible csp format issue - Problem occurred with unknown pixel formats and non mod2 resolutions in the resize filter. - -commit 88cc4b0d0e0e69107cfc26a7d4131341031ec4eb -Author: Anton Mitrofanov -Date: Sat Nov 27 15:54:39 2010 -0800 - - Really fix fittobox resize rounding code - -commit 23060612f09601a0602ad9ff25fa9dd31c1b362a -Author: Yusuke Nakamura -Date: Sun Dec 5 09:31:01 2010 +0900 - - Fix regression in rev1549 - Skip auto timebase denominator generation when generated timebase denominator exceeds UINT32_MAX. - Also fix double free. 
- -commit 6e41f7e2aa03aabbedccc40278f14ffbfb2cc0f8 -Author: Anton Mitrofanov -Date: Sun Nov 28 01:05:02 2010 +0300 - - Fix --tcfile-in if timecode v2 file starts from nonzero pts - -commit 75110e63aba0876d35ffd37bf8edbb47639f9bc6 -Author: Sean McGovern -Date: Fri Dec 3 22:30:51 2010 -0800 - - SPARC/Solaris build fixes - -commit ec1087a42bacd755d0a553fa13259d3af4add44b -Author: Fiona Glaser -Date: Thu Nov 25 16:47:29 2010 -0800 - - Fix typo in r1797 - -commit cab2d8ad24e29c0d78d799a496608a44554c23f3 -Author: Tony Young -Date: Wed Nov 24 16:58:38 2010 -0800 - - Add Python regression test script - - Patch from Google Code-In. - -commit 7e3019a3cef6710378c7d3090fa3d3348b59de6b -Author: Alex Wright -Date: Wed Nov 24 02:19:51 2010 -0800 - - Make --weightp 1 a better speed tradeoff - Since fade analysis is now so fast, weightp 1 now does fade analysis but no reference duplication. - This is the opposite of what it used to do (reference duplication but no fade analysis). - This also gives weightp's better fade quality to faster presets (up to superfast). - -commit aa5a32938309e649f0b0a258312c00719fb498c1 -Author: Daniel Kang -Date: Tue Nov 23 20:29:37 2010 -0500 - - SSE versions of some high-bit-depth DCT functions - Our first Google Code-In patch! 
- -commit 00524dfa8cef310b44a5e7dd3723c5072db5fa75 -Author: Anton Mitrofanov -Date: Tue Nov 23 23:06:51 2010 +0300 - - Clean up weightp analysis function - -commit 580c5f2e7d9808850632a3748faa529e380bdc1b -Author: Fiona Glaser -Date: Fri Nov 19 16:58:38 2010 -0800 - - Add API function to return max number of delayed frames - -commit dbed9592361abde6110c737d832367a0529815e7 -Author: Fiona Glaser -Date: Thu Nov 25 13:01:33 2010 -0800 - - Copy field order flag in encoder_reconfig - -commit cbc37c5b89466abc620f5c41b8602d4db399936c -Author: Yasuhiro Ikeda -Date: Wed Nov 24 23:09:54 2010 +0900 - - Cosmetics in configure - -commit 032880c9727bb1cefc4d1711c84152a5eca6fd07 -Author: Yasuhiro Ikeda -Date: Mon Nov 22 11:01:57 2010 +0900 - - Add some more info to `x264 --version` - -commit ca8f00c7604cac37bd3103135521ebdc2d94340b -Author: Fiona Glaser -Date: Sat Nov 20 23:30:42 2010 -0800 - - Change qpmin default to 0 - There's probably no real reason to keep it at 10 anymore, and lowering it allows AQ to pick lower quantizers in really flat areas. - Might help on gradients at high quality levels. - The previous value of 10 was arbitrary anyways. - -commit 1cf769740c0fd143cb03b4290ba5238fce13eff6 -Author: Fiona Glaser -Date: Thu Nov 25 13:01:16 2010 -0800 - - Fix ticks_per_frame check for VFR input - -commit b3dc88f65543b3f85661c71b7ffe96a6337b94f1 -Author: Steven Walters -Date: Mon Nov 22 10:31:05 2010 +0900 - - Fix configure so that boolean configuration options are 1/0 - - There are many cases of 1/undef, not 1/0. 
- -commit d50e971f34ce2b18a2e11162126dc4de9a5d6c5e -Author: Sean McGovern -Date: Sun Nov 21 01:59:33 2010 -0500 - - Only build SPARC VIS asm if high bit-depth is disabled - -commit 946f81551e3e8a1e6dc022a08788ba3004c9cf42 -Author: Sean McGovern -Date: Sun Oct 10 19:34:18 2010 -0400 - - Fix build on SPARC Solaris 10 - -commit 13d9e7021bbaa07049c0e2c34cc1389c293daef0 -Author: James Darnley -Date: Sun Nov 21 10:50:48 2010 +0100 - - Fix resize filter rounding code - -commit d9421c20385527c92236a82efeca0f6af4220d2f -Author: Anton Mitrofanov -Date: Mon Nov 22 17:17:36 2010 +0300 - - Fix regression in chroma weightp - Missing cache calls could cause artifacts, encoder/decoder desync. - -commit 729d9bcc46f3ab24f603ef7ab1603aee1669f32c -Author: Fiona Glaser -Date: Fri Nov 19 15:40:23 2010 -0800 - - Fix some crashes with high bit depth - Not all arrays were sufficiently aligned. - -commit f92aa4ecd9029c224afeaf9c59b3091602b6b641 -Author: Fiona Glaser -Date: Sun Nov 14 03:34:26 2010 -0800 - - Chroma weighted prediction - Like luma weighted prediction, dramatically improves compression in fades. - Up to 4-8db chroma PSNR gain in extreme cases (short, perfect fade-outs). - On actual videos, helps up to ~1% overall. - One example video with a decent number of fades (ef OP): 0.8% bitrate reduction overall, 7% bitrate reduction just counting chroma. - Fixes a lot of artifacts in fades at lower bitrates. - - Original patch by Dylan Yudaken . - -commit fa28f5b96dc61f13aec05ca75a24d0b34a5fc1b0 -Author: Fiona Glaser -Date: Thu Nov 18 08:51:27 2010 -0800 - - Support custom cropping rectangles - Supposedly useful for 3D television applications. - -commit 1382552b8c085af688e27f0417557ed69618051f -Author: Oskar Arvidsson -Date: Sun Nov 14 16:46:01 2010 +0100 - - Convert X264_HIGH_BIT_DEPTH to HIGH_BIT_DEPTH - Less verbose. 
- -commit abde94f64a2232f2ef6fb423d6138633442ef87a -Author: Oskar Arvidsson -Date: Sat Oct 30 20:16:33 2010 +0200 - - x86 asm for high-bit-depth pixel metrics - Overall speed change from these 6 asm patches: ~4.4x. - But there's still tons more asm to do -- patches welcome! - - Breakdown from this patch: - ~13x faster SAD than C. - ~11.5x faster SATD than C (only MMX done). - ~18.5x faster SA8D than C. - ~19.2x faster hadamard_ac than C. - ~8.3x faster SSD than C. - ~12.4x faster VAR than C. - ~3-4.2x faster intra SAD than C. - ~7.9x faster intra SATD than C. - -commit 3afd514e222d4c4f0c984d258b1c17c0f12d6b89 -Author: Fiona Glaser -Date: Sat Oct 30 19:13:05 2010 -0700 - - x86 asm for some high-bit-depth coefficient functions - ~7.9x faster denoise than C. - ~2.3x faster coeff_level_run than C. - ~6.6x faster coeff_last than C. - ~4.3x faster decimate_score than C. - - Also improve checkasm's decimate_score test. - -commit 612778d730df65acd4bc928aa3cd6770eb9c15e3 -Author: Oskar Arvidsson -Date: Sun Nov 14 03:33:30 2010 +0100 - - x86 asm for high-bit-depth motion compensation - ~8x faster qpel MC than C. - ~10x faster hpel than C. - -commit 7946d913a6e3c9d83c2ace10a4f01c5b4052d618 -Author: Oskar Arvidsson -Date: Thu Nov 4 02:13:43 2010 +0100 - - x86 asm for high-bit-depth quant - ~3.1-4.2x faster than C. - -commit 03c61538ae77f5bd5f6c4b0c7a3fc6f41c48bcf1 -Author: Oskar Arvidsson -Date: Sat Oct 30 16:55:48 2010 +0200 - - x86 asm for high-bit-depth DCT - Only MMX and DCT done so far; iDCT still needs asm as well. - ~4.4x faster than C. - -commit 515d560f84631bce4d12f04f47fe8074079de542 -Author: Oskar Arvidsson -Date: Sat Oct 30 11:42:52 2010 +0200 - - x86 asm for high-bit-depth deblocking - ~3.3x faster than C. 
- -commit 0016a8049a0e04a5719ddf24d0f03d1b332d7851 -Author: Oskar Arvidsson -Date: Sat Nov 13 14:42:54 2010 +0100 - - Use a 16-bit buffer in hpel_filter regardless of bit depth - This only works up to and including 10-bit (but we don't support anything higher yet). - -commit 0a6b2a688225a313fd934e5b01d48f7be3aa9f78 -Author: Henrik Gramner -Date: Tue Nov 16 21:23:12 2010 +0100 - - Use enums instead of magic numbers in x264_mb_partition_pixel_table - -commit 866ac45c4bb7b2bdae99dab241b7b344dc07fbe7 -Author: Karl Blomster -Date: Sun Nov 14 03:41:03 2010 -0800 - - Improve configure script logging - Now prints the test program that failed in addition to error messages. - -commit 845e22876cf668abb3764d58dbb91af86c1895ac -Author: Anton Mitrofanov -Date: Wed Nov 17 07:27:09 2010 -0800 - - Fix constrained intra pred mode selection - -commit 9c9f6340ebef4172eca5e5cf011826cadddb5012 -Author: Anton Mitrofanov -Date: Wed Nov 17 02:46:30 2010 +0300 - - Various high-bit-depth ratecontrol fixes - -commit b2f40814b2ef3e16a83e017f02af5d91187a4797 -Author: Loren Merritt -Date: Sun Nov 14 02:54:02 2010 -0800 - - Fix a crash in --dump-yuv for odd resolutions - -commit 8c08475df59f816145b2a8bef35039c5e11bd438 -Author: Anton Mitrofanov -Date: Thu Nov 11 01:40:52 2010 +0300 - - Improve flash detection algorithm change in r1765 - Now disables scenecuts only near the real end of video, not just prior to forced keyframes. - -commit 9df5214b5e5b5c3311d9e612fdebb2c36525648f -Author: Steven Walters -Date: Wed Nov 10 07:21:41 2010 -0500 - - Update ffms2 support for its latest API break.
- -commit afd79f1de5d63570243d6b6462b03cae8fc6c683 -Author: Fiona Glaser -Date: Thu Nov 11 18:19:22 2010 -0800 - - Modify the x264 header accordingly if --disable-gpl is used - -commit 84bfe64ddf7bd22a579e76ae31562eed47a381e2 -Author: Fiona Glaser -Date: Thu Nov 11 22:25:31 2010 -0800 - - Save a bit of memory with weightp + high bit depth - -commit 5d7b2ab6c51ebb32ca2dd999f9694f81baa750c2 -Author: Fiona Glaser -Date: Sat Nov 13 04:38:44 2010 -0800 - - Fix bugs in qpfile parsing with omitted QPs - -commit 1e902646b2e1f470dadd268a4f45a699b10434ec -Author: Kieran Kunhya -Date: Fri Nov 12 21:53:28 2010 +0000 - - Fix HRD with intra-refresh - x264 was incorrectly calculating cpb_removal_delay with respect to the first keyframe. - It should have been calculating cpb_removal_delay with respect to the last keyframe. - -commit 180d081f2f1ca4d92235fbc776211e75607ea0db -Author: Anton Mitrofanov -Date: Wed Nov 10 07:34:40 2010 -0800 - - Fix bug in r1753 - Overflow compensation fix broke CRF with --no-mbtree. - -commit e6c22fef723b44ecab5f597cf24f642c1d54741d -Author: Fiona Glaser -Date: Sat Nov 6 17:47:27 2010 -0700 - - Improve flash detection's behavior near the end of the video - Flash detection catches situations like AAAABBCCDDDD, where A,B,C,D are frames in different scenes. - x264 would place a keyframe on the first "D". - However, if the video ended on the last "C", x264 would place a keyframe on the first "C", even though C classifies as a flash. - This change fixes this issue. - -commit 2f2ab0fa6c873c32363d7c3115f483fafdbe326f -Author: Fiona Glaser -Date: Sun Oct 31 15:51:48 2010 -0700 - - Improve quantizer handling - The default value for i_qpplus1 in x264_picture_t is now X264_QP_AUTO. This is currently 0, but may change in the future. - qpfiles no longer use -1 to indicate "auto"; QP is just omitted. The old method should still work though. - - CRF values now make sense in high bit depth mode. - --qp should be used for lossless mode, not --crf. 
- --crf 0 will still work as expected in 8-bit mode, but won't be lossless with higher bit depths. - Add bit depth to statsfiles. - - These changes are required to make the QP interface sensible in combination with high bit depth. - -commit d50a5bfd0b457c211a0ed2868b3e13be28dfa764 -Author: Fiona Glaser -Date: Wed Nov 3 23:17:08 2010 -0700 - - VFR-aware PSNR/SSIM measurement - First step to VFR-aware MB-tree and bit allocation. - -commit 506683ae4789946e45d328250e72b304810fdb0f -Author: Fiona Glaser -Date: Mon Nov 1 15:08:03 2010 -0700 - - Disable weightp offset=-1 dupes with high bit depth - They're a hack to compensate for crappy rounding, and thus not worth doing at high bit depth, which fixes most of the rounding issues. - -commit 6cff5834068758ecfcd38425f817320495bdd251 -Author: Fiona Glaser -Date: Sun Nov 7 17:27:38 2010 -0800 - - Make the ffmpeg -vpre error message more descriptive - -commit 3d96daca538d849e0b9b88c45f8c3820aed9628e -Author: Loren Merritt -Date: Sat Oct 30 14:39:50 2010 -0700 - - Add numeric names for the presets (0==ultrafast ... 9==placebo) - This mapping will of course change if new presets are added in between, but will always be ordered from fastest to slowest. - -commit b9461a15b33936a6fd5583da843c132d4fe030f6 -Author: Fiona Glaser -Date: Wed Oct 13 06:07:14 2010 -0700 - - Update benchmarks in doc/threads.txt - -commit af28501230c2c511aa8b33660ca8d35f50b613ea -Author: Loren Merritt -Date: Thu Oct 28 13:29:42 2010 -0700 - - Make the #if'd out naive ESA actually match the real implementation - -commit 259790037c2f72532cf6d6fa9e632ad08ddf9574 -Author: Fiona Glaser -Date: Mon Nov 1 19:19:23 2010 -0700 - - Move mv/ref prefetch code to the correct location - Prefetching of top blocks should be done under if(top), not if(left). - -commit 633f938dcf0e5c69947483da18acae7e88fdd99a -Author: Reinhard Tartler -Date: Tue Nov 9 23:57:12 2010 -0800 - - Link x264cli explicitly against lavf - Fixes some problems with crappy linkers. 
- -commit 15f4006c6d478cdfe8e456de6aa1ecf35af40be0 -Author: Fiona Glaser -Date: Mon Nov 8 22:14:58 2010 -0800 - - Fix CBR ratecontrol bug with extremely high qscales - Caused CBR ratecontrol to take a very long time to recover from extreme situations (e.g. /dev/urandom). - -commit 0d6c3f3c3bbe89b6d1f215c86c102502ea8c201d -Author: Fiona Glaser -Date: Mon Nov 8 21:03:01 2010 -0800 - - Disable overflow compensation in CRF mode - Wasn't designed with CRF in mind, and acts really weird with CRF+VBV. - -commit 95f1474fcdb0714f24185a25ec16a7da6671f2a0 -Author: Fiona Glaser -Date: Mon Nov 8 19:56:29 2010 -0800 - - Fix stupid bug in B-frame VBV size prediction - -commit 95268ca03408b2768ce2fcd768ef61d25ea6a1f6 -Author: Oskar Arvidsson -Date: Fri Oct 29 13:13:25 2010 +0200 - - Fix regression in checkasm in r1666 - Buffer is uint16_t* regardless of whether x264 was compiled with high bit depth or not. - -commit 8efd67c034190b415174fd03c3cfef4768345f11 -Author: Oskar Arvidsson -Date: Fri Oct 29 13:11:09 2010 +0200 - - Fix overflows in satd, sa8d and hadamard_ac with high bit depth - -commit 803864ff6cf5b6869a94ee9915d886e8c372e72a -Author: Oskar Arvidsson -Date: Fri Oct 29 12:34:42 2010 +0200 - - Fix potential problem with overflows in ssd_nv12 - The risk of overflows increases exponentially with the bit depth. - The 8-bit asm versions may still overflow with image widths >= 11008 (or 6604 if interlaced). - -commit 3db6b2c22cd1b8ee00c10ea6d705d6fbec8544d0 -Author: Fiona Glaser -Date: Sat Oct 30 14:36:01 2010 -0700 - - Fix syntax for some parameterless functions - Technically, such functions should be declared with (void), not (). - -commit 24a56d38867e848765c78605846a5d6097f5392c -Author: Steven Walters -Date: Sat Oct 30 16:51:01 2010 -0400 - - Fix fps reporting on mingw64 - _ftime on mingw64 uses __timeb32 which is broken. - Use ftime instead. 
- -commit 30085672e58c29db2fa107b7dab58e10647b6722 -Author: Manuel Rommel -Date: Sun Oct 31 19:19:10 2010 +0100 - - Fix compilation on PPC with some recent GCCs - -commit 3ffbfed7e8a45aeafde8eba55f944f280fe015aa -Author: Manuel Rommel -Date: Mon Oct 11 13:50:09 2010 -0700 - - Fix Altivec SATD with small strides - Fixes chroma ME and some of lookahead on PPC. - -commit 33edb51fbc5c2d040e1f0f6534d78ddbb8d11cae -Author: Holger Lubitz -Date: Sun Oct 3 19:07:00 2010 +0200 - - Address remaining cacheline split issues in avg2 - Slightly improved performance on core 2. - Also fix profiling misattribution of w8/16/20 mmxext cacheline loops. - -commit 87829c982a751a0d031340d1ed7fbada23039d40 -Author: Fiona Glaser -Date: Wed Sep 29 18:56:27 2010 -0600 - - Trim a few bytes off some x86 intra pred functions - -commit e4b44c2e267a8a0771777422c626aba51c8e5194 -Author: Yusuke Nakamura -Date: Fri Oct 1 00:37:39 2010 +0900 - - Move DTS compression from libx264 to x264cli - DTS compression is an ugly stupid hack and starting to encroach on unrelated areas like VBV. - Some people want it in the mp4 muxer for devices and/or splitters that don't support Edit Boxes. - We just say "throw these broken devices out the window". - DTS compression will remain as a muxer option, --dts-compress, at the user's own risk. - This option is disabled by default. - -commit 2eb4139f72d025fa4c77c4391d6f1b67ec2b6f8e -Author: Fiona Glaser -Date: Thu Sep 30 22:24:51 2010 -0700 - - Use a larger pic_init_qp with high bit depth - Modify pic_init_qs for consistency. 
- -commit 84bb443d53fb03d9899e266688d0af5587562c6c -Author: Fiona Glaser -Date: Sat Oct 2 23:56:52 2010 -0700 - - Update some of the information in doc/ - -commit 49e5105aad41370fbf5611b23f5daf8caf128789 -Author: Fiona Glaser -Date: Tue Sep 28 17:48:00 2010 -0700 - - Update header in depth.c - -commit ef905ad646dbabcc9c4d163862b9ab728db07a54 -Author: Fiona Glaser -Date: Sat Oct 2 23:12:41 2010 -0700 - - Remove some old unused stuff in the build tree - Regression test (hasn't been updated since svn). - Doxy (was never used). - -commit 34b590b127cfd1eee13db826a9a9e2ac9faf6a20 -Author: Anton Mitrofanov -Date: Wed Sep 29 00:19:06 2010 -0700 - - Various cosmetics - Exorcise some CamelCase. - -commit 2bd8c8f5f6041a8169ad25a1aea4d48223893cc2 -Author: Fiona Glaser -Date: Sun Oct 10 04:39:36 2010 -0700 - - Add missing mod4 stack check to sse2_misalign mc_chroma - Required for ICC compilation. - -commit 0dbc490af39525adbefc1757151e5801c79eac3b -Author: Anton Mitrofanov -Date: Fri Oct 8 18:08:23 2010 +0400 - - Fix 2pass ratecontrol with --nal-hrd cbr - -commit 8ee7b59a4afb230fba336bc08a9047f028708bfb -Author: Fiona Glaser -Date: Mon Oct 4 13:33:23 2010 -0700 - - Fix minor bug in intra pred with intra refresh - i8x8 blocks didn't properly avoid predicting from top-right when necessary. - This could cause intra refresh to not completely refresh the frame. - -commit a86866106530d852722d6c5ddc6a9d4274351715 -Author: Anton Mitrofanov -Date: Wed Sep 29 22:06:27 2010 +0400 - - Fix filter parsing with --extra-cflags="-DNDEBUG" - -commit 91b83f585a5cdc45a3fb83100b09a2fb9dacc02e -Author: Fiona Glaser -Date: Wed Sep 29 00:15:14 2010 -0700 - - Make sigint handler variable volatile - Didn't actually cause any problems, but is necessary because it can be modified by another thread (the signal call). 
- -commit 47e2609852b2de996071633c94de8d273b66ad05 -Author: Fiona Glaser -Date: Sun Sep 26 21:04:30 2010 -0700 - - Add High 10 Intra profile support (AVC-Intra) - x264 should now be able to encode compliant AVC-Intra 50. - With a 10-bit-compiled version of x264, a sample commandline for 1080i25 might be: - --interlaced --keyint 1 --vbv-bufsize 2000 --bitrate 50000 --vbv-maxrate 50000 --nal-hrd cbr - - Also print "Constrained Baseline" for baseline profile, since that's all x264 (and everything else in the world) supports. - Also reorganize parameter validation a bit to reduce some spurious warnings. - -commit 0467589e35295c522bdae382e0e3b021deea9919 -Author: Oskar Arvidsson -Date: Mon Sep 27 16:02:20 2010 +0200 - - Finish support for high-depth video throughout x264 - Add support for high depth input in libx264. - Add support for 16-bit colorspaces in the filtering system. - Add support for input bit depths in the interval [9,16] with the raw demuxer. - Add a depth filter to dither input to x264. - -commit b6b8aea6baaac8284a61f5879ba94a26a3cd6156 -Author: Alex Wright -Date: Sun Sep 19 05:08:22 2010 -0700 - - Chroma mode decision/subpel for B-frames - Improves compression ~0.4-1%. Helps more on videos with lots of chroma detail. - Enabled at subme 9 (preset slower) and higher. - -commit 361721986f678065069d40c70bf57747afc0284c -Author: Fiona Glaser -Date: Mon Sep 27 05:39:02 2010 -0700 - - Various cosmetics - -commit eacca4fa4c39c8140c3718ffbd82be4fb2baeba7 -Author: Fiona Glaser -Date: Tue Sep 21 17:11:00 2010 -0700 - - Make slice-max-size more aggressive in considering escape bytes - The x264 assumption of randomly distributed escape bytes fails in the case of CABAC + an enormous number of identical macroblocks. - This patch attempts to compensate for this. - It is probably safe to assume in calling applications that x264 practically never violates the slice size limitation. 
- -commit e02a6d46b8e3621a0c285ced56df727368ff1c7f -Author: Fiona Glaser -Date: Mon Sep 27 05:39:13 2010 -0700 - - Add missing emms for dump-yuv - -commit 99c9b6de276dd499b6d56e50c679e4033eb915ad -Author: Anton Mitrofanov -Date: Sat Sep 25 15:55:32 2010 -0700 - - Fix CFR ratecontrol with timebase != 1/fps - Fixes VBV + DTS compression, among other things. - -commit f655f8ad1554cef6fc0040d7b7fa2fcf22ba3b15 -Author: Anton Mitrofanov -Date: Mon Sep 20 13:10:13 2010 +0400 - - Fix DTS/bitrate calculation if the first PTS wasn't zero - Fix bitrate calculation with DTS compression. - -commit 8c4218c159931b2cb04958d0510368168698421f -Author: Anton Mitrofanov -Date: Sun Sep 19 19:11:06 2010 +0400 - - Fix regression in r1716 - -commit d2a886339597426c12a7ee9c6462bf89f85d91a6 -Author: Fiona Glaser -Date: Sun Sep 19 00:25:27 2010 -0700 - - Cosmetics in me.c and frame.c - -commit 947e71c3dbee76383546a768d9bf84a3883efbd6 -Author: Kieran Kunhya -Date: Mon Sep 13 15:09:06 2010 +0100 - - Add support for arbitrary user SEIs - This allows calling applications to insert SEIs that x264 doesn't know about while maintaining HRD/VBV accuracy. - -commit e3af0b67fd87066bf55001754d82e759479fe9d6 -Author: Steven Walters -Date: Wed Sep 15 20:42:08 2010 -0400 - - Add full chroma input flag to swscale - Improves quality of colorspace conversions involving RGB(A). - -commit 3145e67de9ec38ab5432023286f285e467355c05 -Author: James Darnley -Date: Fri Sep 17 04:06:59 2010 -0700 - - Add --disable-gpl option to configure - Used for commercially-licensed versions of x264. - Doesn't currently change anything, but may be used to disable GPL-only CLI tools, such as video filters, in the future. - Also print the x264 license and libavformat license in version info. - -commit 213a99d070ebd4f9aeffe7cb3ed9bd7fe755ec7f -Author: Fiona Glaser -Date: Fri Sep 17 04:03:27 2010 -0700 - - Update source file headers - Update dates, improve file descriptions, make things more consistent. 
- Also add information about commercial licensing. - -commit a35a495d8889b1265a558c06920b7e83c9cd1117 -Author: Fiona Glaser -Date: Wed Sep 15 12:06:47 2010 -0700 - - Fix intra refresh to not exceed max recovery_frame_cnt - The spec constrains recovery_frame_cnt to [0, MaxFrameNum-1]. - So make MaxFrameNum bigger in the case of intra refresh. - -commit f1c48203a9985d05ed97c32c9ff9c9d76cd8d9c8 -Author: Fiona Glaser -Date: Thu Sep 16 03:36:17 2010 -0700 - - Make intra refresh finish one frame faster - In some cases, the last frame of intra refresh was redundant. - Saves a few bits. - -commit 22f9984be388389e7f356e76074255797a4fed74 -Author: Fiona Glaser -Date: Tue Sep 14 12:20:00 2010 -0700 - - Fix intra refresh to not predict from invalid pixels - The blocks on the right side of the intra refresh column should not predict from top-right. - -commit 90c51a765a42e5db6ccf558695fc6abf54f7e1cd -Author: Steven Walters -Date: Mon Sep 13 18:47:33 2010 -0400 - - Add configure check for mingw64 prefixing - This compensates for the inconsistent prefixing seen in different versions of the compiler. - -commit 683abb46df51083cc4d9da2b73bad39421c510eb -Author: Manuel Rommel -Date: Sat Sep 4 19:31:53 2010 -0700 - - Update some Altivec function prototypes - Silences a lot of warnings. - -commit ceba5dd5aa576d5d9f4d7a4213303a07bce91c15 -Author: Takashi Hirata -Date: Mon Aug 30 18:13:49 2010 +0900 - - Add support for level 1b - This level is a stupid hack in the H.264 spec, so it's a stupid hack in x264 too. - Since level is an integer, calling applications need to set level_idc=9 to use it. - String-based option handling will accept "1b" just fine though, so CLI users don't have to worry. - -commit 818532b1b048ac98129f5aaee7f2322f407ae482 -Author: Fiona Glaser -Date: Thu Sep 2 15:29:29 2010 -0700 - - Use smaller values for idr_pic_id - Saves a few bits and fixes problems on certain fantastically terrible decoders, - such as the Apple iPad. 
- -commit d48f5f8c83a6ea5fbdd309b2ab6284bdf96550a9 -Author: Fiona Glaser -Date: Mon Aug 30 12:32:31 2010 -0700 - - Use POC type 2 for streams with no B-frames - Saves a few bits per slice header. - -commit 1a579035c7e093e236debc9649bff7362ef9782f -Author: Fiona Glaser -Date: Sun Aug 29 22:18:07 2010 -0700 - - Faster cabac_encode_ue_bypass - Use CLZ + a lut instead of a loop. - -commit 270c72d43aa58293fcbac257b00701f5ae6b103a -Author: Henrik Gramner -Date: Wed Sep 1 00:53:42 2010 +0200 - - Faster nal_escape asm - -commit 24a964f7f59bf6e501d0612c8c82b6d8b13fd033 -Author: Anton Mitrofanov -Date: Tue Aug 31 08:45:22 2010 -0700 - - Allow --demuxer forcing with known extensions - -commit 51df06a4bb61cd72d7f74c08768b2dd706da8322 -Author: Anton Mitrofanov -Date: Fri Sep 3 13:33:44 2010 -0700 - - Minor fixes/cosmetics in commandline parsing - -commit 3f46301fa2c26472beac370e85a03c1117abc2a4 -Author: Anton Mitrofanov -Date: Fri Sep 3 08:39:48 2010 -0700 - - Fix overflow in stats printing - -commit de0dd4aa8b593e390982d28e0906d067c1c7ede2 -Author: Anton Mitrofanov -Date: Sun Aug 29 16:35:32 2010 +0400 - - Fix bug in 2pass if the first P-frames are all skip - last_qscale_for was read before being initialized in this case, resulting - in the value from the previous iteration being used instead. - -commit 9fd187e63a2296a3334abc3f9ef1ade458ff671c -Author: Fiona Glaser -Date: Thu Aug 26 09:12:01 2010 -0400 - - Don't do deblock-aware RD if deblocking is off - -commit 268618932b0c065c1ab1eea26311f35937073c58 -Author: Fiona Glaser -Date: Sat Aug 21 00:15:53 2010 -0700 - - CAVLC "trellis" - ~3-10% improved compression with CAVLC. - --trellis is now a valid option with CAVLC. - Perhaps more importantly, this means psy-trellis now works with CAVLC. - - This isn't a real trellis; it's actually just a simplified QNS. - But it takes enough shortcuts that it's still roughly as fast as a trellis; just not quite optimal.
- Thus the name is a bit of a misnomer, but we're reusing the option name because it does the same thing. - A real trellis would be better, but CAVLC is much harder to trellis than CABAC. - I'm not aware of any published polynomial-time solutions that are significantly close to optimal. - -commit 34649ace2ce4cb4bde4bcbdc78919dfca358d6a3 -Author: Fiona Glaser -Date: Sat Aug 21 16:51:39 2010 -0500 - - Add global #define for maximum reference count - This should make it easier to play around with reference frame counts that exceed the spec maximum. - -commit 2846aaa76f3e20b73225eaaa3f710ad701652152 -Author: Fiona Glaser -Date: Mon Aug 16 17:47:11 2010 -0700 - - Simplify addressing logic for interlaced-related arrays - In progressive mode, just make [0] and [1] point to the same place. - -commit da6c3ecc955a5a7757efed84c542defd5b0fcc9b -Author: Fiona Glaser -Date: Mon Aug 23 18:59:35 2010 -0400 - - Add missing emms to x264_nal_encode - Only matters for applications using the low-latency callback feature. - -commit ee62228587849cfbee75a37300515bb8d84a4f71 -Author: Fiona Glaser -Date: Tue Aug 17 14:38:41 2010 -0700 - - Fix 2 bugs with slice-max-size - Macroblock re-encoding didn't restore mv/tex bit counters (slightly inaccurate 2-pass). - Bitstream buffer check didn't work correctly (insanely large frames could break encoding). - -commit 8782049fa11d6d8451b88e553ab33707bcc9b6b8 -Author: Manuel Rommel -Date: Thu Aug 12 12:54:00 2010 -0700 - - NV12 version of Altivec chroma MC - -commit c9f17d9378245ad37bbad07aa5b4915b169292ff -Author: Fiona Glaser -Date: Tue Aug 10 16:55:05 2010 -0700 - - Deblock-aware RD - Small quality gain (~0.5%) at lower bitrates, potentially larger with QPRD. - May help more with psy, maybe not. - Enabled at subme >= 9. Small speed cost (a few %). 
- -commit 8b2e4a080a469d8b22a275b8afef3b19b566a4e9 -Author: Brad Smith -Date: Sun Aug 8 18:13:32 2010 -0400 - - Correct X header path usage in configure - Don't unconditionally set the header path for OpenBSD but do so if the - --enable-visualize flag is specified. - -commit 2cbe0df7aa31349dfe2903c409d089bda40c978d -Author: golgol7777 -Date: Sat Aug 7 23:01:46 2010 -0700 - - Fix lavf input with delayed frames - -commit 18de9f673aaaf28bdc160f1e98c221a3300d011c -Author: Alexander Strange -Date: Sat Aug 7 22:29:12 2010 -0700 - - Slightly improve the filtering section of x264 --help - -commit cb1ab4495ecf8b614e23f13ef4429474c0e3ab7c -Author: Fiona Glaser -Date: Sat Aug 7 22:32:06 2010 -0700 - - Fix debug message typo with DTS compression - -commit 57543fa6758532287d11583e42e475972790ee5c -Author: Yasuhiro Ikeda -Date: Tue Aug 3 22:10:15 2010 +0900 - - Try to guess input length for lavf input - Allows printing of progress indicator when using lavf input. - -commit a82a0ce26f47460b5cb1487ab758ff04c83b859a -Author: Yasuhiro Ikeda -Date: Tue Aug 3 22:07:36 2010 +0900 - - Workaround bug in fps/timestamp handling with lavf input - reordered_opaque in lavf doesn't work correctly in the identity case (no reordering). - Fixes incorrect output for some file types (e.g. raw in mov). - -commit 9c5a15e467a4a9d2ca804ab03d3925ff21f10390 -Author: Mike Matsnev -Date: Sun Aug 1 12:08:20 2010 -0700 - - Fix aspect ratio writing in the MKV muxer - The braindead Matroska spec dictates aspect ratio to be measured in pixels instead of, well, an actual aspect ratio. - -commit 4ad7cc5a431d3b3e6cb125d02d7fed4932e19542 -Author: Anton Mitrofanov -Date: Thu Jul 29 20:23:55 2010 +0400 - - Add libavcore check in configure - -commit 3882fdba49bec819eb9a8dc851224683e79b3fab -Author: Fiona Glaser -Date: Mon Jul 26 15:38:13 2010 -0700 - - Improve quantizer distribution with sliced-threads+VBV - Should help avoid cases of very uneven quantizer choice between slices. 
- -commit 445082856fec2be71d33d6415801317ceecc0cbd -Author: Fiona Glaser -Date: Wed Jul 28 11:42:06 2010 -0700 - - Remove dead code in slicetype.c - -commit abaa820979d4f0c2fc5944d30135e814a230d2d8 -Author: golgol7777 -Date: Wed Jul 28 00:54:38 2010 +0900 - - Fix incorrect duration/framerate/bitrate in flv header - -commit acd70bf7b9c922a00df7c96f248a17d419f8ed3d -Author: Fiona Glaser -Date: Wed Jul 28 14:23:53 2010 -0700 - - invalidate_reference fixes - invalidate_reference didn't actually invalidate the immediate previous frame, only frames that came before that. - Make sure that reordering is forced when invalidate_reference is used, so that the reference list is correct decoder-side. - -commit b476e0583896124e6ec33ccf7756b240deae0d96 -Author: Steven Walters -Date: Sun Jul 25 19:45:27 2010 -0400 - - Filtering system-related fixes - Fix configure to check for outdated libavutil in resize filter support. - Do not print an explicit error message in ffms when requesting a frame beyond the number of frames in the source. - Mention in --*help that filtering options can be specified as name=value. - Fix the shadowing warning in the resize filter on posix systems. - -commit 5cbf8cf1ee08772a75278c5d9b5cd8d39874e3bb -Author: Fiona Glaser -Date: Wed Jul 21 17:40:14 2010 -0700 - - Improve reference_invalid support - Reference invalidation can now be used to invalidate multiple frames at a time, rather than being limited to one per encoder_encode call. - -commit 0d70de178aba32900c671c4af62808147c17570e -Author: Loren Merritt -Date: Thu Jul 22 06:40:12 2010 +0000 - - Eradicate all mention of SI/SP-frames - -commit f57ef856c2d88b1201571a206eb78ff10a93c57f -Author: Fiona Glaser -Date: Wed Jul 21 11:25:11 2010 -0700 - - Fix stack alignment with MB-tree - Broke 2-pass with MB-tree when calling from compilers with broken stack alignment (e.g. MSVC). 
- -commit 7ff60b1bbb0fc6d3aaf7c3f2bffc8833ef763cd0 -Author: Steven Walters -Date: Sat Jul 17 17:43:37 2010 -0400 - - Avisynth 2.6 colorspace support - Use a customized avisynth_c.h to detect the new planar colorspaces. - -commit b9dd9cfb16ae3e8cd59fd9e71abfeff5b4a5ae2f -Author: Loren Merritt -Date: Thu Jul 15 23:49:03 2010 -0700 - - Prevent some cases of cache aliasing. - Avoid cases where image strides were a large power of 2. - Core 2: +3% speed at widths 898..960, +6% at widths 1922..1984, most other resolutions unaffected. - Nehalem and AMD: similar amount of speedup, but fewer resolutions affected. - -commit 22655d902e35b0eb8529803c8b4b80dce4a3428e -Author: Fiona Glaser -Date: Thu Jul 15 19:35:52 2010 -0700 - - Fix stack alignment for adaptive quant - Broke calls from compilers with broken stack alignment (e.g. MSVC). - -commit 96383a3bc717fee2476e6b1e3ff4a708152a37e2 -Author: David Conrad -Date: Thu Jul 15 18:58:28 2010 -0400 - - Fix compilation with shared ffmpeg libs - lavf input uses libavutil functions, so it must request flags for libavutil from pkg-config. - -commit e3687899bc45a49083acaaec116e7c70bbcfca37 -Author: Fiona Glaser -Date: Thu Jul 15 13:20:50 2010 -0700 - - Fix another PCM bug - CABAC assumes that NNZ is 0 or 1, not the number of actual nonzero coefficients. - Didn't actually break the output; only had a tiny effect on RD. - -commit f98fed632827610c80950fc201b7a5306968ffe9 -Author: Oskar Arvidsson -Date: Thu Jul 15 14:01:36 2010 +0200 - - Fix regression in r1666 - Broke encoding of PCM macroblocks. - -commit a026b7c6ae4e4cba03323bfa7b7af573370101fb -Author: Oskar Arvidsson -Date: Thu Jul 15 08:04:47 2010 +0200 - - Fix build with bit_depth > 8 - Definition of x264_cli_plane_copy was inconsistent with declaration. 
- -commit 387828eda87988ad821ce30f818837bd4280bded -Author: Loren Merritt -Date: Thu Jul 8 12:24:16 2010 -0700 - - Convert x264 to use NV12 pixel format internally - ~1% faster overall on Conroe, mostly due to improved cache locality. - Also allows improved SIMD on some chroma functions (e.g. deblock). - This change also extends the API to allow direct NV12 input, which should be a bit faster than YV12. - This isn't currently used in the x264cli, as swscale does not have fast NV12 conversion routines, but it might be useful for other applications. - - Note this patch disables the chroma SIMD code for PPC and ARM until new versions are written. - -commit c58954cc7c2516dd5f704a506da9fe824f34d9df -Author: Steven Walters -Date: Mon Jul 5 17:37:47 2010 -0400 - - Add video filtering system to x264cli - Similar to mplayer's -vf system. - Supports some basic operations like resizing and cropping. Will support more in the future. - See the help for more details. - -commit da978ebe60f4d3e08cff46704762d2471d280508 -Author: Fiona Glaser -Date: Tue Jul 6 13:39:44 2010 -0700 - - Eliminate edge cases for MV predictors - Saves a few clocks in mv pred. - -commit b4217e40d4ab41499f83cbcfa9542a9b0500835d -Author: Fiona Glaser -Date: Thu Jul 8 12:45:25 2010 -0700 - - Improve scenecut detection a bit - Put a minimum value on the scenecut threshold; makes x264 more likely to catch successive scenecuts (but might increase the odds of false detection). - This also fixes scenecut detection with keyint=infinite. - Also print keyint=infinite in the x264 SEI and statsfile correctly. - -commit de8a6e9d6b2bb2a0961abcaad4edc43d74702df9 -Author: Fiona Glaser -Date: Wed Jul 14 18:47:14 2010 -0700 - - Fix 8x8dct+slices+no sliced threads+cavlc+deblock - Deblocking was done slightly incorrectly. - Regression in r1612. 
- -commit b7f6428a0b188d9118626dd4dc64415266ce8b6f -Author: Fiona Glaser -Date: Thu Jul 8 16:20:48 2010 -0700 - - Fix off-by-one error in slice VBV predictor updates - -commit d6d614dee05b55e75b0f2938cc170a9eac312db2 -Author: Anton Mitrofanov -Date: Mon Jul 5 17:44:15 2010 +0400 - - Fix disabling of progress with --log-level - -commit c91f43a4b09dab84953f417e6d6662ec0fa7acb1 -Author: Oskar Arvidsson -Date: Fri Jul 2 04:06:08 2010 +0200 - - Support for 9 and 10-bit encoding - Output bit depth is specified at compilation time via --bit-depth. - There is currently almost no assembly code available for high-bit-depth modes, so encoding will be very slow. - Input is still 8-bit only; this will change in the future. - - Note that very few H.264 decoders support >8 bit depth currently. - Also note that the quantizer scale differs for higher bit depth. For example, for 10-bit, the quantizer (and crf) ranges from 0 to 63 instead of 0 to 51. - -commit b7789b1f08e27103576d9b9f0feea9b75e2eca56 -Author: Fiona Glaser -Date: Wed Jun 30 13:55:46 2010 -0700 - - Support infinite keyint (--keyint infinite). - This just means x264 won't insert non-scenecut keyframes. - Useful for streaming when using interactive error recovery or some other mechanism that makes keyframes unnecessary. - - Also change POC logic to limit POC/framenum LSB size (to save bits per slice). - Also fix a bug in the CPB underflow detection code (didn't affect the bitstream, just resulted in the failure to print certain warning messages). - -commit e480c9c8b143422e3d51cb0abbb9e6578888852a -Author: Fiona Glaser -Date: Wed Jun 30 13:06:22 2010 -0700 - - Don't check i16x16 planar mode unless previous modes were useful - Saves ~160 clocks per MB at subme=1, ~270 per MB at subme>1 (measured on Core i7). - Negligible effect on compression. - - Also make a few more arrays static.
- -commit 43a4334670ae60d0f8a30d3e4bd530d3b90a1ce1 -Author: Steven Walters -Date: Sat Jun 26 16:28:49 2010 -0400 - - Centralize logging within x264cli - x264cli messages will now respect the log level they pertain to. - Slightly reduces binary size. - -commit 899bf0fdb91d85acf0fde88b9aa1cb01755e8c71 -Author: Lamont Alston -Date: Tue Jun 29 10:11:42 2010 -0700 - - Make open-GOP Blu-ray compatible - Blu-ray is even more braindamaged than we thought. - Accordingly, open-gop options are now "normal" and "bluray", as opposed to display and coded. - Normal should be used in all cases besides Blu-ray authoring. - -commit 4cd44841f5ea8816f81a7975480cea6da10ad1f5 -Author: Fiona Glaser -Date: Mon Jun 28 15:02:33 2010 -0700 - - Callback feature for low-latency per-slice output - Add a callback to allow the calling application to send slices immediately after being encoded. - Also add some extra information to the x264_nal_t structure to help inform such a calling application how the NAL units should be ordered. - - Full documentation is in x264.h. - -commit a0ce4b768d46137692531e2869800d7d3c419e42 -Author: Loren Merritt -Date: Sat Jun 26 20:55:59 2010 -0700 - - Simplify pixel_ads - -commit edc1135e59416b4311f54375b6659e7340c81193 -Author: Fiona Glaser -Date: Wed Jun 23 17:29:34 2010 -0700 - - Interactive encoder control: error resilience - In low-latency streaming with few clients, it is often feasible to modify encoder behavior in some fashion based on feedback from clients. - One possible application of this is error resilience: if a packet is lost, mark the associated frame (and any referenced from it) as lost. - This allows quick recovery from errors with minimal expense bit-wise. - - The new i_dpb_size parameter allows a calling application to tell x264 to use a larger DPB size than required by the number of reference frames. - This lets x264 and the client keep a large buffer of old references to fall back to in case of lost frames. 
- If no recovery is possible even with the available buffer, x264 will force a keyframe. - - This initial version does not support B-frames or intra refresh. - Recommended usage is to set keyint to a very large value, so that keyframes do not occur except as necessary for extreme error recovery. - - Full documentation is in x264.h. - - Move DTS/PTS calculation to before encoding each frame instead of after. - Improve documentation of x264_encoder_intra_refresh. - -commit 669cc1def2034a7ef55946df9f6e1ae13963eb8a -Author: Fiona Glaser -Date: Thu Jun 17 14:50:07 2010 -0700 - - Lookaheadless MB-tree support - Uses past motion information instead of future data from the lookahead. - Not as accurate, but better than nothing in zero-latency compression when a lookahead isn't available. - Currently resets on keyframes, so only available if intra-refresh is set, to avoid pops on non-scenecut keyframes. - Not on by default with any preset/tune combination; must be enabled explicitly if --tune zerolatency is used. - - Also slightly modify encoding presets: disable rc-lookahead in the fastest presets. - Enable MB-tree in "veryfast", albeit with a very short lookahead. - -commit d020c4274edab45314c6bcf324d05f21dd13a93c -Author: Lamont Alston -Date: Wed Jun 16 10:05:17 2010 -0700 - - Open-GOP support - Allows B-frames immediately prior to keyframes (in display order). - This helps reduce keyframe popping and improve compression with short keyframe intervals. - Due to a staggering display of braindamage in the Blu-ray spec, two open-GOP modes are available. - The two modes calculate keyframe interval differently: one based on coded distance and one based on display distance. - The latter is superior compression-wise, but for no comprehensible reason, Blu-ray requires the former if open-GOP is used. 
- -commit 81cada8effc3e91eec3f413772b3c1629e8beb4d -Author: Steven Walters -Date: Wed Jun 9 18:14:52 2010 -0400 - - Use threadpools to avoid unnecessary thread creation - Tiny performance improvement with fast settings and lots of threads. - May help more on some OSs with slow thread creation, like OS X. - Unify inconsistent synchronized abbreviations to sync. - -commit 1a3548cf7bbebe7aa69f2ec65f6d36dc08afafc8 -Author: Fiona Glaser -Date: Sat Jun 19 01:41:07 2010 -0700 - - Improve 2-pass bitrate prediction - Adapt based on distance to the end in bits, not in frames. - Helps in videos with absurdly simple end sections, e.g. black frames. - -commit af34dfe35f6421dd0cd262d2263111f5f1a11f2d -Author: Fiona Glaser -Date: Fri Jun 18 13:58:11 2010 -0700 - - SSE4 and SSSE3 versions of some intra_sad functions - Primarily Nehalem-optimized. - -commit 5a57688fa282c31f070f147790dec0793adc843b -Author: Fiona Glaser -Date: Sat Jun 19 03:27:33 2010 -0700 - - Improve HRD accuracy - In a staggering display of brain damage, the spec requires all HRD math to be done in infinite precision despite the output being of quite limited precision. - Accordingly, convert buffer management to work in units of timescale. - These accumulating rounding errors probably didn't cause any real problems, but might in theory cause issues in very picky muxers on extremely long-running streams. - -commit f2b78b93fa6568528a2ee0efb0b00834002df49a -Author: Fiona Glaser -Date: Tue Jun 22 14:20:46 2010 -0700 - - Use -fno-tree-vectorize to avoid miscompilation - Some versions of gcc have been reported to attempt (and fail) to vectorize a loop in plane_expand_border. - This results in a segfault, so to limit the possible effects of gcc's utter incompetence, we're turning off vectorization entirely. - It's not like it ever did anything useful to begin with. 
- -commit 8060431f0d60f97a9b5274ceb230fbcdb3e2cffd -Author: Anton Mitrofanov -Date: Sat Jun 19 01:44:56 2010 +0400 - - Fix SIGPIPEs caused by is_regular_file checks - Check to see if input file is a pipe without opening it. - -commit ed0b9b6df2bbb11268da9b1b4e7d3b217bc0b5c7 -Author: Fiona Glaser -Date: Tue Jun 15 05:15:42 2010 -0700 - - Fix compilation on ARM w/ Apple ABI - -commit 15501e340f0500eedb797390f74e6e35f58ba12e -Author: Holger Lubitz -Date: Wed Jun 9 13:59:06 2010 +0200 - - Faster mbtree_propagate asm - Replace fp division by multiply with the reciprocal. - Only ~12% faster on penryn, but over 80% faster on amd k8. - Also make checkasm slightly more tolerant to rounding error. - -commit c224a0a5d91fb9115071b4ff075e91d2b3f630e2 -Author: Diogo Franco -Date: Sun Jun 13 21:57:32 2010 -0300 - - Convert the OPT_ defines in x264.c to an enum - -commit 43792a1b1bbd00193bf26c21f37be03777b8eb6d -Author: Anton Mitrofanov -Date: Sun Jun 13 23:14:15 2010 +0400 - - Don't allow baseline profile streams with fake-interlaced - Indicate use of --fake-interlaced in encoding options SEI. - -commit 317001c67caffc6618c0b736d22e43795f1efed3 -Author: Havoc Pennington -Date: Thu Jun 10 16:28:52 2010 -0400 - - Allocate space for null terminator in param_apply_tune - -commit 4fda9276ff98445f491d316a51f8d2c89ec2d85b -Author: Anton Mitrofanov -Date: Thu Jun 10 21:33:46 2010 +0400 - - Fix regression in r1501. - Could cause slightly incorrect analysis in rare cases, but no serious encoding issues. - Also shut up gcc warning about pels_v. - -commit 9d36bceed51ac71b5cbf645b4900b5a7190840a0 -Author: Anton Mitrofanov -Date: Wed Jun 9 22:53:08 2010 +0400 - - Fix crash with --subme 0 + --weightp > 0. 
Regression in r1535 - -commit ffdb014fa27b703046349ba70f432b883d927a70 -Author: Henrik Gramner -Date: Tue Jun 8 16:29:16 2010 +0200 - - Replace some divisions with shifts - -commit 64fa70e78c459688197ce907326b21bf4799f8ab -Author: Anton Mitrofanov -Date: Tue Jun 8 02:43:37 2010 +0400 - - Warn about shadowed variable declarations - Also get rid of a few instances of variable shadowing. - -commit 1bc9ad14e4655780bdf509c6d29f4b1e9d447fe4 -Author: Fiona Glaser -Date: Mon Jun 7 14:26:05 2010 -0700 - - Template load_pic_pointers based on interlaced - Significantly speeds up cache_load in the non-interlaced case. - Also various other minor optimizations in cache_load and cache_save. - -commit fcda8dd98eaf9ceea950c95661f8228fb364fc0b -Author: Fiona Glaser -Date: Mon Jun 7 14:15:33 2010 -0700 - - Remove double-dereferences for MB width/height data - Store it in x264_t instead of going through the SPS. - -commit 894634306a53830d6f6f7a8d0ef927af414b3aad -Author: Steven Walters -Date: Sat May 22 20:54:35 2010 -0400 - - Exempt Win x86_64 from memalign hack - The API mandates all mallocs are 16 byte aligned. - Remove unused int that stores sizeof malloc in memalign hack. - -commit f9bc2de28f637fa199424f544c94aeabc551eeb4 -Author: Steven Walters -Date: Fri Jun 4 13:44:55 2010 -0700 - - Preprocessing cosmetics - Unify input/output defines to HAVE_* format. - Define values as 1 to simplify conditionals. - -commit 691e2db1ff45f98e9696a5b37b761da7d03a64f3 -Author: Fiona Glaser -Date: Thu Jun 3 21:31:10 2010 -0700 - - Take more shortcuts in i4x4/i8x8 analysis - Based on the scores of the H and V modes, rule out modes which are unlikely. - Small compression loss (0.1-0.5%) and large speed gain (10-30% faster intra analysis). - Not enabled in slower encoding modes. - - Also make C versions of the merged SATD functions in order to eliminate branches based on their availability. 
- -commit 3cd5117da50bc1925086f684f34d7d5422d28520 -Author: Fiona Glaser -Date: Wed Jun 2 15:47:26 2010 -0700 - - Display SSIM measurement in db as well - -commit bef006e64034f3d3d24fdb1b06a9ac605eae9e64 -Author: Anton Mitrofanov -Date: Tue Jun 8 01:03:03 2010 +0400 - - Make version.sh indicate "M" for local commits too - -commit 5ab417745cce40869ec59eb28fde8677e974c249 -Author: Alex Jurkiewicz -Date: Sun Jun 6 15:21:12 2010 +0800 - - Add error message for invalid [de]muxer selection - -commit 4f5d9bcea757f049c1a14dd902c4af76dee231c1 -Author: Nathan Caldwell -Date: Sun Jun 6 14:19:41 2010 -0600 - - Deduplicate the ALIGN macro, move it to common.h - -commit e02e3eb59e2ab921117b89bf302ac70b7628baa9 -Author: David Conrad -Date: Thu Jun 3 19:02:24 2010 -0400 - - Fix a use of ALIGNED_ARRAY_16 on ARM - -commit 032113205fd60e70b7e50b5109e94ec2062067e9 -Author: Fiona Glaser -Date: Tue Jun 8 15:41:17 2010 -0700 - - Add missing emms after nal_encode - Caused random, bizarre failures with some calling applications. - -commit 23a20180338226f8bcba05c46867f38eff750cc3 -Author: Fiona Glaser -Date: Tue Jun 8 15:38:32 2010 -0700 - - Fix crash in fake-interlaced at some resolutions - -commit da1bc99cd4c74499aca99cbfbfc014154bb32440 -Author: Yusuke Nakamura -Date: Wed Jun 2 22:27:57 2010 +0900 - - Fix no-mbtree + aq-mode=0 - - Regression in r1618. - -commit 36bbd4d2134106943b7496b376603a97010ce308 -Author: Fiona Glaser -Date: Wed Jun 2 01:07:44 2010 -0700 - - Add API function to fix x264_picture_t initialization - Calling applications that do not use x264_picture_alloc need to use x264_picture_init to initialize x264_picture_t structures. - Previously, if the calling application didn't zero x264_picture_t, Bad Things could happen. - -commit f857c08db810a10332b46f4c331ac098ec3db9c7 -Author: Yusuke Nakamura -Date: Wed Jun 2 17:02:31 2010 +0900 - - Fix Avisynth input - Regression in r1624. A more permanent solution to the problem will be committed later. 
- -commit e46bf243d4c05f9abb106573b4c46d4fe88caba2 -Author: Oskar Arvidsson -Date: Wed Jun 2 02:08:45 2010 +0200 - - Convert to a unified "dctcoeff" type for DCT data - Necessary for future high bit-depth support. - -commit 17a04af4e35de32822024caf91e6f75400593394 -Author: Oskar Arvidsson -Date: Wed Jun 2 01:35:38 2010 +0200 - - Convert to a unified "pixel" type for pixel data - Necessary for future high bit-depth support. - Various macros and extra types have been introduced to make operations on variable-size pixels more convenient. - -commit 7adf25b165b4c6c69c3bcba7ed949996dca6f116 -Author: Fiona Glaser -Date: Fri May 28 14:27:22 2010 -0700 - - Add API tool to apply arbitrary quantizer offsets - The calling application can now pass a "map" of quantizer offsets to apply to each frame. - An optional callback to free the map can also be included. - This allows all kinds of flexible region-of-interest coding and similar. - -commit 6589ad6dc6a2ac7599c5a19566306c274bd86853 -Author: Fiona Glaser -Date: Thu May 27 14:27:32 2010 -0700 - - x86 assembly code for NAL escaping - Up to ~10x faster than C depending on CPU. - Helps the most at very high bitrates (e.g. lossless). - Also make the C code faster and simpler. - -commit 9056470d688eeb0f337a1976576b3dac601d882c -Author: Fiona Glaser -Date: Fri May 28 14:30:07 2010 -0700 - - Re-enable i8x8 merged SATD - Accidentally got disabled when intra_sad_x3 was added. - -commit 156b119f3de35e458f037e87d1ccf467ad86da5b -Author: Henrik Gramner -Date: Sun May 30 22:45:14 2010 +0200 - - Some deblocking-related optimizations - -commit 260da1ce37ce8964b5a7dc697723d064d60b335e -Author: Henrik Gramner -Date: Thu May 27 22:18:38 2010 +0200 - - Optimize out some x264_scan8 reads - -commit 9dc7a03fa5bd36862c456e1b9b2cba238cb3c89c -Author: Fiona Glaser -Date: Thu May 27 10:42:15 2010 -0700 - - Add fast skip in lookahead motion search - Helps speed very significantly on motionless blocks. 
- -commit 0010a130bf8939cc66e576fa53b7a7ad94fe32f3 -Author: Fiona Glaser -Date: Wed May 26 12:55:35 2010 -0700 - - Merge some of adaptive quant and weightp - Eliminate redundant work; both of them were calculating variance of the frame. - -commit 19e1f24e09ee540a8517bd6d36de5c1b828c24b6 -Author: Fiona Glaser -Date: Thu May 27 12:31:41 2010 -0700 - - Fix omission in libx264 tuning documentation - -commit ccd20017c79b00abf61e9009e8b28a5eb440c985 -Author: Fiona Glaser -Date: Sun May 30 09:42:53 2010 -0700 - - Fix ultrafast to actually turn off weightb - -commit cb94c2bdc9989d55deb6899ab77b0d40d185ab21 -Author: Anton Mitrofanov -Date: Mon May 31 22:36:50 2010 +0400 - - Fix crash with MP4-muxing if zero frames were encoded - -commit 16adb51780fa73f260eb26f75715faf4b2cd9cb8 -Author: Fiona Glaser -Date: Mon May 31 11:14:22 2010 -0700 - - Fix cavlc+deblock+8x8dct (regression in r1612) - Add cavlc+8x8dct munging to new deblock system. - May have caused minor visual artifacts. - -commit f8bd69dc667aec425f84c9b5d13dbf85d08d5e05 -Author: Fiona Glaser -Date: Wed May 26 12:40:31 2010 -0700 - - Fix 10L in r1612 - Stats need to be calculated before deblock strength, not after. - Broke ref stats in x264cli (no effect on actual output). - -commit 4947b0fbe0882defe5f806a0c42978bd160d6da0 -Author: Fiona Glaser -Date: Tue May 25 12:42:44 2010 -0700 - - Overhaul deblocking again - Move deblock strength calculation to immediately after encoding to take advantage of the data that's already in cache. - Keep the deblocking itself as per-row. - -commit 57729402c7b34d91cab058c00a5f6e50a2ef72a3 -Author: Fiona Glaser -Date: Tue May 25 16:13:59 2010 -0700 - - Detect Atom CPU, enable appropriate asm functions - I'm not going to actually optimize for this pile of garbage unless someone pays me. - But it can't hurt to at least enable the correct functions based on benchmarks. - - Also save some cache on Intel CPUs that don't need the decimate LUT due to having fast bsr/bsf.
- -commit 0f249f12470cef5187674f13bf2cfcb3938f8563 -Author: Fiona Glaser -Date: Mon May 24 11:13:22 2010 -0700 - - Slightly faster mbtree asm - -commit 4d41be9b18375a19a020c75e19db35c3d41834b3 -Author: Fiona Glaser -Date: Fri May 21 15:39:38 2010 -0700 - - Faster deblock strength asm on conroe/penryn - -commit 8423fd9ea4cc1b57478a294a77aa725d025fbfee -Author: Fiona Glaser -Date: Fri May 21 14:32:13 2010 -0700 - - Avoid an extra var2 in chroma encoding if possible - Also remove a redundant if. - -commit 0d74fbda1559edd4240a956ba4a232adf2c0c8c5 -Author: Fiona Glaser -Date: Fri May 21 13:07:12 2010 -0700 - - Avoid a redundant qpel check in lookahead with subme <= 1. - -commit a38f372cfe3c508594f49f6a02d50ea9418a4c09 -Author: Anton Mitrofanov -Date: Tue May 25 19:11:42 2010 +0400 - - Fix ABR rate control calculations - Incorrect frame numbers were used, resulting in slightly inaccurate ratecontrol. - -commit 30a202d2b96b6737b399e82dc82f51bc694ae790 -Author: Anton Mitrofanov -Date: Tue May 25 18:45:16 2010 +0400 - - Fix calculation of total bitrate printed after stop by CTRL+C - -commit 3e8068ab1c8b0fdc491950b1b50c2e5c3149a51e -Author: Kieran Kunhya -Date: Sat May 22 14:32:53 2010 +0100 - - Fix typo in fake-interlaced documentation - -commit 318d9d659619617ba6a04835f15ef133c1ddaaf1 -Author: Fiona Glaser -Date: Tue May 25 17:49:07 2010 -0700 - - Fix CABAC+PCM, regression in r1592 - Changes to queue in CABAC didn't get propagated to PCM code. - -commit 5f9003633736b288265481c57fa779ac200b96a0 -Author: Henrik Gramner -Date: Fri May 21 15:30:26 2010 +0200 - - Fix performance regression in r1582 - Set the correct compiler flags. - -commit 2ea35adf96ab0bdb830692492f38c98caa28684d -Author: Fiona Glaser -Date: Tue May 18 16:48:00 2010 -0700 - - Rewrite deblock strength calculation, add asm - Rewrite is significantly slower, but is necessary to make asm possible. - Similar concept to ffmpeg's deblock strength asm. - Roughly one order of magnitude faster than C. 
- Overall, with the asm, saves ~100-300 clocks in deblocking per MB. - -commit 4bebd7414cdc1a2a4c06004951686071c3a9b532 -Author: Anton Mitrofanov -Date: Fri May 21 10:33:45 2010 +0400 - - Fix different output with differing sync-lookahead - Also reduce memory consumption. - -commit a768a0e6a2063cab3ab287f73a600daf400d7ec1 -Author: Anton Mitrofanov -Date: Tue May 18 22:26:59 2010 +0400 - - Mark Win32 executable as large address aware - -commit 2b61248f4bcbb5f9df32d940732bc26d8feeda8c -Author: Kieran Kunhya -Date: Thu May 20 17:45:16 2010 +0100 - - Add "Fake interlaced" option - This encodes all frames progressively yet flags the stream as interlaced. - This makes it possible to encode valid 25p and 30p Blu-Ray streams. - Also put the pulldown help section in a more appropriate place. - -commit 0a3b4aded728e162cb9a59befd0a3da3553bee7a -Author: Alex Jurkiewicz -Date: Thu May 20 15:01:37 2010 +0800 - - Modify version.sh to output to stdout. - Update configure to match. - -commit a1d7bbe4d270479c5905a39e995814ce80f72587 -Author: Henrik Gramner -Date: Wed May 19 23:09:58 2010 +0200 - - Set correct filesystem permissions for various files - -commit 9a0c21a943cb669b4fc4d38044a0edfc9413291f -Author: Anton Mitrofanov -Date: Wed May 19 21:07:03 2010 +0400 - - Fix regression in r1566 - Intra stats need to be kept track of for fast intra decision. - -commit 1a08335a07adb4a60fc949a749b2dd71f5c11c02 -Author: Fiona Glaser -Date: Tue May 18 11:53:32 2010 -0700 - - Fix rc-lookahead in encoding options SEI in 2-pass with VBV - -commit 047ae529404ac663f85380054d8e446c55e7c2af -Author: Loren Merritt -Date: Mon May 17 14:08:37 2010 -0700 - - Reduce memory usage in 2-pass with b-adapt 2 - -commit 3267f35a63a05bad83e7c50df887984254346785 -Author: Fiona Glaser -Date: Sat May 15 14:48:58 2010 -0700 - - Overhaul CABAC: faster, less cache usage - Horribly munge up the CABAC tables to allow deduplication of some data. - Saves 256 bytes of L1d cache in non-RD, 512 bytes in RD. 
- Add asm versions of bypass and terminal; save L1i cache by re-using putbyte code. - Further optimize encode_decision. - All 3 primary CABAC functions fit in under 256 bytes of code total on x86_64. - -commit 52206369380c1b91d45fc8ee88f036b6e4fee5d5 -Author: Kieran Kunhya -Date: Thu May 13 19:13:35 2010 +0100 - - Fix typo in pulldown - -commit 8939a416c0553f2c0d494d126044211007c742fb -Author: Anton Mitrofanov -Date: Wed May 12 22:05:34 2010 +0400 - - Fix bitrate calculation in progress status - Was slightly incorrect due to using pts, which is out of order. - -commit 53eda22e7d245f3f435903a64fd99d7ecce79ab1 -Author: Anton Mitrofanov -Date: Wed May 12 01:57:38 2010 +0400 - - Fix crash with sliced-threads on Phenom - -commit af0c64f701d90cf9326185e954f3d85e25bfb338 -Author: Fiona Glaser -Date: Mon May 10 22:59:12 2010 -0700 - - Fix condition for printing rc=cbr in options SEI - Also fix crf-max formatting. - -commit 8c02c790353c3ef8ffd091567e04d4f73b8ad2f8 -Author: Henrik Gramner -Date: Mon May 10 23:27:36 2010 +0200 - - Shrink even more constant arrays - -commit dfba665aa511ffa2fa17fbb9c71e980c4216accc -Author: Fiona Glaser -Date: Sat May 8 12:07:13 2010 -0700 - - Add API function to trigger intra refresh - Useful for interactive applications where the encoder knows that packet loss has occurred on the client. - Full documentation is in x264.h. - -commit a7d75da6b4e7e4d57791a1b58abe164da61e6f00 -Author: Fiona Glaser -Date: Sat May 8 11:58:22 2010 -0700 - - Fix intra refresh behavior with I-frames - Intra refresh still allows I-frames (for scenecuts/etc). - Now I-frames count as a full refresh, as opposed to instantly triggering a refresh. - -commit 54e784fdf410bf6dd7dd2312251fbe576a0d03fd -Author: Anton Mitrofanov -Date: Thu May 6 10:03:31 2010 -0700 - - More cosmetics - -commit e997028964e4023552411176bce526c98c793d34 -Author: Fiona Glaser -Date: Thu May 6 00:53:20 2010 -0700 - - Fix unresolved symbol in r1573 - gnu ld didn't complain, but some other linkers did. 
- -commit c74934475b92b5dea2c48db8dd08c4ab0e93c31e -Author: Steven Walters -Date: Wed May 5 19:54:04 2010 -0400 - - Remove unnecessary --enable options - Change --enable-visualize to actually check for X11 support. - -commit 9ce2783458beaf3a66089a7c82ad0b5ede0c48bd -Author: Fiona Glaser -Date: Mon May 3 21:27:16 2010 -0700 - - Don't force row QPs to integer values with VBV - VBV should no longer raise the bitrate of the video. That is, at a given quality level or average bitrate, turning on VBV should only lower the bitrate. - This isn't quite true if adaptive quant is off, but nobody should be doing that anyways. - Also may result in slightly more accurate per-row VBV ratecontrol. - -commit 43564b799787749cf14a33a47e852d34de73758b -Author: James Darnley -Date: Sun May 2 16:30:50 2010 -0700 - - Add field-order detection to y4m demuxer - -commit d7268f19b909566e94760bc49b01a5596c0b4ac6 -Author: Fiona Glaser -Date: Sun May 2 11:45:15 2010 -0700 - - Fix sliced-threads + interlaced - Broken in r1546. - -commit 94123d65e8c23e8fa05b138f9770e58d975b1cc0 -Author: Fiona Glaser -Date: Sun May 2 11:41:36 2010 -0700 - - Improve temporal MV prediction - Predict based on the results of p16x16 search, not final MVs. - This lets us get predictions even if mode decision chose intra. - Also improves cache coherency. - -commit 8399311e5bccb75d6c1327d3ee050c68eefe8c5c -Author: Fiona Glaser -Date: Sat May 1 19:34:14 2010 -0700 - - More accurate MV prediction on edges in lookahead - -commit 15c02c2d10fcd532d873d08ac929d8f8cae694f9 -Author: Fiona Glaser -Date: Sat May 1 19:32:01 2010 -0700 - - Error out on invalid input stride - Might catch some crashes due to buggy calling applications. - -commit 68438826539ee3376e0469e16996a35e544176ef -Author: Fiona Glaser -Date: Sat May 1 00:18:01 2010 -0700 - - Remove unnecessary debugging assert - Shouldn't have been in r1568 to begin with. 
- -commit 795a64f1f26dee1bff676dd223f8c93a0a58e1fe -Author: Fiona Glaser -Date: Fri Apr 30 13:45:50 2010 -0700 - - Shrink some more constant arrays - -commit 311c4bb16a49e7a37408c3e29a6d385883592f11 -Author: Fiona Glaser -Date: Fri Apr 30 11:36:19 2010 -0700 - - Deduplicate asm constants, automate name prefixing - Auto-prefix global constants with x264_ in cextern. - Eliminate x264_ prefix from asm files; automate it in cglobal. - Deduplicate asm constants wherever possible to save data cache (move them to a new const-a.asm). - Remove x264_emms() entirely on non-x86 (don't even call an empty function). - Add cextern_naked for a non-prefixed cextern (used in checkasm). - -commit cca478edc595d507d6486d548448802461a74547 -Author: Fiona Glaser -Date: Fri Apr 30 09:57:55 2010 -0700 - - Shrink a few x86 asm functions - Add a few more instructions to cut down on the use of the 4-byte addressing mode. - -commit c490e416499d275be462cbf9e071df4a9a5b7484 -Author: Fiona Glaser -Date: Thu Apr 29 19:53:59 2010 -0700 - - Make options SEI use weight* instead of wpred* - More intuitive and maps more reasonably to the CLI options. - Breaks statsfile backwards-compatibility. - -commit 6d12fae91a5faa4f82917f5caaed4ddad39ac591 -Author: Loren Merritt -Date: Thu Apr 29 17:35:25 2010 +0000 - - r1548 broke subme < 3 + p8x8/b8x8 - Caused significantly worse compression. Preset-wise, only affected veryfast. - Fixed by not modifying mvc in-place. - -commit 13922ab880162530b1acee4dfddfd046dbdeb0f3 -Author: Henrik Gramner -Date: Tue Apr 27 01:44:33 2010 +0200 - - More write-combining - -commit a40aa64dadb89d371671d49419f3b763302925f5 -Author: Fiona Glaser -Date: Mon Apr 26 15:10:11 2010 -0700 - - Reduce lookahead memory usage, cache misses - Merge lowres_types with lowres_costs. 
- -commit a6410b8c28645326c332857fc47d985b9031617c -Author: Fiona Glaser -Date: Sun Apr 25 14:54:29 2010 -0700 - - Fix build on x86 with asm on but SSE off - -commit 22acdd610c9d9bdda31295c388a4d59d93b5d704 -Author: Fiona Glaser -Date: Sat Apr 24 13:55:51 2010 -0700 - - Don't calculate ref/partition stats if not necessary - -commit 7d38392b3818f056088ce4f475626bdd2be018f4 -Author: Fiona Glaser -Date: Sat Apr 24 13:07:18 2010 -0700 - - Split out MV prediction into mvpred.c - Make common/macroblock.c a bit less gigantic. - -commit 8a8d72fee877e32a300419114e02038ddb993d46 -Author: Loren Merritt -Date: Sat Apr 24 16:22:14 2010 +0000 - - Fix mv predictor clipping on non-x86 (regression in r1548) - -commit 2788cdf638060ebe021a2d33d72ea0b86608bedd -Author: Anton Mitrofanov -Date: Sat Apr 24 00:26:13 2010 +0400 - - Move getopt.c to x264cli sources from libx264 - Only affects builds on systems without getopt.c. - -commit 09f97ee9f910ef157e5186bd3ad82e7818cda144 -Author: Fiona Glaser -Date: Thu Apr 22 12:53:07 2010 -0700 - - Move deblocking code to a separate file - Should clean up frame.c a bit. - -commit b3005ee3fe778d4eade4d472ee9550120040caee -Author: Steven Walters -Date: Tue Apr 20 19:48:02 2010 -0400 - - fix ffms demuxer to support input timebase values > 2^31 - -commit a7e037971f777b107583c75af335067f3fd813e3 -Author: Fiona Glaser -Date: Tue Apr 20 16:53:06 2010 -0700 - - Fix 10l in cache_load changes - Broke constrained intra pred, probably not anything else. - -commit e2f0f1816c8e930800270b0cb2198416700761c1 -Author: Fiona Glaser -Date: Tue Apr 20 16:50:13 2010 -0700 - - Faster fullpel predictor checking - Also shave a few instructions off dia/hex motion estimation loops. 
- -commit 8d9fe0220794bb35a0e2b17ff9f0c0660b781bcb -Author: Loren Merritt -Date: Tue Apr 20 09:40:49 2010 +0000 - - Fix checkasm's generation of deblock inputs (regression in r1517) - -commit e091a5e32baed410d79f871667d5f28a4fdc5a35 -Author: Loren Merritt -Date: Tue Apr 20 09:17:18 2010 +0000 - - Fix printing of bitrate when timestamps aren't available - Doesn't affect x264cli, but was broken in some other apps in CFR mode. - -commit 21f1a3c438a8404f61f3f1f1e5270d3d7beaff9d -Author: Fiona Glaser -Date: Tue Apr 20 00:46:29 2010 -0700 - - Don't check mv0 twice - One less SAD in motion estimation. - Also rename bmv -> pmv; more accurate naming. - -commit 564cfb8a1173fe1e037c51e76af36e5e75fddfba -Author: Fiona Glaser -Date: Mon Apr 19 11:02:27 2010 -0700 - - Remove reordering restrictions from weightp - Apparently the spec does allow two consecutive copies of the same frame in the reference list. - This involves an incredibly ugly hack to wrap around the frame number. - Very slight compression improvement. - -commit df275d503348cce71c110e278f2f866e0ee87f5e -Author: Fiona Glaser -Date: Mon Apr 19 23:34:03 2010 -0700 - - Print intra chroma pred modes in stats - -commit e3c766fcc2edcdc0d753888a95aab778d9c07769 -Author: Fiona Glaser -Date: Sun Apr 18 22:54:48 2010 -0700 - - Add mv0 special case in pskip chroma MC - Significantly faster pskip MC. 
- -commit f25f234555462fcd284bde0d70744ed8d774968c -Author: Francois Cartegnie -Date: Sun Apr 18 13:04:59 2010 -0700 - - Fix build scripts to work with non-GNU tools - -commit 641a8d543d64c68fe7e1e2dd0e0ca966a4795855 -Author: Fiona Glaser -Date: Fri Apr 16 20:04:13 2010 -0700 - - Faster deblock reference frame checks - Use a lookup table to simplify logic - -commit 4e105e079314b2fe04742d5605ffb0d961c16813 -Author: Henrik Gramner -Date: Fri Apr 16 22:39:45 2010 +0200 - - Faster chroma CBP handling - -commit d48c3809d24e8cc7caff2c39ae1544a957452787 -Author: Fiona Glaser -Date: Fri Apr 16 11:36:43 2010 -0700 - - Fix issues with extremely large timebases - With timebase denominators >= 2^30 , x264 would silently overflow and cause odd issues. - Now x264 will explicitly fail with timebase denominators >= 2^31 and work with timebase denominators 2^31 > x >= 2^30. - -commit bb1294f18ad8dd938532cb4247c8b207726874ad -Author: Fiona Glaser -Date: Fri Apr 16 12:06:07 2010 -0700 - - MMX code for predictor rounding/clipping - Faster predictor checking at subme < 3. - -commit c1fb471c16332f93b71327c1783eacffb53548ec -Author: Fiona Glaser -Date: Fri Apr 16 03:06:46 2010 -0700 - - Fix four minor bugs found by Clang - -commit 60b158144c942016db5ae6adfa3040bd395e4006 -Author: Fiona Glaser -Date: Thu Apr 15 16:32:31 2010 -0700 - - Move deblocking/hpel into sliced threads - Instead of doing both as a separate pass, do them during the main encode. - This requires disabling deblocking between slices (disable_deblock_idc == 2). - Overall performance gain is about 11% on --preset superfast with sliced threads. - Doesn't reduce the amount of actual computation done: only better parallelizes it. - -commit 9df61bcc12b3c28e4cd743a2a789ef2f197fc1aa -Author: Fiona Glaser -Date: Wed Apr 14 14:43:25 2010 -0700 - - Prefetch MB data in cache_load - Dramatically reduces L1 cache misses. - ~10% faster cache_load. 
- -commit 72f79049c1c34ea5feb41a05f26c42f65451b681 -Author: Fiona Glaser -Date: Fri Apr 23 19:09:37 2010 +0000 - - Fix a ton of pessimization caused by aliasing in cache_save and cache_load - -commit f80446e889e5fb1734bc462115303593f3b093f3 -Author: Fiona Glaser -Date: Fri Apr 23 19:09:18 2010 +0000 - - Add CP128/M128 macros using SSE - -commit ef7036991ff50eed268b924e3f669c5e1afb7f92 -Author: Fiona Glaser -Date: Sun Apr 11 13:36:50 2010 -0700 - - Fix various early terminations with slices - Neighbouring type values (type_top, etc) are now loaded even if the MB isn't available for prediction. - Significant overall performance increase (as high as 5-10%+) with lots of slices (e.g. with slice-max-size). - -commit 25047b4042b18bfd7ef7d40fd48e904852da1ada -Author: Anton Mitrofanov -Date: Tue Apr 13 21:25:42 2010 +0400 - - Enable --fast-pskip on fast firstpass - -commit 2d3b31f574f2d1cf80a51cf2af7720ed30cd10b3 -Author: Steven Walters -Date: Tue Apr 13 08:44:37 2010 -0400 - - Make interlaced detection in avisynth only apply to field-based input - Fixes improper flagging of progressive sources. - -commit e4289459eae03c18733f617012b67cd00e31b6ab -Author: Anton Mitrofanov -Date: Tue Apr 13 19:55:12 2010 +0400 - - Set psy=0 in lossless mode - Doesn't actually affect output, just what's written in the SEI. - -commit 5c88af35b79bd59d9e12e7a7761fc0e29f9075c4 -Author: Loren Merritt -Date: Sun Apr 11 04:20:04 2010 +0000 - - Fix a use of sad_x4 that had non-mod64 stride - Minimal speed improvement, but fixes a violation of internal api. - -commit 8e098f8e53de9801f2c1b382992736ffbc1e74a6 -Author: Fiona Glaser -Date: Sat Apr 10 13:15:30 2010 -0700 - - Make keyint_min auto by default - Gives more reasonable default settings when using short GOPs. - -commit 04f73bedf6a61099de58b0e03c02dc4731768884 -Author: Fiona Glaser -Date: Sat Apr 10 00:49:19 2010 -0700 - - Faster mv predictor checking at subme < 3 - Simplify the predicted MV cost check. 
- -commit cec7764a9a3749d6f67ea25af3082178e4d70d34 -Author: Fiona Glaser -Date: Sat Apr 10 00:35:50 2010 -0700 - - Special case in qpel refine for subme=1 - ~15-20% faster qpel refine with subme=1. - Some minor cleanups in refine_supel. - -commit 9f053b5c5c59fca4c40ec7914c95b36d022c2887 -Author: Henrik Gramner -Date: Sat Apr 10 02:21:01 2010 +0200 - - Cosmetics: VLC tables - -commit 134e221530d246e78f986e14d1f6d25a52bb3836 -Author: Fiona Glaser -Date: Fri Apr 9 18:13:22 2010 -0700 - - Add faster mv0 special case for macroblock-tree - Improves performance on low-motion video. - -commit 2907fc6cc96dc4c8e5d8ac99553e2031c0c1b0ba -Author: Fiona Glaser -Date: Fri Apr 9 01:49:55 2010 -0700 - - Add miscompilation check for x264_clz - Running a Phenom-optimized build of x264 (e.g. -march=amdfam10) on a non-Phenom CPU didn't SIGILL; instead it would silently produce incorrect output. - Now, instead, it will error out loudly. - -commit d037de38df77e2594ed91f80e6ddd4e70e746e4a -Author: Anton Mitrofanov -Date: Wed Apr 7 12:17:20 2010 +0300 - - Fixing floating-point exception in level-checking - Doesn't cause any issues for x264cli, but might impact some calling apps that care (e.g. Delphi apps). - -commit 29820105cf31f3bc399e82450a2bf18944026f88 -Author: Fiona Glaser -Date: Thu Apr 8 18:44:16 2010 -0700 - - Save a few bits in multislice encoding - Set the initial QP for each slice to the last QP of the previous slice. - -commit 788b8b7e5ef458fc2c312f72415740807f43cf99 -Author: Alex Wright -Date: Thu Apr 8 01:25:55 2010 +1000 - - Early termination in 16x8/8x16 search - Combine the actual cost of the first partition with the predicted cost of the second to avoid searching the second when possible. - Reduces the number of times the second partition is searched by up to ~75% in non-RD mode, ~10% in RD mode. - Negligible effect on compression. 
- -commit 049b662b98e80bffa5e21f771f396559a13c3ced -Author: Fiona Glaser -Date: Wed Apr 7 07:45:00 2010 -0700 - - Make MV prediction work across slice boundaries - Should improve motion search with lots of small slices, e.g. with slice-max-size. - Still restricted by sliced threads (won't cross the boundary between two threadslices). - The output-changing part of the previous patch. - -commit 95df880ca172e995ea0d3bdd76544f8f84db7a64 -Author: Fiona Glaser -Date: Wed Apr 7 07:43:46 2010 -0700 - - Cleanup and simplification of macroblock_load - Doesn't do anything now, but will be useful for many future changes. - Splitting out neighbour calculation will make MBAFF implementation easier. - Calculation of neighbour_frame value (actual neighbouring MBs, ignoring slices) will be useful for some future patches. - -commit 459473b212a21aa280b7dd0c355ae73847a988a4 -Author: Fiona Glaser -Date: Wed Apr 7 03:10:03 2010 -0700 - - Add missing #include to display-x11.c - -commit bc7d6c3b758f7cf828ada74cac9a05435d8425ef -Author: Steven Walters -Date: Tue Apr 6 22:08:21 2010 -0400 - - Add TFF/BFF detection to all demuxers - Fix interlaced Avisynth input, automatically weave field-based input. - -commit df902b5b4b672016b03eb650618ff6bd3e188c96 -Author: Fiona Glaser -Date: Tue Apr 6 13:53:22 2010 -0700 - - Correctly mark output frames as BREF - Simplify pic_out code. - -commit e9726b63b92d9b704a9e8cbf9665ec0621ada5bb -Author: Kieran Kunhya -Date: Sat Apr 3 14:59:59 2010 -0700 - - Fix HRD compliance - As usual, the spec is so insanely obfuscated that it's impossible to get things right the first time. - -commit de9e381d23fa7574003502a514bed3b624a6e41b -Author: Alex Wright -Date: Sat Apr 3 14:50:26 2010 -0700 - - Better b16x8/8x16 early termination in B-frames - A bit slower but up to 1-2% better compression. 
- -commit 43d3e08fd1b1cd481acc8944d8f685f9fb383387 -Author: Fiona Glaser -Date: Fri Apr 2 12:23:52 2010 -0700 - - Fix 10L in B-skip improvement patch - -commit 4d92f3f1cc263695debcdc4c8fa5016504225ad3 -Author: Fiona Glaser -Date: Fri Apr 2 03:09:48 2010 -0700 - - Fix printing of SEI header with VBV + ABR - SEI header shouldn't say CBR unless bitrate == maxrate. - -commit 3a6946754b5d14914132aae2971c8318078672d2 -Author: Fiona Glaser -Date: Thu Apr 1 22:33:42 2010 -0700 - - Simplify slicetype_frame_cost - Avoid redundant calculations when VBV is on (due to the intra-only call). - Move most of the logic into per-MB code. - -commit 68cae61a9f484274594eeb264355f9c364f317c5 -Author: Fiona Glaser -Date: Thu Apr 1 15:51:59 2010 -0700 - - Faster CABAC state copying for small partitions - Save ~25 clocks per i4x4, i8x8, and sub8x8 RD call. - -commit 58d2349dd7aad34a2cf09be081670d510657eda1 -Author: Fiona Glaser -Date: Wed Mar 31 01:44:07 2010 -0700 - - Massive cosmetic and syntax cleanup - Convert all applicable loops to use C99 loop index syntax. - Clean up most inconsistent syntax in ratecontrol.c, visualize, ppc, etc. - Replace log(x)/log(2) constructs with log2, and similar with log10. - Fix all -Wshadow violations. - Fix visualize support. - -commit 3b31b6cd2ed6e368970171c5a36d66dcfc0917dd -Author: Fiona Glaser -Date: Tue Mar 30 23:30:09 2010 -0700 - - Fix array overread in b8x16 search - -commit d45ad67fd03c7bce60bc06d4cae074549a34b6c7 -Author: Fiona Glaser -Date: Mon Mar 29 19:03:13 2010 -0700 - - Faster direct check with subpartitions off - Also simplify the whole function a bit. 
- -commit 30fda434119c30f70b7b5124eb811b52a85cf768 -Author: Fiona Glaser -Date: Mon Mar 29 02:14:25 2010 -0700 - - Print crf-max with appropriate precision in SEI - -commit dab0ee2f4f67363511344ba9ba134cf32373f9d7 -Author: Yusuke Nakamura -Date: Mon Mar 29 00:05:30 2010 -0700 - - Fix 10l in timecode seeking - -commit 549b115a89c33db9776a39df5351f7a241877314 -Author: Yusuke Nakamura -Date: Mon Mar 29 13:51:02 2010 +0900 - - Fix 10L: Remove needless error check - This error check was for cfr input + --timebase, but that doesn't happen, and brings about a bug with vfr input. - -commit 9fbcc12abe4f78c0f0d9ba44813b97528d9532db -Author: Fiona Glaser -Date: Sun Mar 28 20:40:42 2010 -0700 - - Don't use 2 L1 refs with pyramid + ref=1 - Slightly faster encoding with ref=1. - -commit d427ae20edba2b1509ceb9b5dea39ec33ee7b1e8 -Author: Henrik Gramner -Date: Fri Mar 26 17:57:23 2010 -0700 - - Update copyright year in SEI header - -commit 0b720fee5b6adaf99c1b37c90af8e4023405d224 -Author: Fiona Glaser -Date: Fri Mar 26 15:33:20 2010 -0700 - - New "superfast" preset, much faster intra analysis - - Especially at the fastest settings, intra analysis was taking up the majority of MB analysis time. - This patch takes a ton more shortcuts at the fastest encoding settings, decreasing compression 0.5-5% but improving speed greatly. - Also rearrange the fastest presets a bit: now we have ultrafast, superfast, veryfast, faster. - superfast is the old veryfast (but much faster due to this patch). - veryfast is between the old veryfast and faster. - faster is the same as before except with MB-tree on. - - Encoding with subme >= 5 should be unaffected by this patch. 
- -commit 4805079dfe3173802e06630fa27841d57aed5952 -Author: Fiona Glaser -Date: Thu Mar 25 14:46:24 2010 -0700 - - Avoid redundant MV prediction in duplicate refs - -commit 54e09223021a67bc173efc9e91b02d5ccf81d188 -Author: Henrik Gramner -Date: Wed Mar 24 23:27:30 2010 +0100 - - Cosmetics in mvd handling - Use a 2D array instead of doing manual pointer arithmetic. - -commit de8f0ac83809fc127d3ed63abe6b2392698eea68 -Author: Fiona Glaser -Date: Wed Mar 24 07:25:01 2010 -0700 - - Fix make uninstall on systems with executable suffixes - -commit aad4437600e6f9945a42c024e11de4bf5a785a06 -Author: Fiona Glaser -Date: Tue Mar 23 14:00:58 2010 -0700 - - Add tune for still image compression - There has been some demand for this from companies looking to use x264 for still image compression (it can outperform JPEG or JPEG-2000 by a factor of 2 or more). - Still image compression is a bit different; because temporal stability isn't an issue, we can get away with far more powerful psy settings. - -commit 774dbb4795638f4b8ead6a77bc045584223f4d03 -Author: Henrik Gramner -Date: Mon Mar 22 02:59:50 2010 +0100 - - Pad non-mod16 resolutions using the correct field - - Improves compression of interlaced videos with non-mod16 heights. - -commit e4404fa3f491b8bfad496b300d065569e5a292bc -Author: Fiona Glaser -Date: Sun Mar 21 09:10:00 2010 -0700 - - Document slow/fast firstpass in --fullhelp - -commit 084adc2e54f78ecc0bb95966a2b179756c25a71e -Author: Holger Lubitz -Date: Sat Mar 20 20:41:21 2010 +0100 - - Fix some misattributions in profiling - Cycles spent in load_hadamard and the avg2 w16 ssse3 cacheline split code were misattributed. - -commit e77bbb6af56d2c7ff2f184e6cfdcac6f2328ccfa -Author: Fiona Glaser -Date: Sat Mar 20 17:07:12 2010 -0700 - - Much faster non-RD intra analysis - Since every pred mode costs at least 1 bit, move that part into the initial SATD cost. - This lets i4x4/i8x8 analysis terminate earlier. 
- If the cost of the predicted mode is less than the cost of signalling any other mode, early-terminate the analysis. - -commit d8d83a9624744b4fc79cf71d31ef32c2678c4dae -Author: Fiona Glaser -Date: Wed Mar 17 15:53:43 2010 -0700 - - Fix stack alignment in sliced threads - Could cause crashes when called from non-GCC-compiled applications. - -commit 18eed0b9ee1314cc3ba9d16c0e44401f62aba624 -Author: Henrik Gramner -Date: Tue Mar 16 01:46:00 2010 +0100 - - Cosmetics: use sizeof() where appropriate - -commit 137e233f39438654d0c7d17c8e723a8eecc02128 -Author: Fiona Glaser -Date: Mon Mar 15 00:01:57 2010 -0700 - - Split up analyse_init - Save some time by avoiding some unnecessary inits and moving other parts to per-thread init. - -commit 7a282a5892454f441d94e64e0a41c617472fa798 -Author: Henrik Gramner -Date: Mon Mar 15 01:19:45 2010 +0100 - - Reduce stack usage of b-adapt 2's trellis - Also remove some redundant code. - -commit 37b4707b7d868206ca2b35ac85c0fc7a7848838e -Author: Fiona Glaser -Date: Sun Mar 14 00:25:02 2010 -0800 - - Various motion estimation optimizations - Faster method of checking MV range. - Predict MVs and cache MVs/MVDs for bidir qpel-RD. - A whole bunch of other minor optimizations. - Slightly better performance and compression. - -commit 4c03ec69fc91c60ff250d25fe805d1d5105c5fcf -Author: Fiona Glaser -Date: Sun Mar 14 00:19:59 2010 -0800 - - Overhaul macroblock_cache_rect - Unify the rectangle functions into a single one similar to ffmpeg's fill_rectangle. - Remove all cases of variable-size cache_rect calls; create a function-pointer-based system for handling such cases. - Should greatly decrease code size required for such calls. - -commit 8b4cca0e41f39748bb45c5cf88231d052df4e8cf -Author: Fiona Glaser -Date: Sun Mar 14 16:48:22 2010 -0700 - - Make a bunch of small functions ALWAYS_INLINE - Probably no real effect for now, but needed for the next patch. 
- -commit 219505afc89c0bec136a65e68cb9fdfca6d9bf85 -Author: Loic Minier -Date: Wed Mar 10 05:26:46 2010 -0800 - - Two compatibility fixes - Add IA64 support in configure. - -commit 6f3a6d52e605acc9df8277acb5c7094190898d82 -Author: Henrik Gramner -Date: Fri Mar 5 03:19:47 2010 +0100 - - Faster x264_macroblock_encode_pskip - GCC is apparently unable to optimize out the calculation of a variable when it isn't used. - -commit 47092e82824ac0fb7f2ee370762feec2ae6d2a0a -Author: Fiona Glaser -Date: Sun Mar 7 04:10:30 2010 -0800 - - Much more accurate B-skip detection at 2 < subme < 7 - Use the same method that x264 uses for P-skip detection. - This significantly improves quality (1-6%), but at a significant speed cost as well (5-20%). - It also may have a very positive visual effect in cases where the inaccurate skip detection resulted in slightly-off vectors in B-frames. - This could cause slight blurring or non-smooth motion in low-complexity frames at high quantizers. - Not all instances of this problem are solved: the only universal solution is non-locally-optimal mode decision, which x264 does not currently have. - - subme >= 7 or <= 2 are unaffected. - -commit 639b18a6f9904039e46376c55ad60e24d8617ab6 -Author: Alexander Strange -Date: Sun Mar 7 02:57:04 2010 -0500 - - Reformat profile restrictions in --fullhelp. - - Put "no interlaced", "no lossless" on their own line to avoid them - running into the default options list. - -commit a9adb0d4ad942f4d0cf99750fbc124b173ba0a38 -Author: James Darnley -Date: Sat Mar 6 18:28:07 2010 -0800 - - Fix typo in configure - -commit fea8f42ebe6141272cda8dd2112ba5517432b1f6 -Author: David Conrad -Date: Sat Mar 6 10:29:57 2010 -0800 - - Add support for spaces to iPhone GAS preprocessor script - -commit 6ac9e171a44790f312b3cd0ae77b5213f04e16ba -Author: Yusuke Nakamura -Date: Sat Mar 6 19:25:30 2010 +0900 - - Fix slightly wrong mp4 duration. 
- -commit ddfe41245f68771f183a3b5caa740e3aa3adce79 -Author: Yusuke Nakamura -Date: Sat Mar 6 19:24:32 2010 +0900 - - Fix link errors with newest gpac cvs - gpac decided to randomly break API and require us to use their own custom malloc and free. - -commit 2a2db86dc2bad14e13b7568ee212435cd4e5f059 -Author: Kieran Kunhya -Date: Fri Mar 5 20:43:02 2010 +0000 - - Save a few bits in slice headers - Don't override the maximum ref index in the slice header if it's the same as the default. - Also update the naming of the relevant variables in the PPS. - -commit 415aac4ff746909ea45d5afe94ba256979e647bd -Author: Fiona Glaser -Date: Thu Mar 4 09:59:09 2010 -0800 - - Shrink some arrays in x264_t - Also remove an unnecessary assignment from cache_load. - -commit 30eb4abce119fe02304f16b0712a399e7e125c1d -Author: Fiona Glaser -Date: Wed Mar 3 11:22:29 2010 -0800 - - Use x264_log in more places instead of fprintf - -commit 89183a0e32256b94c0755eaf0d494a860fa0ef08 -Author: Anton Mitrofanov -Date: Wed Mar 3 10:14:20 2010 -0800 - - Fix two nondeterminisms - Move noise reduction data into thread-specific data. - Use correct reference list for L1 temporal predictors. - -commit 7ff23daa52db92d7fcc4633e8ad21f4f6a9107a5 -Author: Fiona Glaser -Date: Fri Mar 19 14:44:10 2010 -0700 - - "CRF-max" support with VBV - This is a rather curious feature that may have more use than is initially obvious. - In CRF mode with VBV enabled, CRF-max allows the user to specify a quality level which the encoder will never go below, even due to the effects of VBV. - This is not the same as qpmax, which is not aware of issues like scene complexity. - Setting this WILL cause VBV underflows in any situation where the encoder would have needed to exceed the relevant CRF to avoid underflow. - - Why might one want to do this even if it would cause VBV underflows? 
- In the case of streaming, particularly ultra-low-latency streaming, it may be preferable to drop frames than to display frames that are of too low a quality. - Thus, in extremely complex scenes, rather than display completely awful video, the streaming server could simply drop to a lower framerate. - Scenecuts, which normally look terrible under situations like single-frame VBV, could be handled by just displaying them a bit later and dropping frames to compensate. - In other words, it's better to see the scenecut 150ms delayed than for it to look like a blocky mess for 150ms. - - On the caller-side, this would be handled by detecting the output size of x264's frames and dropping future frames to compensate if necessary. - - This can also be used in normal encoding simply to ensure that VBV does not hurt quality too much (at the cost of potentially causing underflows). - This can help quite a lot when using single-frame VBV and sliced threads, where VBV can often be somewhat unstable. - -commit bb9b16b4722a1273885367f13f448516efe47ed1 -Author: Kieran Kunhya -Date: Tue Mar 2 00:57:10 2010 -0800 - - Blu-ray support: NAL-HRD, VFR ratecontrol, filler, pulldown - x264 can now generate Blu-ray-compliant streams for authoring Blu-ray Discs! - Compliance tested using Sony BD-ROM Verifier 1.21. - Thanks to The Criterion Collection for sponsoring compliance testing! - - An example command, using constant quality mode, for 1080p24 content: - x264 --crf 16 --preset veryslow --tune film --weightp 0 --bframes 3 --nal-hrd vbr --vbv-maxrate 40000 --vbv-bufsize 30000 --level 4.1 --keyint 24 --b-pyramid strict --slices 4 --aud --colorprim "bt709" --transfer "bt709" --colormatrix "bt709" --sar 1:1 -o - - This command is much more complicated than usual due to the very complicated restrictions the Blu-ray spec has. - Most options after "tune" are required by the spec. 
- --weightp 0 is not, but there are known bugged Blu-ray player chipsets (Mediatek, notably) that will decode video with --weightp 1 or 2 incorrectly. - Furthermore, note the Blu-ray spec has very strict limitations on allowed resolution/fps combinations. - Examples include 1080p @ 24000/1001fps (NTSC FILM) and 720p @ 60000/1001fps. - - Detailed features introduced in this patch: - - Full NAL-HRD compliance, with both VBR (no filler) and CBR (filler) modes. - Can be enabled with --nal-hrd vbr/cbr. - libx264 now returns HRD timing information to the caller in the form of an x264_hrd_t. - x264cli doesn't currently use it, but this information is critical for compliant TS muxing. - - Full VFR ratecontrol support: VBV, 1-pass ABR, and 2-pass modes. - This means that, even without knowing the average framerate, x264 can achieve a correct bitrate in target bitrate modes. - Note that this changes the statsfile format; first pass encodes made before this patch will have to be re-run. - - Pulldown support: libx264 allows the calling application to specify a pulldown mode for each frame. - This is similar to the way that RFFs (Repeat Field Flags) work in MPEG-2. - Note that libx264 does not modify timestamps: it assumes the calling application has set timestamps correctly for pulldown! - x264cli contains an example implementation of caller-side pulldown code. - - Pic_struct support: necessary for pulldown and allows interlaced signalling. - Also signal TFF vs BFF with delta_poc_bottom: should significantly improve interlaced compression. - --tff and --bff should be preferred to the old --interlaced in order to tell x264 what field order to use. - - Huge thanks to Alex Giladi and Lamont Alston for their work on code that eventually became part of this patch.
- -commit 4d3c4787622d44eef8b813bc4324531546bd8aa5 -Author: Yusuke Nakamura -Date: Sun Feb 28 21:42:19 2010 -0800 - - Timecode input/output - --tcfile-in allows a user to specify a timecode v1 or v2 file to override input timestamps. - Useful for dealing with VFR input, especially when FFMS/LAVF support isn't available. - --tcfile-out writes a timecode v2 file containing the timecodes of the output file. - New --timebase option allows a user to change the stream timebase. - Intended primarily for forcing timebase with timecode files if necessary. - When using --seek, note that x264 will seek in the timecode file as well. - -commit 1f9393ebe9efbae2da2a70a61aad35d270bb15f6 -Author: Alex Wright -Date: Sun Feb 28 01:29:15 2010 -0800 - - Mixed-refs support for B-frames - Small speed cost, usually a few percent at most. Generally has lowest cost in cases when it isn't very useful. Up to ~2% better compression overall on highly complex sources. - - Also fix a few minor bugs in B-frame analysis and various bits of cleanup. - -commit a934f0fa4763f57e820b7c9b2cfcbc8c00447ba1 -Author: Henrik Gramner -Date: Mon Mar 1 22:01:04 2010 +0100 - - Faster rounding of chroma DC coefficients - -commit 9d71ff19a5002e2cd376716bfc25623ff80cd30b -Author: Holger Lubitz -Date: Wed Mar 24 00:54:39 2010 +0100 - - Faster cabac_encode_decision_asm - Minimizes instruction count, which also means smaller code. - Various other slight changes to allow more instruction level parallelism. - -commit 125b8f6c36ba4d6523add3b1815aaef95e6e95e6 -Author: Holger Lubitz -Date: Tue Mar 23 23:13:54 2010 +0100 - - Faster hpel_filter - On ssse3, use pmaddubsw for h filter too (similar to v filter). - Change 32-bit v and c filters to write the result non-temporal. - Add commented-out defines to disable non-temporal operation. - Hardly any black magic here, but still a measurable win especially for ssse3. 
- -commit 6c927227611840ce1e9fe6345fe9dc8cbcff039e -Author: David Conrad -Date: Sun Feb 28 20:34:09 2010 -0500 - - Ignore XYSCSS in y4m if the newer standard C tag is present - - Apparently y4mscaler will generate 4:2:0 files with XYSCSS set to 444 - -commit b1e607e1a26498232722e65449f2b2079f3cb5d1 -Author: Fiona Glaser -Date: Tue Mar 2 10:51:15 2010 -0800 - - Fix regression in r1450 - I_PCM blocks would cause x264 to crash or generate bad output. Simplify PCM handling. - -commit 5f77d1c4a03e7f3ca5a606ab07d1f3901b6d98c5 -Author: Fiona Glaser -Date: Sat Feb 27 14:26:02 2010 -0800 - - Fix crash with intra-refresh + aq-mode 0 - -commit 8ed734467e63bc9f528eabbfd8d58c7d7adec509 -Author: Fiona Glaser -Date: Fri Feb 26 05:04:48 2010 -0800 - - Fix regression in r1453 - r1453 broke psy-trellis with --trellis 2 - -commit 14faac1eaf0760edd2332cf2aaa53311d12df061 -Author: Fiona Glaser -Date: Thu Feb 25 02:07:48 2010 -0800 - - Fix regression in r1449 - Incorrectly placed thread MV check could result in rare thread MV internal errors, esp. with --non-deterministic. - These weren't fatal errors (x264 could recover and continue with slight compression loss). - -commit 269b36dbd1e6355f750ef66894423a1189597ef9 -Author: Fiona Glaser -Date: Wed Feb 24 20:51:43 2010 -0800 - - Cut size of MVD arrays by a factor of 2 again - Only store the MVDs of the edges of each MB. - - Thanks to Michael Niedermayer for the idea. - -commit 80949af24bee8d655c28874fea686174cc027678 -Author: David Conrad -Date: Wed Feb 24 19:39:57 2010 -0500 - - Disable Altivec and VIS optimizations when --disable-asm is specified - -commit 9eb6ec9f017c49a7d6979c72ce0d65a0fc104f0f -Author: Loren Merritt -Date: Tue Feb 23 23:50:23 2010 -0800 - - Fix a buffer overread on odd input resolutions - -commit 89aa4e87032a47562fae19f1ad0fbb3fe6db0ab9 -Author: Fiona Glaser -Date: Wed Feb 24 03:49:32 2010 -0800 - - Fix one bug, one corner case in VBV - qp_novbv wasn't set correctly for B-frames. 
- Disable ABR code for frames with zero complexity. - Disable ABR code for CBR mode; it is completely unnecessary and can have negative consequences. - -commit daa1342e3faac6949cb87f5d0bd4ed42c1fa572f -Author: David Conrad -Date: Wed Feb 24 00:29:21 2010 -0500 - - Port Mans Rullgard's NEON intra prediction functions from ffmpeg - -commit 4aa5679d89f887a338adf380d46a11c53a3d9f39 -Author: Fiona Glaser -Date: Tue Feb 23 13:52:15 2010 -0800 - - Remove unused function - Two other minor fixes. - -commit 232c5e278047da45d21f1961b5eeaf848a51be23 -Author: Fiona Glaser -Date: Tue Feb 23 10:00:41 2010 -0800 - - Use short startcode in more possible situations - Previous patch didn't cover all possible uses according to B.1.2. - -commit 714692cc27d8458da1f7192b066aefc93292fdc3 -Author: Fiona Glaser -Date: Tue Feb 23 09:50:12 2010 -0800 - - Fix fastfirstpass - Apparently the libx264 preset changes made "fastfirstpass" into "fastsecondpass" inadvertently. - -commit 7fbc84b8d3a2e57ce04586459e20cef0f566f43b -Author: Anton Mitrofanov -Date: Tue Feb 23 09:10:26 2010 -0800 - - Fix various silly errors in the previous patches - -commit 1c14dca59a533b7b39d3ef2734683dfc69a10c25 -Author: Fiona Glaser -Date: Tue Feb 23 02:18:07 2010 -0800 - - Actually error out if preset/tune/profile is invalid - Got lost somewhere in the move to libx264-based presets. - -commit d43e46cf5b6a2730a6c216f9a584c1c7bb32d868 -Author: Fiona Glaser -Date: Mon Feb 22 17:33:17 2010 -0800 - - Faster probe_skip, 2x2 DC transform handling - Move the 2x2 DC DCT into the dct_dc asm function to avoid some store-to-load forwarding penalties and extra register loads. - Use dct_dc as part of the early termination in probe_skip. - x86 asm partially by Holger Lubitz. - ARM NEON asm by David Conrad. - -commit e6928564728fb467269b3a8f24f0c90d0b536630 -Author: Fiona Glaser -Date: Sun Feb 21 17:30:52 2010 -0800 - - Use short startcodes whenever possible - Saves one byte per frame for every slice beyond the first.
- Only applies to Annex-B output mode. - -commit c9e8e4de68825662580d99d0cae6989455698c2c -Author: Anton Mitrofanov -Date: Sun Feb 21 13:21:11 2010 -0800 - - New algorithm for AQ mode 2 - Combines the auto-ness of AQ2 with a new var^0.25 instead of log(var) formula. - Works better with MB-tree than the old AQ mode 2 and should give higher SSIM. - -commit a4651264360e21d903214018f9ac24e0b503fa29 -Author: Fiona Glaser -Date: Sun Feb 21 13:20:19 2010 -0800 - - Abide by the MinCR level limit - Some Blu-ray analyzers were complaining about this. - -commit 76a8276f19ca5b01b3d54858cfc95ddc20fb2a71 -Author: Fiona Glaser -Date: Sun Feb 21 03:56:06 2010 -0800 - - Make b-pyramid normal the default - Now that b-pyramid works with MB-tree and is spec compliant, there's no real reason not to make it default. - Improves compression 0-5% depending on the video. - Also allow 0/1/2 to be used as aliases for none/strict/normal (for conciseness). - -commit 3e411be2a3132db8672cd6b2a33c159cfed79fb8 -Author: Fiona Glaser -Date: Sun Feb 21 01:56:12 2010 -0800 - - Move presets, tunings, and profiles into libx264 - Now any application calling libx264 can use them. - Full documentation and guidelines for usage are included in x264.h. - -commit 5e8645b3a53860b03838cc4a60682bceb91e919c -Author: Anton Mitrofanov -Date: Fri Feb 19 10:45:22 2010 -0800 - - Faster, more accurate psy-RD caching - Keep more variants of cached Hadamard scores and only calculate them when necessary. - Results in more calculation, but simpler lookups. - Slightly more accurate due to internal rounding in SATD and SA8D functions. - -commit 5c767904662ccb4703b421308d7270712f60b65b -Author: Fiona Glaser -Date: Thu Feb 18 17:01:38 2010 -0800 - - Much faster and more efficient MVD handling - Store MV deltas as clipped absolute values. - This means CABAC no longer has to calculate absolute values in MV context selection. 
- This also lets us cut the memory spent on MVDs by a factor of 2, speeding up cache_mvd and reducing memory usage by 32*threads*(num macroblocks) bytes. - On a Core i7 encoding 1080p, this is about 3 megabytes saved. - -commit 1ec69befa2f71f130a4a27dc2c1670489efe452d -Author: Fiona Glaser -Date: Thu Feb 18 10:37:57 2010 -0800 - - Add temporal predictor support to interlaced encoding - 0.5-1% better compression in interlaced mode - -commit 26a341ce0b7d78da9da1e899d716c5d73f626388 -Author: Fiona Glaser -Date: Wed Feb 17 22:41:16 2010 -0800 - - Keep track of macroblock partitions - Allows vastly simpler motion compensation and direct MV calculation. - -commit eafb549997bc39e7b27e3edf8b7c11518f78735e -Author: Fiona Glaser -Date: Tue Feb 16 10:13:33 2010 -0800 - - Much faster and simpler direct spatial calculation - -commit e5114f20bca3a282c0e7ee1267fc405e10df49af -Author: Fiona Glaser -Date: Sun Feb 21 14:21:26 2010 -0800 - - SimpleBlock requires Matroska Doctype v2 - -commit 681ff2b0749fdd2f27356a942a5eeff24d70c1b7 -Author: Yusuke Nakamura -Date: Tue Feb 16 11:05:21 2010 -0800 - - Add GPAC version check - -commit 930d7c11f60dd5656302bfc97865eac0ffac921e -Author: Fiona Glaser -Date: Tue Feb 23 01:44:44 2010 -0800 - - Fix stupid regression in interlaced in r1430 - With ref > 8 or b-pyramid, an array over-read could cause slightly incorrect B-frames. - -commit 43d3d921188b114fb8286a9f07370c23294f54f5 -Author: Fiona Glaser -Date: Mon Feb 22 13:04:47 2010 -0800 - - Fix overread of scratch buffer - Could cause crashes on non-mod16 frames. - -commit 466754b8dc7bdb6b32474d1feac8ff0e1451aefb -Author: Fiona Glaser -Date: Mon Feb 22 11:21:51 2010 -0800 - - Fix integer overflow in chroma SSD check - Could cause bad skips at very high quantizers on extreme inputs. 
- -commit 8b333f51feab0c90387294895466b1941d06acc2 -Author: Anton Mitrofanov -Date: Tue Feb 16 09:41:55 2010 -0800 - - Fix I and B-frame QPs with threads - Rounding errors resulted in slightly wrong QPs with threads enabled. - -commit 6703f0249c3ae8b7ccd799199051b41cb21761aa -Author: David Conrad -Date: Mon Feb 15 01:02:46 2010 -0800 - - Fix compilation on ARM - -commit 699b38e0dac5d9e2840ab278a8751de46dd5598b -Author: Loren Merritt -Date: Thu Jan 28 18:09:07 2010 +0000 - - Remove unnecessary PIC support macros - yasm has a directive to enable PIC globally - -commit 6953f9eedfa9ce625efc9f6afb5b76518429198c -Author: Fiona Glaser -Date: Sat Feb 13 11:19:38 2010 -0800 - - Don't even try direct temporal when it would give junk MVs - In PbBbP pyramid structure, the last "b" cannot use temporal because L0Ref0(L1Ref0) != L0Ref0. - Don't even bother analyzing it, just use spatial. - Should improve speed and direct auto effectiveness in CRF and 1-pass modes when b-pyramid is used. - Also makes --direct temporal useful with --b-pyramid, since it will fall back to spatial for frames where temporal is broken. 
- -commit 04996dfb749955610daeb9a35bf3e1230ead460a -Author: David Conrad -Date: Sun Oct 4 07:24:42 2009 -0400 - - iPhone compilation support - Also add --sysroot to configure options - - To build for iPhone 3gs / iPod touch 3g: - CC=/Developer/Platforms/iPhoneOS.platform/Developer/usr/bin/gcc ./configure --host=arm-apple-darwin --sysroot=/Developer/Platforms/iPhoneOS.platform/Developer/SDKs/iPhoneOS3.0.sdk - - For older devices, add - --extra-cflags='-arch armv6 -mcpu=arm1176jzf-s' --extra-ldflags='-arch armv6' --disable-asm - -commit b46cec4f0128df6dc5cea0fca4d73671fe11bbdc -Author: David Conrad -Date: Fri Jan 8 22:40:09 2010 -0500 - - ARM NEON versions of weightp functions - -commit 40d7215682b25d7e39d466b7277f06be88551672 -Author: David Conrad -Date: Sat Feb 13 01:25:56 2010 -0800 - - Use #ifdef instead of #if in checkasm - -commit fc94a28317cf760ea6dc2007ac5f5de683d2d376 -Author: Fiona Glaser -Date: Sat Feb 13 00:52:31 2010 -0800 - - Make the ABR buffer consider the distance to the end of the video - Should improve bitrate accuracy in 2-pass mode. - May also slightly improve quality by allowing more variation earlier-on in a file. - - Also fix abr_buffer with 1-pass: it does something very different than what it does for 2-pass. - Thus, the earlier change that increased it based on threads caused 1-pass ABR to be somewhat less accurate. - -commit 2e9ec3f66aff3b4bae016ad1c3f26d96c7f9c9cd -Author: Alexander Strange -Date: Sat Feb 13 02:22:04 2010 -0500 - - Mark cli_input/output_t variables as const when possible - -commit e49414918c8ed3ee38af734ae942292c8380ce87 -Author: Alexander Strange -Date: Sat Feb 13 02:00:57 2010 -0500 - - mkv: Write the x264 version into the file header - - This only updates the "writing application"; matroska_ebml.c is the - "muxing application", but the version string for that is still hardcoded. 
- -commit 18db4871662008c789980d20b869418e4b08574d -Author: Alexander Strange -Date: Sat Feb 13 01:41:41 2010 -0500 - - mkv: Write SimpleBlock instead of Block for frame headers - - mkvtoolnix writes these by default since 2009/04/13. - Slightly simplifies muxer and allows 'mkvinfo -s' to show B-frames - as 'B' (but not B-ref frames). - -commit 50c78eaea6f8a836dbbbc92f16c118c9fa7e58df -Author: Alexander Strange -Date: Mon Nov 10 00:55:20 2008 -0500 - - Allow | as a separator between psy-rd and psy-trellis values. - [,:/] are all taken when setting psy-trellis in a zone in an mencoder option. - - Also fix a comment typo and remove a useless line of code. - -commit b1c4cf9841beb88229da07ed60d7f1f394dfe341 -Author: Fiona Glaser -Date: Fri Feb 12 21:15:12 2010 -0800 - - Backport various speed tweak ideas from ffmpeg - Add mv0 early termination to spatial direct calculation - Up to twice as fast direct mv calculation on near-motionless video. - - Branchless CAVLC level code adjustment based on trailing ones. - A few clocks faster. - - Check tc value before clipping in C version of deblock functions. - Much faster, but nobody uses those anyways. - - Thanks to Michael Niedermayer for the ideas. - -commit 0ee8e84ed0ccc302c74f2c20a68969cfaa8f6951 -Author: Fiona Glaser -Date: Fri Feb 12 03:33:54 2010 -0800 - - Implement direct temporal + interlaced - This was much easier than I expected. - It will also be basically useless until TFF/BFF support gets in, since it requires delta_poc_bottom to be set correctly to work well. - -commit 8a57269d7ca3547f860568427423357166ba56c1 -Author: Fiona Glaser -Date: Wed Feb 10 13:44:28 2010 -0800 - - Allow longer keyints with intra refresh - If a long keyint is specified (longer than macroblock width-1), the refresh will simply not occur all the time. - In other words, a refresh will take place, and then x264 will wait until keyint is over to start another refresh. 
- -commit 282bbbc5ff53aec253c5076a3a83bd19ba4e9104 -Author: Fiona Glaser -Date: Wed Feb 10 12:12:29 2010 -0800 - - Overhaul sliced-threads VBV - Make predictors thread-local and allow each thread to poll the others to get their predicted sizes. - Many, many other tweaks to improve quality with small VBV and sliced threads. - Note this may somewhat increase the risk of a VBV underflow in such extreme situations (single-frame VBV). - This is tolerable, as most relevant use-cases are better off with a few rare underflows (even if they have to drop a slice) than consistent low quality. - -commit 50582675b0f20b923e629fb7e245900459e1e0b2 -Author: Fiona Glaser -Date: Tue Feb 9 15:08:31 2010 -0800 - - Print psy-(rd|trellis) with more precision in userdata SEI - -commit fd189536dae73a14ae7cf4217fc6473dfcf5ddcb -Author: Fiona Glaser -Date: Mon Feb 15 00:55:16 2010 -0800 - - More formatting fixes in x264 help - -commit 346f1679273b1235795145008f6390c291e89577 -Author: Henrik Gramner -Date: Mon Feb 8 15:53:52 2010 -0800 - - Faster 2x2 chroma DC dequant - -commit e2c56268167522f6eaa0d3cc5fd38d11a7b48b1d -Author: Yusuke Nakamura -Date: Mon Feb 8 01:48:38 2010 -0800 - - Write PASP atom in mp4 muxing - Adds container-level aspect ratio support for mp4. - -commit 46819d56855b9a67efed6b164ad732dea86632f0 -Author: Fiona Glaser -Date: Wed Feb 3 20:27:57 2010 -0800 - - Fix 2-pass ratecontrol continuation in case of missing statsfile - Didn't work properly if MB-tree was enabled. - -commit 50b4cfbfec180b75d2a8dcaea9da502b4c5bbef4 -Author: Fiona Glaser -Date: Wed Feb 3 20:01:16 2010 -0800 - - Smarter QPRD - Catch some cases in which RD checks can be avoided; reduces QPRD RD calls by 10-20%. 
- -commit d9b6077d2be991628670c6e2780a403820fa5de7 -Author: Fiona Glaser -Date: Wed Feb 3 18:36:44 2010 -0800 - - Fix subpel iteration counts with B-frame analysis and subme 6/8 - Since subme 6 means "like subme 5, except RD on P-frames", B-frame analysis - shouldn't use the RD subpel counts at subme 6. Similarly with subme 8. - Slightly faster (and very marginally worse) compression at subme 6 and 8. - -commit fc6bc8c17cb2f24b320a2db72daf300b2b46d1ca -Author: Fiona Glaser -Date: Wed Feb 3 18:19:29 2010 -0800 - - Simplify decimate checks in macroblock_encode - Also fix a misleading comment. - -commit 29dd5ef2e5069f900e5a8730e05d2ed35dcf8c02 -Author: Fiona Glaser -Date: Tue Feb 2 03:15:18 2010 -0800 - - Improve bidir search, fix some artifacts in fades - Modify analysis to allow bidir to use different motion vectors than L0/L1. - Always try the <0,0,0,0> motion vector for bidir. - Eliminates almost all errant motion vectors in fades. - Slightly improves PSNR as well (~0.015db). - -commit 27043c6b0a245859eab7d442a3d7a26cb9ba839e -Author: Fiona Glaser -Date: Mon Feb 1 13:04:47 2010 -0800 - - Slightly faster predictor_difference_mmxext - -commit 34c42187c53f22de8b8ca90acfc3c7df9367ce7a -Author: Fiona Glaser -Date: Fri Jan 29 02:40:41 2010 -0800 - - Add ability to adjust ratecontrol parameters on the fly - encoder_reconfig and x264_picture_t->param can now be used to change ratecontrol parameters. - This is extraordinarily useful in certain streaming situations where the encoder needs to adapt the bitrate to network circumstances. - - What can be changed: - 1) CRF can be adjusted if in CRF mode. - 2) VBV maxrate and bufsize can be adjusted if in VBV mode. - 3) Bitrate can be adjusted if in CBR mode. - However, x264 cannot switch between modes and cannot change bitrate in ABR mode. - - Also fix a bug where x264_picture_t->param reconfig method would not always be frame-exact. - - Commit sponsored by SayMama video calling. 
- -commit a424adb406070fe3ca9be7d02111a9d3b26d25f3 -Author: Yusuke Nakamura -Date: Sat Jan 30 13:53:01 2010 -0800 - - Fix regression in r1406 - Bitrate was printed incorrectly for some input framerates. - -commit 202938b1f3500578ade58bb478df53210d787364 -Author: Loren Merritt -Date: Sat Jan 30 12:01:51 2010 -0800 - - Fix log2f detection, include order, some gcc warnings - r1413 caused crashes on any system with malloc.h. - Also switch to std=c99 or std=gnu99 if supported by the compiler. - Fix visualize support. - -commit 0d38729198fd135bb8edeff4960d421512a26f43 -Author: Fiona Glaser -Date: Fri Jan 29 11:01:44 2010 -0800 - - Fix abstraction violations in x264.c - No calling application--not even x264cli--should ever look inside x264_t. - -commit ef92d3bb5a9b80ece30fe8411a3c33c47f080ce7 -Author: Diogo Franco -Date: Thu Jan 28 17:28:03 2010 -0800 - - Move -D CFLAGS to config.h - -commit e84726c8c2a02b094292f063625a9fdbd6c71253 -Author: Steven Walters -Date: Thu Jan 28 17:26:40 2010 -0800 - - Fix stat with large file support - -commit 567df927f0cd559c7be37b0a19257d1ee0ec5167 -Author: Diogo Franco -Date: Wed Jan 27 20:29:50 2010 -0800 - - Implement ffms2 version check - Depends on ffms2 version 2.13.1 (r272). - Tries pkg-config's built-in version checking first. - Uses only the preprocessor to avoid cross-compilation issues. - -commit 453a8ee404116dc05ecff0572a6353e466f3ef45 -Author: Fiona Glaser -Date: Wed Jan 27 19:41:27 2010 -0800 - - Fix implicit CBR message to only print when in ABR mode - Also make it print outside of debug mode. - -commit 46ff5086af715f072b7cdb221ac75e7d7774f520 -Author: Diogo Franco -Date: Wed Jan 27 13:11:08 2010 -0800 - - Add configure check for log2 support - Some incredibly braindamaged operating systems, such as FreeBSD, blatantly ignore the C specification and omit certain functions that are required by ISO C. - log2f is one of these functions that periodically goes missing in such operating systems. 
- -commit ac759e900e307c34878ca61efa240935a0ebf82b -Author: Diogo Franco -Date: Wed Jan 27 10:12:42 2010 -0800 - - Add config.log support - Now, if configure fails, you'll be able to see why. - -commit 4b4962921a6cf58837bfe438227f9e6112faeb73 -Author: Diogo Franco -Date: Wed Jan 27 09:26:35 2010 -0800 - - Fix cross-compiling with lavf, add support for ffms2.pc - Also update configure script to work with newest ffms. - -commit afc36d0b0ff867541827e3ff0f517df4cdf31fd6 -Author: Yusuke Nakamura -Date: Tue Jan 26 16:01:54 2010 -0800 - - Improve DTS generation, move DTS compression into libx264 - This change fixes some cases in which PTS could be less than DTS. - - Additionally, a new parameter, b_dts_compress, enables DTS compression. - DTS compression eliminates negative DTS (i.e. initial delay) due to B-frames. - The algorithm changes timebase in order to avoid duplicating DTS. - Currently, in x264cli, only the FLV muxer uses it. The MP4 muxer doesn't need it, as it uses an EditBox instead. - -commit 453c029929af2e835b7ee66acd5eb6968df72cdc -Author: Anton Mitrofanov -Date: Tue Jan 26 11:41:18 2010 -0800 - - Various threading-related cosmetics - Simplify a lot of code and remove some unnecessary variables. - -commit a93903c6085ca95e41ed84e2d1d8d22569dd1ae4 -Author: Fiona Glaser -Date: Mon Jan 25 11:23:55 2010 -0800 - - Hardcode the bs_t in cavlc.c; passing it around is a waste - - Saves ~1.5kb of code size, very slight speed boost. - -commit 91c0fd9499a7132ac599080faf55daa3b4c5c89a -Author: David Conrad -Date: Sat Jan 23 18:05:25 2010 -0800 - - Fix lavf input with pipes and image sequences - x264 should now be able to encode from an image sequence using an image2-style formatted string (e.g. file%02d.jpg). - -commit 535f0fa5eb42012e7e62d9eda61c39a11e7b0cb4 -Author: Fiona Glaser -Date: Thu Jan 21 23:07:11 2010 -0800 - - Fix bitstream alignment with multiple slices - Broke multi-slice encoding on CPUs without unaligned access. 
- New system simply forces a bitstream realignment at the start of each writing function and flushes when it reaches the end. - -commit f4186f5e87b6b85b8bceccf5fdca50fb7f6fdfc6 -Author: Fiona Glaser -Date: Thu Jan 21 10:00:07 2010 -0800 - - Merge nnz_backup with scratch buffer - Slightly less memory usage. - -commit ee911101d844b3f0baf653a0f05bc72fa5d32488 -Author: Steven Walters -Date: Wed Jan 20 09:00:54 2010 -0800 - - Use cross-prefix properly with pkg-config for cross-compiling - -commit f5af5f14e5d924a3b57d6bfbd1219a334771727b -Author: Fiona Glaser -Date: Mon Jan 18 20:29:33 2010 -0800 - - Various performance optimizations - Simplify and compact storage of direct motion vectors, faster --direct auto. - Shrink various arrays to save a bit of cache. - Simplify and reorganize B macroblock type writing in CABAC. - Add some missing ALIGNED macros. - -commit c0474786d6358580dd847dd5b3bfe7f2a5465ab1 -Author: Fiona Glaser -Date: Mon Jan 18 15:50:06 2010 -0800 - - Fix crash on new AMD M300 and similar CPUs - Apparently these CPUs have SSE4a, but not misaligned SSE. - -commit 7833e7bb18f8a16949e5047f0e8d081855a59c13 -Author: Fiona Glaser -Date: Sun Jan 17 19:11:05 2010 -0500 - - Fix intra refresh with subme < 6 - Also improve the quality of intra masking. - -commit eba302801995ae5ebc22999fb5a5823ddab61f00 -Author: Fiona Glaser -Date: Sat Jan 16 20:11:29 2010 -0500 - - Add support for multiple --tune options - Tunes apply in the order they are listed in the case of conflicts. - Psy tunings, i.e. film/animation/grain/psnr/ssim, cannot be combined. - Also clarify --profile, which forces the limits of a profile, not the profile itself. - -commit 5bcf1378ede23b75e24d7c71690563a92708723f -Author: Fiona Glaser -Date: Sat Jan 16 02:50:15 2010 -0500 - - Various bugfixes and tweaks in analysis - Fix the oldest-ever bug in x264: b16x8 analysis used the wrong width for predict_mv. - Fix cache_ref calls for slightly better MV prediction in bsub16x16 analysis. 
- Make B-partition analysis consider reference frame costs. - Various other minor changes. - Overall very slightly improved mode decision and motion search in B-frames. - -commit 741ed788e905820d2a9fc892ea288350e939b78f -Author: Loren Merritt -Date: Thu Jan 14 14:52:12 2010 -0500 - - More --me tesa optimizations - -commit 4f7b5f6c1f717b0485d990aca1c0731eefb90f7a -Author: Fiona Glaser -Date: Thu Jan 14 10:39:10 2010 -0500 - - Fix typo in configure - -commit 0b73a76e891516f87412e89c2a298451741578ba -Author: Fiona Glaser -Date: Thu Jan 14 00:07:30 2010 -0500 - - Make --fps force CFR mode - -commit c37a51005f305e7c339051e102fbcab266cbef83 -Author: Fiona Glaser -Date: Wed Jan 13 20:21:31 2010 -0500 - - Eliminate intentional array overflow in quant matrix handling - While it probably never caused problems, it was incredibly ugly and evil. - -commit 0210f805a696d257a714fd211c3df3457ea26ba8 -Author: Fiona Glaser -Date: Wed Jan 13 20:16:13 2010 -0500 - - Faster --me tesa - -commit da619d5deaae712cdfc3641c8ce7a51591fdc4d5 -Author: Anton Mitrofanov -Date: Wed Jan 13 15:44:00 2010 -0500 - - Fix static pthreads + dynamically linked x264 on win32 - Add the necessary static pthread initialization code to a new DLLmain function. - -commit 229d8d76886b740d3403fc942de7e03062946dd0 -Author: Steven Walters -Date: Tue Jan 12 22:55:10 2010 -0500 - - Add getopt_long to the included getopt.c - Fixes option handling on OSs that have a nonworking/missing getopt (e.g. Solaris). - -commit 62ece1c2fef6388925edcce15f519450b2add5dd -Author: Fiona Glaser -Date: Tue Jan 12 20:14:35 2010 -0500 - - Faster psy-trellis init - Remove some unncessary zigzags. - -commit 85dc3f9fab268bda2ec626c7384a6fb2a0b146ba -Author: Fiona Glaser -Date: Tue Jan 12 19:19:07 2010 -0500 - - Simplfy intra mode availability handling - Slightly faster, 1.5kb smaller binary size, less code. 
- -commit 398d0eb3e86ccd1b092fa52cf1217cc58b22ddaa -Author: Fiona Glaser -Date: Sun Jan 10 15:14:02 2010 -0500 - - Fix free callback, add x264_encoder_parameters function - x264 would try to use the passed param struct after freeing if the param_free callback was set. - Probably didn't cause any issues, as probably no programs used the callback in this location yet. - - A new x264_encoder_parameters function is now available in the API. - This function lets the calling application grab the current state of the encoder's parameters. - Use this in x264cli to ensure that the param struct used for set_param is updated with whatever changes x264_encoder_open has made to it. - - Patch partially by Anton Mitrofanov . - -commit aa48c1fbb74308fecfe5f7eceee63076479f32dd -Author: David Conrad -Date: Sat Jan 9 01:52:33 2010 -0500 - - Fix x264 compilation on Apple GCC - Apple's GCC stupidly ignores the ARM ABI and doesn't give any stack alignment beyond 4. - -commit fd1cf29494463f0dd9ac9b01158a78f7c7913a0f -Author: Fiona Glaser -Date: Sat Jan 2 03:27:46 2010 -0500 - - Faster weightp motion search - For blind-weight dupes, copy the motion vector from the main search and qpel-refine instead of doing a full search. - Fix the p8x8 early termination, which had unexpected results when combined with blind weighting. - Overall, marginally reduces compression but should potentially improve speed by over 5%. - -commit bc0ae2ef40289c310027902e72572e5d8990fbd8 -Author: Fiona Glaser -Date: Thu Dec 31 13:45:27 2009 -0500 - - More correct padding constants for lowres planes - Since lowres analysis isn't interlace-aware, we don't need to double the vertical padding for interlaced video. - -commit 5d40e878b75422c7b13cd5ab01ddfc2cf6b33938 -Author: Fiona Glaser -Date: Thu Dec 31 02:57:45 2009 -0500 - - Fix some invalid reads caught by valgrind - Temporal predictor calculation was misled by invalid reference counts for I-frames. 
- -commit cde39046222b112261179144033e7a51430783d0 -Author: Fiona Glaser -Date: Tue Dec 22 18:59:29 2009 -0500 - - Periodic intra refresh - Uses SEI recovery points, a moving vertical "bar" of intra blocks, and motion vector restrictions to eliminate keyframes. - Attempt to hide the visual appearance of the intra bar when --no-psy isn't set. - Enabled with --intra-refresh. - The refresh interval is controlled using keyint, but won't exceed the number of macroblock columns in the frame. - Greatly benefits low-latency streaming by making it possible to achieve constant framesize without intra-only encoding. - Combined with slice-max size for one slice per packet, tests suggest effective resiliance against packet loss as high as 25%. - x264 is now the best free software low-latency video encoder in the world. - - Accordingly, change the API to add b_keyframe to the parameters present in output pictures. - Calling applications should check this to see if a frame is seekable, not the frame type. - - Also make x264's motion estimation strictly abide by horizontal MV range limits in order for PIR to work. - Also fix a major bug in sliced-threads VBV handling. - Also change "auto" threads for sliced threads to "cores" instead of "1.5*cores" after performance testing. - Also simplify ratecontrol's checking of first pass options. - Also some minor tweaks to row-based VBV that should improve VBV accuracy on small frames. - -commit 30d76a5eee9355c5d3e81fc7eae65f926dec16a9 -Author: Kieran Kunhya -Date: Mon Dec 28 10:42:17 2009 -0500 - - LAVF/FFMS input support, native VFR timestamp handling - libx264 now takes three new API parameters. - b_vfr_input tells x264 whether or not the input is VFR, and is 1 by default. - i_timebase_num and i_timebase_den pass the timebase to x264. - - x264_picture_t now returns the DTS of each frame: the calling app need not calculate it anymore. - - Add libavformat and FFMS2 input support: requires libav* and ffms2 libraries respectively. 
- FFMS2 is _STRONGLY_ preferred over libavformat: we encourage all distributions to compile with FFMS2 support if at all possible. - FFMS2 can be found at http://code.google.com/p/ffmpegsource/. - --index, a new x264cli option, allows the user to store (or load) an FFMS2 index file for future use, to avoid re-indexing in the future. - - Overhaul the muxers to pass through timestamps instead of assuming CFR. - Also overhaul muxers to correctly use b_annexb and b_repeat_headers to simplify the code. - Remove VFW input support, since it's now pretty much redundant with native AVS support and LAVF support. - Finally, overhaul a large part of the x264cli internals. - - --force-cfr, a new x264cli option, allows the user to force the old method of timestamp handling. May be useful in case of a source with broken timestamps. - Avisynth, YUV, and Y4M input are all still CFR. LAVF or FFMS2 must be used for VFR support. - - Do note that this patch does *not* add VFR ratecontrol yet. - Support for telecined input is also somewhat dubious at the moment. - - Large parts of this patch by Mike Gurlitz , Steven Walters , and Yusuke Nakamura . - -commit 8c8bfe19dfe0dd0728771594dac2141051860aef -Author: Fiona Glaser -Date: Tue Dec 15 16:59:00 2009 -0800 - - More help typo fixes - -commit 65f988b7bb003e6503133231423a8f5192d32603 -Author: Loren Merritt -Date: Thu Jan 14 03:07:30 2010 +0000 - - Fix x264_clz on inputs > 1<<31 - (though x264 never generates such inputs) - -commit b7fa2857a9eeb3275035673f47a6d64331234816 -Author: Fiona Glaser -Date: Sun Dec 13 03:16:04 2009 -0800 - - Don't do sum/ssd analysis if weightp == 1 - Typo fixes in comments and help. - -commit f30aed6d810ef408cbf19cc6760605b0b87cbfde -Author: Fiona Glaser -Date: Fri Dec 11 17:22:18 2009 -0800 - - Fix two bugs in 2-pass ratecontrol - last_qscale_for wasn't set during the 2pass init code. 
- abr_buffer was way too small in the case of multiple threads, so accordingly increase its buffer size based on the number of threads. - May significantly increase quality with many threads in 2-pass mode, especially in cases with extremely large I-frames, such as anime. - -commit 7f0ef681aa92c585fcb3534b370c7ac60e4866ec -Author: Steven Walters -Date: Thu Dec 10 19:48:51 2009 -0800 - - Avisynth-MT and 2.6 compatibility fixes - Explain to the user why YV12 conversion is forced with Avisynth 2.6. - Fix encoding with Avisynth-MT scripts by inserting the necessary Distributor() call; speeds such scripts back up to expected levels. - -commit e09a20eb39776192715025f52637edf6208488e9 -Author: Steven Walters -Date: Wed Dec 9 16:03:19 2009 -0800 - - Fix zone parsing on mingw - Due to MinGW evidently being in the hands of a pack of phenomenal idiots, MinGW does not have strtok_r, a basic string function. - As such, remove the dependency on strtok_r in zone parsing. - Previously, using zones for anything other than ratecontrol failed. - -commit 84ccdd3a6d1fd2193daabc75cc6299e24fb0e996 -Author: Fiona Glaser -Date: Wed Dec 9 15:03:44 2009 -0800 - - More lookahead optimizations - Under subme 1, don't do any qpel search at all and round temporal MVs accordingly. - Drop internal subme with subme 1 to do fullpel predictor checks only. - Other minor optimizations. - -commit bf70233e48ef64e766adc694c13526be19739b7f -Author: Fiona Glaser -Date: Wed Dec 9 05:56:35 2009 -0800 - - Various minor missing changes from previous commits - Boolify sliced threads too - Remove unused constants from dct-a.asm - Fix a few typos/minor errors in preset documentation - -commit 0b34d4672b1517a5b166d96d20f952122c7a09f7 -Author: Fiona Glaser -Date: Thu Dec 10 16:52:39 2009 -0800 - - Fix regression in direct=auto/temporal in r1364 - Bug caused rare race condition in frame reference handling. - This resulted in invalid bitstreams in some B-frames and, very rarely, crashes. 
- -commit c0e6a94555942bfd0e3b51dfb2aebe695a23754f -Author: Fiona Glaser -Date: Tue Dec 8 17:46:55 2009 -0800 - - Add fast pskip to x264 SEI info header - -commit f0ac608d00433fa6fbff95c0074f2de9e16b4a93 -Author: Steven Walters -Date: Tue Dec 8 11:36:25 2009 -0800 - - Minor seeking fix with Avisynth input - Seeking past the end of the input with --seek would result in the same frame being repeated over and over. - -commit c186d2ac9c2ac4f2157e63b5b86f5bb378ceeffd -Author: Fiona Glaser -Date: Tue Dec 8 03:08:17 2009 -0800 - - Add support for MB-tree + B-pyramid - Modify B-adapt 2 to consider pyramid in its calculations. - Generally results in many more B-frames being used when pyramid is on. - Modify MB-tree statsfile reading to handle the reordering necessary. - Make differing keyint or pyramid between passes into a fatal error. - -commit 073d32e5801899fa516da54bf06527c0ab74dd7b -Author: Fiona Glaser -Date: Mon Dec 7 18:34:05 2009 -0800 - - Use aliasing-avoidance macros in array_non_zero - -commit 346844afc4edc1b5c36990b207706e2fd9b815b0 -Author: Cleo Saulnier -Date: Mon Dec 7 12:40:14 2009 -0800 - - MMX version of 8x8 interlaced zigzag - Just as fast as SSSE3 on Nehalem (and faster on Conroe/Penryn), so remove the SSSE3 version. - -commit 6f221210903f1b4e06146b3cf5e618d62dfc0a8c -Author: Fiona Glaser -Date: Mon Dec 7 00:49:41 2009 -0800 - - Bring back slice-based threading support - Enabled with --sliced-threads - Unlike normal threading, adds no encoding latency. - Less efficient than normal threading, both performance and compression-wise. - Useful for low-latency encoding environments where performance is still important, such as HD videoconferencing. - Add --tune zerolatency, which eliminates all x264 encoder-side latency (no delayed frames at all). - Some tweaks to VBV ratecontrol and lookahead (in addition to those required by sliced threading). - Commit sponsored by a media streaming company that wishes to remain anonymous. 
- -commit a2380187a1a08b71d06fa5302c2356d28f4b7ffc -Author: Alex Jurkiewicz -Date: Mon Dec 7 18:17:29 2009 -0800 - - Add more detailed help for presets/tunes/profiles - Shows what options they represent. - -commit 744fd11932329fa11b444dfa195c6e669cb3061a -Author: Fiona Glaser -Date: Sat Dec 5 03:19:44 2009 -0800 - - qpel RD no longer needs mbcmp_unaligned - -commit 9a100e51dd91ed4cf50bdad4a91b100dd838409d -Author: Loren Merritt -Date: Wed Dec 9 00:37:09 2009 +0000 - - ensure that all boolean options are {0,1} so they print consistently in the options SEI - -commit 75b3871f90713a290be183e1436e792cef51f335 -Author: Fiona Glaser -Date: Sat Dec 5 02:27:30 2009 -0800 - - Actually do r1356 - Somehow commit r1356 got lost in the ether. I'm not sure how, but now it's fixed. - -commit 16bedf051d4a0147340b0f7de24dd68081ac2df9 -Author: Steven Walters -Date: Fri Dec 4 12:17:56 2009 -0800 - - Remove some unused code from x264.c - -commit 5ec818320745db0e78c37cbf7db3a6ec2c6c8dfb -Author: Fiona Glaser -Date: Thu Dec 3 15:36:52 2009 -0800 - - SSSE3 version of zigzag_8x8_field - Slightly faster interlaced encoding with 8x8dct. - Helps most on Nehalem, somewhat disappointing on Conroe/Penryn. - -commit f851c923d59fbaa73bf1148be3b796b043a7e187 -Author: Fiona Glaser -Date: Wed Dec 2 19:55:45 2009 -0800 - - Fix crash in interlaced with >8 refs - Crash introduced in weightp. - -commit 4aa33d658263abb40bf91438b5ec1eb93d86621f -Author: Fiona Glaser -Date: Tue Dec 1 16:15:15 2009 -0800 - - Significantly faster qpel-RD - Cache the results of MC, like in bidir-RD. - Slightly changes output due to the necessary reordering of satd/RD calls. - 5-10% faster qpel-RD. - -commit aaf7548eb173d622fc8e715e1319dc6d2c8e2853 -Author: David Conrad -Date: Tue Dec 1 12:23:09 2009 -0800 - - Add x264 prefix to functions with ffmpeg equivalents - Not important now, but will be when we add libav* input support. 
- -commit ade48a91e4d6933957a368bdab3dd7e0640925fc -Author: Fiona Glaser -Date: Mon Nov 30 01:41:24 2009 -0800 - - 10L in r1353 - Broke mp4 output. - -commit 025f01dba74e6f92aa282e01cafeb8ec841af3e7 -Author: Steven Walters -Date: Thu Nov 26 22:37:18 2009 -0800 - - Enhanced Avisynth input support - Requires avisynth_c.h from the Avisynth API headers. - Reports errors properly from Avisynth script input. - Automatically construct input scripts for almost any input file. - Tries ffmpegsource2, DSS2, directshowsource, and many other sourcing methods, based on the input file extension. - Automatically converts to YV12. - -commit 979c14da90d69d05661430ace29d111efe615281 -Author: Fiona Glaser -Date: Wed Nov 25 10:40:08 2009 -0800 - - Much faster weightp - Move sum/ssd calculation out of lookahead and do it only once per frame. - Also various minor optimizations, cosmetics, and cleanups. - -commit cee009ff0577582f093d01f9a88909157734858e -Author: Kieran Kunhya -Date: Wed Nov 25 01:26:02 2009 -0800 - - Fix bugs in fps/timestamp handling in FLV muxer - -commit eaf9ab20af24900468a0ac71549a9e01d2dca92f -Author: Fiona Glaser -Date: Tue Nov 24 22:37:02 2009 -0800 - - Fix bug in weightp analysis - Weights weren't reset upon early terminations, so old (wrong) weights could stick around. - Small compression improvement. - -commit a9885c789e5a4703cf43b6601cc347836484f853 -Author: Fiona Glaser -Date: Tue Nov 24 20:24:14 2009 -0800 - - Minor deblocking optimization, update comments - -commit b02dd71289e2532bdc3f764145490d62783dd296 -Author: Fiona Glaser -Date: Tue Nov 24 16:21:07 2009 -0800 - - Fix weightb with delta_poc_bottom - Has no effect yet, but will be required once we add TFF/BFF signalling support in interlaced mode. - Gives 0.5-0.7% better compression with proper TFF/BFF signalling. 
- -commit 12353fb902f647a264fb1fe2d584ffb4f2ed2c4f -Author: Fiona Glaser -Date: Fri Nov 20 23:27:51 2009 -0800 - - Give more meaningful error if 1st/2nd pass resolution differ - -commit d38bce2c91c2d766f5611c6b100674ad66016e1f -Author: Steven Walters -Date: Fri Nov 20 12:04:13 2009 -0800 - - Fix extremely rare deadlock with sync-lookahead - Patch partially by Anton Mitrofanov. - -commit c86a6de2b4116fd4c83d82cf9e5dec17f2519050 -Author: Fiona Glaser -Date: Fri Nov 20 08:04:28 2009 -0800 - - Only print weightp stats if there were P-frames - -commit 321674b7573a59ba1bdc2b0cab65a4e1e6b7c411 -Author: Fiona Glaser -Date: Wed Nov 18 13:47:04 2009 -0800 - - Faster lookahead with subme=1 - If it hasn't been clear already, don't use subme=1 as a "fast first pass" option. - Use subme=2 instead; 1 and below now enable a fast (lower quality) lookahead mode. - -commit 63f7147714b37f1779dcf62138f21771368cb8e8 -Author: Fiona Glaser -Date: Mon Nov 16 15:23:58 2009 -0800 - - Faster weightp analysis - Modify pixel_var slightly to return the necessary information and use it for weight analysis instead of sad/ssd. - Various minor cosmetics. - -commit 118dc81e7116b16ba1d2204a387aa88669b8e0bd -Author: Dylan Yudaken -Date: Sun Nov 15 16:14:50 2009 -0800 - - Fix two issues in weightp - If analysis decided on an offset of -128, x264 would create non-compliant streams. - Fix some cases with nearly all intra blocks where analysis could pick very weird weights. - Also add some asserts to check compliancy. - -commit 876c9e528bfcbf8932ac9ffb5091fb3f541ddb91 -Author: Alexander Strange -Date: Sat Nov 14 22:16:18 2009 -0800 - - Allow compilation with non-Apple GCC on OS X - -commit ddbac0c6f6b320eb8b1731c744d7803140b0a5a3 -Author: Alexander Strange -Date: Sat Nov 14 22:13:28 2009 -0800 - - Use __attribute__((may_alias)) for type-punning - GCC thinks pointer casts to unions aren't valid with strict aliasing. - See http://gcc.gnu.org/onlinedocs/gcc-4.4.2/gcc/Optimize-Options.html#Type_002dpunning. 
- Also use M32() in y4m.c. - Enable -Wstrict-aliasing again since all such warnings are fixed. - -commit 69163c3b6d8fe0b85cddd4e47c6a4bdbf6c170f9 -Author: Fiona Glaser -Date: Sat Nov 14 19:58:46 2009 -0800 - - 100l in deadlock fix - -commit 25a029458f70ac1bee8369bee321c8fdcf166f18 -Author: Kieran Kunhya -Date: Sat Nov 14 19:01:09 2009 -0800 - - FLV muxing support - -commit b9ce3a10bacb32af12756d4104d7d8ef255c140a -Author: Fiona Glaser -Date: Sat Nov 14 18:40:22 2009 -0800 - - Fix rare deadlock introduced in weightp - -commit de0e873567cb5bd900f72c88f2fafefb1f890a51 -Author: Fiona Glaser -Date: Thu Nov 12 12:40:40 2009 -0800 - - Actually add -Wno-strict-aliasing to configure - -commit 45b28315b47759f29fd1605814ea361990c00dea -Author: Dylan Yudaken -Date: Thu Nov 12 07:03:46 2009 -0800 - - Various weightp fixes - Make weightp results match in threaded vs non-threaded mode. - Fix two-pass with slow-firstpass. - -commit 03cb8c09553f24bf800cd47893e48b0aa91f9313 -Author: Fiona Glaser -Date: Thu Nov 12 05:25:32 2009 -0800 - - Fix all aliasing violations - New type-punning macros perform write/read-combining without aliasing violations per the second-to-last part of 6.5.7 in the C99 specification. - GCC 4.4, however, doesn't seem to have read this part of the spec and still warns about the violations. - Regardless, it seems to fix all known aliasing miscompilations, so perhaps the GCC warning generator is just broken. - As such, add -Wno-strict-aliasing to CFLAGS. - -commit 241aacca01ee167ec632194ea0edbeffa44145df -Author: David Conrad -Date: Wed Nov 11 20:53:49 2009 -0800 - - Fix 10l in weightp on ARM - -commit 3a4c7dae3deeeb729251e1098d70befab1ad4a0e -Author: Fiona Glaser -Date: Mon Nov 9 21:22:41 2009 -0800 - - Fix one (of possibly many) miscompilations in weightp - Use NOINLINE and some emms calls to fix emms reordering issues. - This issue occurred with some GCC versions if threads > 1 and the phase of the moon was right. - Also a cosmetic in x264.c. 
- -commit 4ed2a8e3d46a1a90df41ed0a195a3926f36d6c15 -Author: Fiona Glaser -Date: Mon Nov 9 09:18:03 2009 -0800 - - Fix pixel_ssd on win64 - Didn't preserve XMM registers, may or may not have caused problems. - -commit b305297084738ef84a3c60d53f001734b1dd96f5 -Author: Steven Walters -Date: Sun Nov 8 22:18:35 2009 -0800 - - Fix weightp logfile parsing on MinGW - -commit df732ec7f119a02fb124261d588151026114d43d -Author: Loren Merritt -Date: Mon Nov 9 05:27:29 2009 +0000 - - cosmetics - -commit 094110915e2de3410feca47463bae4a8b28f587e -Author: David Conrad -Date: Sun Nov 8 20:12:54 2009 -0800 - - Fix weightp on ARM + PPC - No ARM or PPC assembly yet though. - -commit ccac8546bc1596f2313c3f472caab720c9753275 -Author: Dylan Yudaken -Date: Sun Nov 8 17:59:08 2009 -0800 - - Weighted P-frame prediction - Merge Dylan's Google Summer of Code 2009 tree. - Detect fades and use weighted prediction to improve compression and quality. - "Blind" mode provides a small overall quality increase by using a -1 offset without doing any analysis, as described in JVT-AB033. - "Smart", the default mode, also performs fade detection and decides weights accordingly. - MB-tree takes into account the effects of "smart" analysis in lookahead, even further improving quality in fades. - If psy is on, mbtree is on, interlaced is off, and weightp is off, fade detection will still be performed. - However, it will be used to adjust quality instead of create actual weights. - This will improve quality in fades when encoding in Baseline profile. - - Doesn't add support for interlaced encoding with weightp yet. - Only adds support for luma weights, not chroma weights. - Internal code for chroma weights is in, but there's no analysis yet. - Baseline profile requires that weightp be off. - All weightp modes may cause minor breakage in non-compliant decoders that take shortcuts in deblocking reference frame checks. 
- "Smart" may cause serious breakage in non-compliant decoders that take shortcuts in handling of duplicate reference frames. - - Thanks to Google for sponsoring our most successful Summer of Code yet! - -commit b06734129a221be0c7a9a66c91ad042338abcd7c -Author: Steven Walters -Date: Sun Nov 8 11:53:48 2009 -0800 - - Fix assert failure in the case of forced i-frames - Note that this applies to non-IDR i-frames, not IDR-frames. - This fix is also required for future open-gop. - -commit 133ee69dff02b7db62dc99aca3f60c534c90eb34 -Author: Steven Walters -Date: Sat Nov 7 17:07:28 2009 -0800 - - Fix issues relating to input/output files being pipes/FIFOs - -commit 53a5772a35451c897366adda72d3a44c13103c38 -Author: David Conrad -Date: Sat Nov 7 09:25:18 2009 -0800 - - Various ARM-related fixes - Fix comment for mc_copy_neon. - Fix memzero_aligned_neon prototype. - Update NEON (i)dct_dc prototypes. - Duplicate x86 behavior for global+hidden functions. - -commit 30b3825ed00f7f88397a26760cac5248f2f8e226 -Author: Fiona Glaser -Date: Wed Nov 4 00:03:14 2009 -0800 - - Fix miscompilation with gcc 4.3 on ARM - Aliasing violation in spatial prediction caused nasty artifacts. - Shut up two other GCC warnings while we're at it. - -commit d2e7a5a6bf716b2cc1ad32bb842d28935060ccc3 -Author: Fiona Glaser -Date: Tue Nov 3 23:15:35 2009 -0800 - - Fix extremely rare infinite loop in 2-pass VBV - Implicit conversion from double->float lost enough precision to cause the loop termination condition to never trigger. - Bug report by Tal Aloni. - -commit f3c9e6f3e77070f2f5447ef006959e8885a38e55 -Author: Anton Mitrofanov -Date: Sat Oct 31 19:51:14 2009 -0700 - - Fix large file support, broken in r1302 - -commit 99cf5bf62a738b05c7168f04e344d6d596c874d3 -Author: Fiona Glaser -Date: Fri Oct 30 18:58:03 2009 -0700 - - Dramatically reduce size of pixel_ssd_* asm functions - ~10k of code size eliminated. 
- -commit 3ddc66cc5f785f4791939975dee1244a513f2a50 -Author: Loren Merritt -Date: Sat Nov 7 06:09:47 2009 +0000 - - fix bottom-right pixel of lowres planes, which was uninitialized. - weirdly, valgrind reported this only with --no-asm. - -commit b4838a5e3ca349719227f190d79ba7e534742a72 -Author: Fiona Glaser -Date: Thu Oct 29 12:28:37 2009 -0700 - - Further reduce code size in bime - ~7-8 kilobytes saved, ~0.6% faster subme 9. - -commit ecbe2b47036d62f05edcaedea381194ae50516f3 -Author: Anton Mitrofanov -Date: Wed Oct 28 12:57:11 2009 -0700 - - Fix case in which MB-tree didn't propagate all data correctly - Should improve quality in all cases. - Also some minor cosmetic improvements. - -commit a0bbef702a4aa9a36c780ab9ed3eade4e31412d4 -Author: Fiona Glaser -Date: Tue Oct 27 16:01:46 2009 -0700 - - Take into account chroma MV offset during interlaced motion search - Small improvement in interlaced compression. - -commit 98a6d134d3638785bda99e1303c00f3ce471ec63 -Author: Fiona Glaser -Date: Tue Oct 27 15:08:37 2009 -0700 - - Slightly faster ssse3 width4 chroma MC - Cacheline-aware in the same fashion as width8, but not conditional. - -commit 8dc839a6300c116faf040b2dae47b06c2920b4f8 -Author: Fiona Glaser -Date: Tue Oct 27 14:01:46 2009 -0700 - - Eliminate some rare cases where MB-tree gave incorrect results in B-frames - Also get rid of some unnecessary memcpies. - -commit 59f31c25f4c0f20358fc2ef15c2257d2b05716c2 -Author: Anton Mitrofanov -Date: Tue Oct 27 12:28:07 2009 -0700 - - Fix cases in which b-adapt 1 could result in AUTO-type frames. - This didn't actually cause any issues, but it removes the need for the fixing-up code that prevented said issues. - -commit 80a3909c1373ceceabed0f41eee366fc7de7cb1b -Author: Fiona Glaser -Date: Mon Oct 26 12:53:07 2009 -0700 - - Motion compensation optimizations - Turning off inlining saves a whole boatload of code size for near-zero speed cost. - Simplify offset calculation. - Various other optimizations. 
- -commit 9ef68adbe37c707a1195e4027ef8bbfc655b090b -Author: Fiona Glaser -Date: Sun Oct 25 19:41:10 2009 -0700 - - Minor CAVLC optimizations - -commit d947f151e09a8c412c23b2d0800a0570d6fe6287 -Author: Loren Merritt -Date: Sun Oct 25 19:34:12 2009 +0000 - - cosmetics - -commit 35838f77a2961f52f8eb7e9d236c0d6abb47c0fc -Author: Fiona Glaser -Date: Sun Oct 25 09:14:27 2009 -0700 - - ISC-license x86inc.asm - As the assembly abstraction layer is very useful in non-x264 projects, it is now ISC (simplified BSD) so that others, even in commercial projects, can use it as well. - -commit 2b695c6dd398c4ef16c9763dc64f57fad1e081d5 -Author: Fiona Glaser -Date: Fri Oct 23 16:20:39 2009 -0700 - - Various minor CABAC optimizations - -commit 60fb787e49986c6b83825d59bd64866cb8be82be -Author: Lamont Alston -Date: Fri Oct 23 11:01:13 2009 -0700 - - Fix bug in b-pyramid strict - Bug caused invalid streams in some situations. - -commit 1e3729ecfaeae534162a7770479b6761d41b38b2 -Author: Fiona Glaser -Date: Fri Oct 23 02:34:49 2009 -0700 - - Remove non-mod16 warning - Compression only "suffers" by an extremely marginal amount and too many people misinterpret the warning. 
- -commit a7d3ceb4871dbe46c8437be014ac45d550602f9e -Author: Fiona Glaser -Date: Thu Oct 22 22:38:32 2009 -0700 - - Fix two warnings + some minor optimizations - -commit d1df0c41c83db70090389f5d3f9b8824983c630f -Author: Fiona Glaser -Date: Mon Oct 19 22:38:01 2009 -0700 - - Fix a typo in b-pyramid help - And an errant space in common/macroblock.c - -commit 62ff2d4358372147429d295c34dfc425ddb30e58 -Author: Henrik Gramner -Date: Mon Oct 19 12:57:47 2009 -0700 - - A bit more write-combining in macroblock_cache_load - -commit a0df454b358000eb4f5485f8d09a2620fa6c32e5 -Author: Steven Walters -Date: Sat Oct 24 00:23:50 2009 +0000 - - split muxers.c into one file per format - simplify internal muxer API - -commit d73d798ef054c36250d16f75f267091bd5b6a877 -Author: Fiona Glaser -Date: Mon Oct 19 02:43:48 2009 -0700 - - Update fprofile with the latest change to b-pyramid - -commit ed903d902bca9ab8ddde93aef5f38ef9a7883a99 -Author: Steven Walters -Date: Sat Oct 17 12:54:41 2009 -0700 - - Fix assertion fail and incorrect costs with pyramid+VBV - Deal properly with QPfile'd B-refs. x264 should handle multiple B-refs per minigop now, though only via forced frametypes. - -commit 318298e9e1c742fb1453ce8ae6574eaed7487e65 -Author: Fiona Glaser -Date: Sat Oct 17 03:04:56 2009 -0700 - - Improve CRF initial QP selection, fix get_qscale bug - If qcomp=1 (as in mb-tree), we don't need ABR_INIT_QP. - get_qscale could give slightly weird results with still images - -commit d9e6b1732e8a49054870e50990dc54e659f9e1af -Author: Fiona Glaser -Date: Wed Oct 14 11:32:27 2009 -0700 - - Print more accurate error message if dump_yuv fails - -commit 29dba1c3446f51ddcd003e4e0998d931ddb24920 -Author: Steven Walters -Date: Tue Oct 13 09:56:04 2009 -0700 - - Reduce memory usage of b-adapt 2 trellis - Also fix a minor bug where the algorithm ignored the last frame in the trellis. 
- -commit cf5ba8134a4bdd381e75a5c2ea434198a7174a36 -Author: Lamont Alston -Date: Mon Oct 12 23:32:16 2009 -0700 - - Make B-pyramid spec-compliant - The rules of the specification with regard to picture buffering for pyramid coding are widely ignored. - x264's b-pyramid implementation, despite being practically identical to that proposed by the original paper, was technically not compliant. - Now it is. - Two modes are now available: - 1) strict b-pyramid, while worse for compression, follows the rule mandated by Blu-ray (no P-frames can reference B-frames) - 2) normal b-pyramid, which is like the old mode except fully compliant. - This patch also adds MMCO support (necessary for compliant pyramid in some cases). - MB-tree still doesn't support b-pyramid (but will soon). - -commit e691cc0e3563b554e199cafbec82109d6a496c36 -Author: Fiona Glaser -Date: Mon Oct 12 23:28:26 2009 -0700 - - Add missing free for nal_buffer - Fixes a memory leak. - -commit f6431055381c311aa08bfb9335bd1adb9bd8be3e -Author: Loren Merritt -Date: Sun Oct 18 21:47:18 2009 +0000 - - sync yasm macros to ffmpeg - -commit 040663db98c09eb819364d77059450166c294950 -Author: Loren Merritt -Date: Sat Oct 17 14:54:49 2009 +0000 - - eliminate some divisions - -commit 744ea94e0d76db75eb111f1e8c9f4804165a6315 -Author: Fiona Glaser -Date: Mon Oct 12 18:40:28 2009 -0700 - - Fix glitches with slow-firstpass + weightb + multiref + 2pass - Bug in r1277 - -commit 84999edb42023a75aadffe356cce538834dee84b -Author: Henrik Gramner -Date: Mon Oct 12 15:44:13 2009 -0700 - - Simplify some code in b-adapt 2's trellis - -commit d421ce5d657cdd8e200ac003cbfcffb45d6a388d -Author: Fiona Glaser -Date: Mon Oct 12 15:38:51 2009 -0700 - - Fix a very rare integer overflow in slicetype analysis - Caused an assert failure when it occurred. - Bug is as old as adaptive B-frames. 
- -commit 07cfdf8468fbba99784f5faa5230cca34e149a29 -Author: Fiona Glaser -Date: Mon Oct 12 13:14:19 2009 -0700 - - Reduce the aggressiveness of 2-pass VBV - Now that B-frames are properly covered, we don't have to be as aggressive. - This eliminates some issues with skyrocketing QPs in B-frames in 2-pass VBV. - -commit a0b07e91d128b91eb6bef7189e79a3f14f39af3d -Author: Fiona Glaser -Date: Mon Oct 12 11:29:23 2009 -0700 - - Fix regression: disable flash detection without B-frames - -commit 1fbba0ca5d97d4f3250864c5cc6431c69855cb59 -Author: Loren Merritt -Date: Sat Oct 10 04:43:00 2009 +0000 - - change all dct arrays to 1d. - the C standard doesn't allow you to iterate 1-dimensionally over 2d arrays, and nothing other than the dsp functions themselves cares about the 2dness of dct. - this fixes a miscompilation in x264_mb_optimize_chroma_dc. - -commit 507c83428027ba0886168b55324af5f1d5befbdd -Author: Fiona Glaser -Date: Sun Oct 11 20:17:50 2009 -0700 - - Add row-based VBV for B-frames - While B-frames still aren't explicitly covered by ratecontrol, this should resolve issues of VBV underflows due to larger-than-expected B-frames. - -commit c51d00b7c742aa84ac7e113ba03d808cb3132af2 -Author: Fiona Glaser -Date: Sat Oct 10 17:35:03 2009 -0700 - - Improve VBV, fix bug in 2-pass VBV introduced in MB-tree - Bug caused AQ'd row/frame costs to not be calculated (and thus caused underflows). - Also make VBV more aggressive with more threads in 2-pass mode. - Finally, --ratetol now affects VBV aggressiveness (higher is less aggressive). - -commit 1a1b9c6f9b35025223b4a7ca68af4ec95ede8f79 -Author: Anton Mitrofanov -Date: Thu Oct 8 14:55:26 2009 -0700 - - Optimize exp2fix8 - Slightly faster and more accurate rounding. - -commit c695f52485f11445c981f7a7b2e1a485ebec2d6a -Author: Fiona Glaser -Date: Thu Oct 8 04:27:11 2009 -0700 - - Avoid scenecuts in flashes and similar situations - "Flashes" are defined as any scene which lasts a very short period before a previous scene returns. 
- A common example of this is of course a camera flash. - Accordingly, look ahead during scenecut analysis and rule out the possibility of certain frames being scenecuts. - Also handles cases of tons of short scenes in sequence and avoids making those scenecuts as well. - Can only catch flashes of 1 frame in length with b-adapt 1. - With b-adapt 2, can catch flashes of length --bframes. - Speed cost should be negligible. - -commit 3b81316490e58524d3f86f2439cc8cfa2355eac3 -Author: Fiona Glaser -Date: Tue Oct 6 22:15:10 2009 -0700 - - Fix bug where x264 generated non-compliant bitstreams with insane SAR values - -commit 6e8487f4ea6c761f3ddc14766dd254790f6c8e9e -Author: Loren Merritt -Date: Wed Sep 30 22:39:13 2009 +0000 - - rm msvc project files and related ifdefs - -commit e9fbd8db8908074f46005383bf0c117d5fc4c8a8 -Author: Holger Lubitz -Date: Tue Oct 6 15:17:34 2009 -0700 - - SSE4 version of 4x4 idct - 27->24 clocks on Nehalem. - This is really just an excuse to use "movsd" in a real function. - Add some comments to subsum-related macros in x86util. - -commit 7639d496ccc83f28166471d3a2a54292110f572c -Author: Fiona Glaser -Date: Sun Oct 4 19:15:28 2009 -0700 - - Constrained intra prediction support - Enable with --constrained-intra. Significantly reduces compression, but required for the base layer of SVC encodes and maybe some other use-cases. - - Commit sponsored by a media streaming company that wishes to remain anonymous. - -commit 8270136f6ec2fc72087d1e8f15eed849300768e6 -Author: Fiona Glaser -Date: Sun Oct 4 00:48:27 2009 -0700 - - Slightly improve non-RD p8x8 mode decision - Subpartition costs are effectively zero in CABAC if sub-8x8 search is off. - -commit c1322c3198f981adf2e1a4221afdba9cfdc9345c -Author: Fiona Glaser -Date: Sat Oct 3 00:59:02 2009 -0700 - - Reorder reference frames optimally on second pass - About +0.1-0.2% compression at normal bitrates, up to +1% at very low bitrates. 
- Only works if the first pass uses the same number of refs as the second (i.e. not with fast first pass). - Thus, only worthwhile at insanely slow speeds: as such, enable slow-firstpass by default with preset placebo. - Note that this changes the stats file format! - -commit deae6910e183789705532e6c94eba6dada3b9b00 -Author: Fiona Glaser -Date: Wed Sep 30 12:13:16 2009 -0700 - - Fix typo in ratecontrol_summary - -commit 9dd6842dc649734219b1207481c6746bbc6e2198 -Author: Fiona Glaser -Date: Tue Sep 29 23:32:07 2009 -0700 - - Clip log2_max_frame_num - It's still much higher than it needs to be, but that will be fixed with the upcoming MMCO patch. - Also make sure we don't write too large a frame_num or poc in slice header. - -commit d73b50e86b0d6beaa918c2855771002b19ded523 -Author: Anton Mitrofanov -Date: Sat Sep 26 12:44:53 2009 -0700 - - Fix some issues with 3-pass statsfile handling - The value of i_frame during encoder_close was incorrect. - -commit c4597c9684307df1fab0d76461eb914d031e8182 -Author: Anton Mitrofanov -Date: Sat Sep 26 12:42:46 2009 -0700 - - Fix ctrl-C termination message with few frames encoded - -commit 24ef8748abb957fc4807299d4346779e11ac6c57 -Author: Fiona Glaser -Date: Fri Sep 25 16:23:52 2009 -0700 - - Add support for single-frame VBV, improve compliance - This allows both constant-framesize and capped-framesize encoding. - Literal constant framesize isn't actually supported yet due to the lack of - filler support. - Example with 30fps video: --vbv-bufsize 200 --vbv-maxrate 6000 will ensure that - no frame is ever larger than 200 kilobits. - - One example use-case of this is for zero-delay streaming where bandwidth costs - need to be minimized. If every frame is smaller than 200 kilobits and the - client has a 6 megabit connection, every single frame can be instantly sent - to the client and handled without any decoder-side buffer.
- - Fix a mistake in VBV calculation--this may have caused the VBV to be slightly - non-compliant in some situations without x264 realizing it. - Add primitive prediction handling for rows with quantizers lower than their - reference. This slightly improves VBV in CBR mode. - Various other minor improvements to VBV, mostly to make single-frame VBV work. - - Commit sponsored by a media streaming company that wishes to remain anonymous. - -commit e324d60ab2d9fd9cb5c837039a8c48e2052d1947 -Author: Fiona Glaser -Date: Thu Sep 24 08:40:45 2009 -0700 - - Fix 10l in API change - frame_num was set to 1, not 0, for the first frame. This broke spec compliance. - Didn't actually seem to cause any problems though except for breaking decoding on Quicktime. - -commit 17fcf96e7e19ec391393c0ba2a67cd6f792131a5 -Author: Steven Walters -Date: Wed Sep 23 15:04:02 2009 -0700 - - Allow user-set FPS for inputs other than YUV - -commit e0920d6fac5b51ab6ebc08482b2eace3a667cc1c -Author: Anton Mitrofanov -Date: Wed Sep 23 12:31:53 2009 -0700 - - Improve threaded frame handling - Avoid unnecessary cond_wait - -commit 510fa4fc25ac74d47ba5dc5c82aba45c8944afde -Author: Fiona Glaser -Date: Tue Sep 22 17:30:19 2009 -0700 - - Attempt to detect miscompilation due to bug in gcc 4.2 - I don't know if this bug still affects latest x264, but it can't hurt to try to detect it. - Accordingly refuse to open the encoder if detected. - Apparently VLC (on Windows) has been distributed for some time with a completely - broken x264 due to the use of a completely broken compiler (gcc 4.2). In - particular, the MV costs seem to be calculated incorrectly on win32 when linking - from an application compiled without -ffast-math to an application with - -ffast-math. - I am not entirely certain why this occurs, but the result is, unsurprisingly, - encoding quality that makes MPEG-2 look good, due to the motion search being - completely broken. 
- -commit b454edb2c32910ec021ac46405c87b0ad0b1ee3b -Author: Steven Walters -Date: Tue Sep 22 12:14:23 2009 -0700 - - Really fix encoder_close crash this time - Not-entirely-fixed in r1253. - -commit a54f4f2b77c7f77cb86232a291c802c1d993f7e7 -Author: Fiona Glaser -Date: Sun Sep 20 21:58:08 2009 -0700 - - Check for 16x16 partitions masquerading as smaller ones - Saves a few bits when using qpel-RD. - -commit 2fe90066553da8a2e158259bd3e20939b3778b9d -Author: David Conrad -Date: Sun Sep 20 01:16:51 2009 -0700 - - Update config.guess/sub; add Snow Leopard support - -commit 9e6650e9b523db04fa916af69bf5cfaa9fee6c4e -Author: Fiona Glaser -Date: Sat Sep 19 09:50:59 2009 -0700 - - Fix integer overflow in 2-pass VBV - Bug caused slight undersizing in 2-pass mode in some cases. - -commit c4c49802a61dd247798f50fd18c9449fcfb06977 -Author: Fiona Glaser -Date: Fri Sep 18 14:28:31 2009 -0700 - - Fix bug with various bizarre commandline combinations and mbtree - Second pass would have mbtree on even though the first pass didn't (and thus encoding would immediately fail). - -commit bbf573c75455ea02ea18bd718b65cd13a1d9a04c -Author: Fiona Glaser -Date: Thu Sep 17 13:02:02 2009 -0700 - - Add intra prediction modes to output stats - Also eliminate some NANs in stat output with intra-only encoding. - Marginal speedup: disable stat calculation if log level is below X264_LOG_INFO. - Various minor cosmetics. - -commit 90f12afa4759bc4c0dff4ebec41707a3146f6b8b -Author: Fiona Glaser -Date: Wed Sep 16 21:34:48 2009 -0700 - - Overhaul syntax in muxers.c/matroska.c - The inconsistent syntax in these files has finally come to an end. - -commit 7a0fbed78235a63bf8008d282f5db64ef1f3f2ec -Author: Fiona Glaser -Date: Wed Sep 16 20:00:00 2009 -0700 - - Major API change: encapsulate NALs within libx264 - libx264 now returns NAL units instead of raw data. x264_nal_encode is no longer a public function. - See x264.h for full documentation of changes. - New parameter: b_annexb, on by default. 
If disabled, startcodes are replaced by sizes as in mp4. - x264's VBV now works on a NAL level, taking into account escape codes. - VBV will also take into account the bit cost of SPS/PPS, but only if b_repeat_headers is set. - Add an overhead tracking system to VBV to better predict the constant overhead of frames (headers, NALU overhead, etc). - -commit 8e67a586e02672ef7faf001bae1813200f8fb730 -Author: Fiona Glaser -Date: Mon Sep 14 12:30:38 2009 -0700 - - Add missing fclose for mbtree input statsfile on second pass - Bug report by VFRmaniac - -commit f81f14e23534c199c20d78e553a7e427a9cf2d8a -Author: Fiona Glaser -Date: Mon Sep 14 11:07:23 2009 -0700 - - Improve progress indicator behavior - Progress indicator will now indicate based on output frame, not input frame. - -commit 3f3b67f74b53e18744b1bf754d12b6eaae9dd3c5 -Author: Fiona Glaser -Date: Mon Sep 14 03:21:14 2009 -0700 - - Update yasm configure check - lzcnt apparently requires yasm 0.6.2. - -commit b1eac26510d0532ae9202249767e5f3ba22443ef -Author: Fiona Glaser -Date: Sun Sep 13 01:02:37 2009 -0700 - - Make MV costs global instead of static - Fixes some extremely rare threading race conditions and makes the code cleaner. - Downside: slightly higher memory usage when calling multiple encoders from the same application. - -commit c8c060798aa0a43cd334f78b62fd23720024de9f -Author: Fiona Glaser -Date: Fri Sep 11 17:30:14 2009 -0700 - - Don't print scenecut message multiple times in verbose mode - Occurred mostly with b-adapt 2. - -commit 72fa3f9bbd5855ecfc2de1f1b7b1861cd2e20a21 -Author: Fiona Glaser -Date: Thu Sep 10 02:55:21 2009 -0700 - - Optimize rounding of luma and chroma DC coefficients - Reduce bitrate mostly-losslessly at low quantizers. - In some rare cases, bitrate reduction may be as high as 10%. - Luma rounding optimization (helps much less than chroma) requires trellis. 
- -commit 9e15b6d8e4b0c927c8ebf0abc75c4467437a91f9 -Author: Steven Walters -Date: Wed Sep 9 12:19:40 2009 -0700 - - Fix crash if encoder_close is called before delayed frames are flushed - Also no longer flush frames when ctrl-Cing x264, so x264 will close faster. - -commit 02e662e1818a7b83c3f8120b06ccbaa378a7c58e -Author: Fiona Glaser -Date: Sun Sep 6 14:55:48 2009 -0700 - - Improve x264 help - Now has three help options: --help, --longhelp, and --fullhelp. - --help only shows the most basic options; most users should not need more than these. - Add usage examples. - Fix typo in a comment. - -commit d1f4237e0b5ba0718e88ef5529567872ea82477a -Author: Fiona Glaser -Date: Sat Sep 5 19:22:21 2009 -0700 - - Factor out a redundant RD call in qpel-RD - Fixes a problem that was supposed to be, but didn't, get fully fixed in r1238. - -commit 5858d3dc48dfaacdb608659aaf8721958327b26d -Author: Fiona Glaser -Date: Sat Sep 5 18:56:18 2009 -0700 - - Fix RD early-skip - Small quality improvement and speedup, was broken by r1214. - -commit 6093a383fb0a2f82aeaf249841797ea4d4e88e1d -Author: Fiona Glaser -Date: Sat Sep 5 18:55:46 2009 -0700 - - Faster CAVLC mb header writing for B macroblocks - -commit de4c39b71d013a87aea50ed1075263dc1e579b01 -Author: David Conrad -Date: Wed Sep 2 16:14:59 2009 -0700 - - Compile fixes for pre-ARMv6T2 and/or PIC - -commit bc120190edf7db86f44ed44ffad31271ad1294c7 -Author: Steven Walters -Date: Wed Sep 2 12:33:50 2009 -0700 - - Change priority handling on some OSs - Instead of setting the lookahead thread to max priority, lower all the other threads' priorities instead. - This is particularly useful when the "max priority" is "realtime", as in Windows, which can cause some problems. - -commit 6940dcaef140d8a0c43c9a62db158e9d71a8fdeb -Author: Steven Walters -Date: Tue Sep 1 18:46:51 2009 -0700 - - Threaded lookahead - Move lookahead into a separate thread, set to higher priority than the other threads, for optimal performance. 
- Reduces the amount that lookahead bottlenecks encoding, greatly increasing performance with lookahead-intensive settings (e.g. b-adapt 2) on many-core CPUs. - Buffer size can be controlled with --sync-lookahead, which defaults to auto (threads+bframes buffer size). - Note that this buffer is separate from the rc-lookahead value. - Note also that this does not split lookahead itself into multiple threads yet; this may be added in the future. - Additionally, split frames into "fdec" and "fenc" frame types and keep the two separate. - This split greatly reduces memory usage, which helps compensate for the larger lookahead size. - Extremely special thanks to Michael Kazmier and Alex Giladi of Avail Media, the original authors of this patch. - -commit 7df6f5d62983432414016f5ec18f71f17626354e -Author: Fiona Glaser -Date: Tue Sep 1 11:36:54 2009 -0700 - - Force a link error in case of incompatible API - This is because the number of bug reports due to miscompiled ffmpeg builds is reaching critical mass. - The name of x264_encoder_open is now #defined based on the current X264_BUILD. - Note that this changes the calling convention required for dlopen, but not for ordinary calls to x264_encoder_open. - -commit ec2f6f4f93df9fb7c67a172669a2b629335391d5 -Author: Fiona Glaser -Date: Mon Aug 31 22:44:45 2009 -0700 - - Get rid of "CBR" descriptor from qcomp - Though technically accurate in some vague way, I have never actually seen this - option used correctly, rather it has been used by hundreds of people who can't - read the documentation and believe that qcomp=0 is what should be used for CBR - encoding. - -commit 4767b0e12e335f6057327a18992a3c97abedabbb -Author: Loren Merritt -Date: Sun Aug 30 20:49:07 2009 +0000 - - Faster me=tesa - But it still spends all too much time in me_search_ref rather than asm. 
- -commit 4ccbb1998c81c5533c17da91aa67b62a5d9857c8 -Author: Fiona Glaser -Date: Mon Aug 31 06:36:41 2009 -0700 - - Multi-slice encoding support - Slicing support is available through three methods (which can be mixed): - --slices sets a number of slices per frame and ensures rectangular slices (required for Blu-ray). Overridden by either of the following options: - --slice-max-mbs sets a maximum number of macroblocks per slice. - --slice-max-size sets a maximum slice size, in bytes (includes NAL overhead). - Implement macroblock re-encoding support to allow highly accurate slice size limitation. Might be useful for other things in the future, too. - -commit 57223706e5d32df207e9b3f64e2b36c4c3b78022 -Author: Fiona Glaser -Date: Sat Aug 29 17:09:55 2009 -0700 - - Fix a valgrind warning in b-adapt 2 - -commit 22342aa3bfedb2ad29fde8d236145db2021614dc -Author: Loren Merritt -Date: Sat Aug 29 10:31:08 2009 +0000 - - fix asm symbols for oprofile (regression in r1221) - -commit 5c08b9142d327c8ba910c2b399d804f6794182a5 -Author: Anton Mitrofanov -Date: Fri Aug 28 15:07:12 2009 -0700 - - Fix bug in intra analysis in B-frames - i8x8/i4x4 never got analysed when fast_intra was toggled and RD was off; up to a 2-3% quality improvement in non-RD mode. - With this bug dating back to r369, this is probably the second-oldest bug ever fixed in x264. - -commit 3c3239bb8d7d89a9879502256bcde6066fef7cb0 -Author: Anton Mitrofanov -Date: Fri Aug 28 14:56:44 2009 -0700 - - Fix bug in b16x16 qpel RD - Incorrect cost was used to initialize the search. - -commit af2739b786fb702018f0d0266dfc40d81a32162c -Author: Fiona Glaser -Date: Thu Aug 27 15:21:22 2009 -0700 - - Check minimum chroma QP in addition to luma QP during CQM init - Correctly error out if the implied minimum chroma QP is too low. - Add missing emms to checkasm macroblock_tree_propagate test. 
- -commit 65068aab7e2c1b923be766951f684027923ac4d6 -Author: Fiona Glaser -Date: Thu Aug 27 14:16:45 2009 -0700 - - Faster mbtree propagate and x264_log2, less memory usage - Avoid an int->float conversion with a small table. - Change lowres_inter_types to a bitfield; cut its size by 75%. - Somewhat lower memory usage with lots of bframes. - Make log2/exp2 tables global to avoid duplication. - -commit adc25db91ebef53b7883bb1587df1dd2247e4f21 -Author: Fiona Glaser -Date: Wed Aug 26 20:30:47 2009 -0700 - - Fix keyint=1 + VBV + rc-lookahead - -commit 2d3958bfda22a24f54095007b25eb96d521086f5 -Author: Fiona Glaser -Date: Wed Aug 26 20:16:10 2009 -0700 - - Faster x264_exp2fix8 - 22->13 cycles on Core 2 with mfpmath=sse - -commit 252fcf4b0a1b0318ec246f45bd934efac9de9c50 -Author: Loren Merritt -Date: Thu Aug 27 06:05:57 2009 +0000 - - compile x86 with fpmath=sse by default - -commit efa85578a7a19f3f71b0bcae194cfbfb10f2f319 -Author: David Conrad -Date: Mon Aug 24 17:17:41 2009 -0700 - - ARM configure: enable NEON-related options by default - When compiling for ARM, x264 will compile by default for Cortex A8 unless specified otherwise. - To compile for pre-ARMv6, --disable-asm is required. - -commit 918808f897cb7e1e401b1f1cba560957985f1682 -Author: Fiona Glaser -Date: Mon Aug 24 03:28:11 2009 -0700 - - 2-pass VBV fixes - Properly run slicetype frame cost with 2pass + MB-tree. - Slash the VBV rate tolerance in 2-pass mode; increasing it made sense for the highly reactive 1-pass VBV algorithm, but not for 2-pass. - 2-pass's planned frame sizes are guaranteed to be reasonable, since they are based on a real first pass, while 1-pass's, based on lookahead SATD, cannot always be trusted. 
- -commit 50d7fb80d8cb773cd6d495e083867c3685726352 -Author: David Conrad -Date: Mon Aug 24 01:38:42 2009 -0700 - - GSOC merge part 8: ARM NEON intra prediction assembly functions (partial) - 4x4 dc/h/ddr/ddl, 8x8 dc/h, 8x8c h/v, 16x16 dc/h/v - -commit 350a558808816ad54d3ad01d795a43920738f586 -Author: David Conrad -Date: Mon Aug 24 01:10:30 2009 -0700 - - GSOC merge part 7: ARM NEON deblock assembly functions (partial) - Originally written for ffmpeg by Mans Rullgard; ported by David. - Luma and chroma inter deblocking; no intra yet. - -commit 2dcc6072d12deaf27705dc2beb63e192bd590232 -Author: David Conrad -Date: Mon Aug 24 00:58:42 2009 -0700 - - GSOC merge part 6: ARM NEON quant assembly functions (partial) - (de)quant 4x4, (de)quant 8x8, (de)quant DC, coeff_last - -commit a591e8856ee2b919d21bcc51e5eb88e9f4fb6d94 -Author: David Conrad -Date: Sun Aug 23 02:03:48 2009 -0700 - - GSOC merge part 5: ARM NEON dct assembly functions - (i)dct4x4dc, (i)dct4x4, (i)dct8x8, (i)dct_dc, zigzag_scan_frame_4x4 - -commit 6bf21c631a0cf073ad0503e6f3a9eeabacc5078a -Author: David Conrad -Date: Sun Aug 23 01:35:10 2009 -0700 - - GSOC merge part 4: ARM NEON mc assembly functions - prefetch, memcpy_aligned, memzero_aligned, avg, mc_luma, get_ref, mc_chroma, hpel_filter, frame_init_lowres - -commit 52f9719b4c3e58aaa6cbd6d83950444e022aefea -Author: David Conrad -Date: Sat Aug 22 23:55:29 2009 -0700 - - GSOC merge part 3: ARM NEON pixel assembly functions - SAD, SADX3/X4, SSD, SATD, SA8D, Hadamard_AC, VAR, VAR2, SSIM - -commit ca7da1aecdfdccaa4f7669e915348f6d31f85827 -Author: David Conrad -Date: Sat Aug 22 23:40:33 2009 -0700 - - GSOC merge part 2: ARM stack alignment - Neither GCC nor ARMCC support 16 byte stack alignment despite the fact that NEON loads require it. - These macros only work for arrays, but fortunately that covers almost all instances of stack alignment in x264. 
- -commit 1a072a3a013976a178e0068be021e23b9a0ed59f -Author: David Conrad -Date: Thu Aug 20 20:44:09 2009 -0700 - - Fix unaligned accesses in bitstream writer - Fixes x264 on CPUs with no unaligned access support (e.g. SPARC). - Improves performance marginally on CPUs with penalties for unaligned stores (e.g. some x86). - -commit 77c46ebc7d35de283fc27662e21d866be1b45773 -Author: Fiona Glaser -Date: Thu Aug 20 13:08:25 2009 -0700 - - Fix bug in calculation of I-frame costs with AQ. - -commit fb62734c26f1a25f7009c9ec01849019ca454b4d -Author: David Conrad -Date: Wed Aug 19 17:03:02 2009 -0700 - - GSOC merge part 1: Framework for ARM assembly optimizations - x264 will detect which ARM core it's building for and only build NEON asm if the target is ARMv6 or above, then enable NEON at runtime. - -commit 8368e151c7b89c4a36cdc08b75ca490798d62c9d -Author: David Conrad -Date: Wed Aug 19 16:18:36 2009 -0700 - - Fix a bug in checkasm and two OSX fixes - MC chroma checkasm test could crash in some situations - Remove -lmx, as it's not needed and the iPhone doesn't have it. - Remove unused sqrtf emulation; it breaks if math.h is included. - -commit bde792fee0ac6d32d534918db870c94fe106b6e3 -Author: Fiona Glaser -Date: Wed Aug 19 01:49:47 2009 -0700 - - Improve QPRD - Always check the last macroblock's QP, even if the normal search doesn't reach it. - Raise the failure threshold when moving towards the last macroblock's QP. - 0.2-1% improved compression. - -commit 4e824bbcafaf16a4736db0028fdf6dd542f3ed35 -Author: Fiona Glaser -Date: Tue Aug 18 21:53:28 2009 -0700 - - Fix MB-tree with keyint<3 - Also slightly improve VBV keyint handling. - -commit 678b317aca6f75ecab89cf21c31e05748e9d2a5f -Author: Fiona Glaser -Date: Tue Aug 18 19:25:45 2009 -0700 - - Fix bug in VBV lookahead + no MB-tree - I-frames need to have VBV lookahead run on them as well. 
- -commit c83699f10f252998a42471294a8d97bb20f94296 -Author: Fiona Glaser -Date: Tue Aug 18 18:37:26 2009 -0700 - - Add support for frame-accurate parameter changes - Parameter structs can now be passed with individual frames. - The previous method would only change the parameter of what was currently being encoded, which due to delay might be very far from an intended exact frame. - Also add support for changing aspect ratio. Only works in a stream with repeating headers and requires the caller to force an IDR to ensure instant effect. - -commit 6a5a20431f448aaa43036cdaa024c8017d63fa04 -Author: Fiona Glaser -Date: Tue Aug 18 15:46:26 2009 -0700 - - Fix x264_encoder_reconfig with multithreading - New behavior: reconfigging the encoder will result in changes being applied - to each of the encoding threads as they finish encoding the current frame. - -commit ba0c03511a7c8d6c8327c07b5a5870d4746be3eb -Author: Fiona Glaser -Date: Sun Aug 16 03:29:49 2009 -0700 - - Fix two bugs in QPRD - QPRD could in some cases force blocks to skip when they shouldn't be ~(+0.01db) - Force QPRD to abide by qpmin/qpmax restrictions. - -commit 30a82c75c9bf38f47ab1dd1f505891505dda54da -Author: Fiona Glaser -Date: Sat Aug 15 19:02:31 2009 -0700 - - Lookahead VBV - Use the large-scale lookahead capability introduced in MB-tree for ratecontrol purposes. - (Does not require MB-tree, however.) - Greatly improved quality and compliance in 1-pass VBV mode, especially in CBR; +2db OPSNR or more in some cases. - Fix some other bugs in VBV, which should improve non-lookahead mode as well. - Change the tolerance algorithm in row VBV to allow for more significant mispredictions when buffer is nearly full. - Note that due to the fixing of an extremely long-standing bug (>1 year), bitrates may change by nontrivial amounts in CRF without MB-tree. 
- -commit 50f7afcd0a21a01ac7aae72941747c2061db8d2e -Author: Fiona Glaser -Date: Fri Aug 14 07:20:07 2009 -0700 - - Fix bug in b-adapt 1 - B-adapt 1 didn't use more than MAX(1,bframes-1) B-frames when MB-tree was off. - -commit e586d699b2f13364aa443b367dba9fe38699f5de -Author: Fiona Glaser -Date: Thu Aug 13 17:13:33 2009 -0700 - - Fix a potential failure in VBV - If VBV does underflow, ratecontrol could be permanently broken for the rest of the clip. - Revert part of the previous VBV changes to fix this. - -commit db724ac24e6a94c896e1bac6f4c7b5a5504ed773 -Author: Anton Mitrofanov -Date: Thu Aug 13 21:40:21 2009 +0000 - - new API function x264_encoder_delayed_frames. - fix x264cli on streams whose total length is less than the encoder latency. - -commit 9179e923c80d4720950ae90187bbcb2cd13f430a -Author: Fiona Glaser -Date: Thu Aug 13 14:12:26 2009 -0700 - - Add no-mbtree to fprofile (and fix pyramid in fprofile) - -commit db12af7a44c498a75b6f3d72ec8836fb75050f14 -Author: Fiona Glaser -Date: Sun Aug 9 16:06:52 2009 -0700 - - Don't print a warning about direct=auto in 2pass when B-frames are off - -commit f52973d2e93810bc86b6e8e3358d0365b97a409d -Author: Loren Merritt -Date: Thu Aug 13 05:02:59 2009 +0000 - - fix lowres padding, which failed to extrapolate the right side for some resolutions. - fix a buffer overread in x264_mbtree_propagate_cost_sse2. no effect on actual behavior, only theoretical correctness. - fix x264_slicetype_frame_cost_recalculate on I-frames, which previously used all 0 mb costs. - shut up a valgrind warning in predict_8x8_filter_mmx. - -commit e9ff8c4b1f647135f7b920fad69c616ccb08459a -Author: Loren Merritt -Date: Sun Aug 9 04:00:36 2009 +0000 - - simd part of x264_macroblock_tree_propagate. - 1.6x faster on conroe. - -commit 5599c4788e4ce72c04e536723075f7547deeaec3 -Author: Loren Merritt -Date: Sat Aug 8 14:53:27 2009 +0000 - - MB-tree fixes: - AQ was applied inconsistently, with some AQed costs compared to other non-AQed costs. 
Strangely enough, fixing this increases SSIM on some sources but decreases it on others. More investigation needed. - Account for weighted bipred. - Reduce memory, increase precision, simplify, and early terminate. - -commit efebe7d7b92678bfd9dacbf22068387aeff3da07 -Author: Fiona Glaser -Date: Sat Aug 8 17:51:01 2009 -0700 - - Add missing free()s for new data allocated for MB-tree - Eliminates a memory leak. - -commit 599f024c88b0978c892838c1af5a01ed1966a74f -Author: Fiona Glaser -Date: Sat Aug 8 12:53:06 2009 -0700 - - Fix keyframe insertion with MB-tree and no B-frames - -commit 4cbc551150f9649b2e636e433af2204d353b3bc9 -Author: Fiona Glaser -Date: Sat Aug 8 11:26:36 2009 -0700 - - Fix MP4 output (bug in malloc checking patch) - -commit 6eab44d4295801c28184ec13f03f9727c60129cc -Author: Steven Walters -Date: Fri Aug 7 16:18:01 2009 -0700 - - Gracefully terminate in the case of a malloc failure - Fuzz tests show that all mallocs appear to be checked correctly now. - -commit 7dec1a1574e6b94959d9bf3e997cea146289f9a7 -Author: Anton Mitrofanov -Date: Fri Aug 7 10:44:13 2009 -0700 - - Fix a potential infinite loop in QPfile parsing on Windows - ftell doesn't seem to work properly on Windows in text mode. - -commit 3667fbf979136302e990df35d850e05cf8de8115 -Author: Fiona Glaser -Date: Fri Aug 7 10:31:16 2009 -0700 - - Fix delay calculation with multiple threads - Delay frames for threading don't actually count as part of lookahead. - -commit 07178d3c8737aa9660d1ab11ace9c54bbe5724b6 -Author: Fiona Glaser -Date: Thu Aug 6 23:09:46 2009 -0700 - - Add "veryslow" preset - Apparently some people are actually *using* placebo, so I've added this preset to bridge the gap. - -commit 835ccc3cec908b1febfd31613d3e6583628116b3 -Author: Fiona Glaser -Date: Tue Aug 4 17:46:33 2009 -0700 - - Macroblock-tree ratecontrol - On by default; can be turned off with --no-mbtree. - Uses a large lookahead to track temporal propagation of data and weight quality accordingly. 
- Requires a very large separate statsfile (2 bytes per macroblock) in multi-pass mode. - Doesn't work with b-pyramid yet. - Note that MB-tree inherently measures quality different from the standard qcomp method, so bitrates produced by CRF may change somewhat. - This makes the "medium" preset a bit slower. Accordingly, make "fast" slower as well, and introduce a new preset "faster" between "fast" and "veryfast". - All presets "fast" and above will have MB-tree on. - Add a new option, --rc-lookahead, to control the distance MB tree looks ahead to perform propagation analysis. - Default is 40; larger values will be slower and require more memory but give more accurate results. - This value will be used in the future to control ratecontrol lookahead (VBV). - Add a new option, --no-psy, to disable all psy optimizations that don't improve PSNR or SSIM. - This disables psy-RD/trellis, but also other more subtle internal psy optimizations that can't be controlled directly via external parameters. - Quality improvement from MB-tree is about 2-70% depending on content. - Strength of MB-tree adjustments can be tweaked using qcompress; higher values mean lower MB-tree strength. - Note that MB-tree may perform slightly suboptimally on fades; this will be fixed by weighted prediction, which is coming soon. - -commit 93cc2893a9d4daf2d798f3cafddb499cabb3c0d7 -Author: Fiona Glaser -Date: Mon Aug 3 20:52:30 2009 -0700 - - Various 1-pass VBV tweaks - Make predictors have an offset in addition to a multiplier. - This primarily fixes issues in sources with lots of extremely static scenes, such as anime and CGI. - We tried linear regressions, but they were very unreliable as predictors. - Also allow VBV to be slightly more aggressive in raising QPs to avoid not having enough bits left in some situations. - Up to 1db improvement on some clips. 
- -commit 1d735afb29000c040daaf48040bdb4d3423f8352 -Author: Fiona Glaser -Date: Tue Jul 28 20:41:27 2009 -0700 - - Fix another 10L in QPRD - An entry in subpel_iterations was missing. - I have no idea how QPRD was working at all without this change. - -commit cd707257878590367fb9adbe2403d11e53dc5ae3 -Author: Fiona Glaser -Date: Tue Jul 28 01:16:23 2009 -0700 - - Update help and cleanup in ratecontrol.c - Deal with some out-of-date information. - -commit b8c7499d4b83c920f80f4aa09072829cc034eb5f -Author: Loren Merritt -Date: Tue Jul 28 07:16:31 2009 +0000 - - 15% faster refine_bidir_satd, 10% faster refine_bidir_rd (or less with trellis=2) - re-roll a loop (saves 44KB code size, which is the cause of most of this speed gain) - don't re-mc mvs that haven't changed - -commit b08410d07ea242250fcb827742c74046d59bd991 -Author: Fiona Glaser -Date: Mon Jul 27 21:03:00 2009 -0700 - - Faster bidir_rd plus some bugfixes - Cache chroma MC during refine_bidir_rd and use both the luma and chroma caches to skip MC in macroblock_encode. - Fix incorrect call to rd_cost_part; refine_bidir_rd output was incorrect for i8>0. - Remove some redundant clips. - ~12% faster refine_bidir_rd. - -commit 47c8783c667df1cd785cccc35752c4107fdb85dc -Author: Fiona Glaser -Date: Mon Jul 27 04:45:03 2009 -0700 - - Add "fastdecode" tune option - It does what it says it does. - -commit 9ea7b69df504b8990f339e2c8578a516f9df00c7 -Author: Fiona Glaser -Date: Sun Jul 26 12:20:09 2009 -0700 - - Fix two bugs in QPRD - fprofile settings now actually fprofile QPRD. - Don't use i_mbrd before initializing it. - -commit 11f504412ecfc03274c9b3a6c05c0f05edf440ba -Author: Fiona Glaser -Date: Sun Jul 26 03:03:12 2009 -0700 - - Fix 10l in QPRD - Trellis used wrong lambda with trellis=1 - -commit fa3b8139a19d578c12c87e20a3215b41462866b4 -Author: Fiona Glaser -Date: Sat Jul 25 22:31:06 2009 -0700 - - Fix a nondeterminism with threads and subme>7 - Also add a few more checks to eliminate the need for spel_border. 
- -commit 4304c427fd6419b205c42aa139bfd8cebbdf60bf -Author: Fiona Glaser -Date: Thu Jul 23 12:20:39 2009 -0700 - - Add QPRD support as subme=10 - Refactor trellis lambda selection to be done in analyse_init instead of in trellis. - This will allow for more easy adaption of lambda later on; for now it allows constant lambda across variable QPs. - QPRD is only available with adaptive quantization enabled and generally improves SSIM and visual quality. - Additionally, weight the SSD values from RD based on the relative QP offset for chroma; helps visually at high QPs where chroma has a lower QP than luma. - This fixes some visual artifacts created by QPRD at high QPs. - Note that this generally hurts PSNR and SSIM, and so is only on when psy-RD is on. - -commit d68f3b076acb1674c7cce95aaa2dc62372bbf7f4 -Author: Fiona Glaser -Date: Tue Jul 21 19:56:21 2009 -0700 - - SSSE3 cachesplit workaround for avg2_w16 - Palignr-based solution for the most commonly used qpel function. - 1-1.5% faster overall on Core 2 chips. - -commit 9dfccce4181da4d4c1e61a707c338fc65310b1ad -Author: Loren Merritt -Date: Wed Jul 22 20:20:52 2009 +0000 - - shut up valgrind warnings in trellis - -commit 2e1db1f6d5b52886b3c77338ed24096a188134b1 -Author: Anton Mitrofanov -Date: Sat Jul 18 16:30:18 2009 -0700 - - New AQ algorithm option - "Auto-variance" uses log(var)^2 instead of log(var) and attempts to adapt strength per-frame. - Generates significantly better SSIM; on by default with --tune ssim. - Whether it generates visually better quality is still up for debate. - Available as --aq-mode 2. 
- -commit a79dc7b5bc6e95508d8456681c30b57605a05fd0 -Author: Fiona Glaser -Date: Wed Jul 15 12:43:35 2009 -0700 - - Cacheline-split SSSE3 chroma MC - ~70% faster chroma MC on 32-bit Conroe - Also slightly faster SSSE3 intra_sad_8x8c - -commit 1921079dd03d36502308379c4437e4440a970473 -Author: Fiona Glaser -Date: Sun Jul 12 12:07:01 2009 -0700 - - Improve documentation of qp/crf options - -commit bcf540a8f14595e7fa5dcdf2d53e321f46f1deeb -Author: Fiona Glaser -Date: Thu Jul 9 19:02:57 2009 -0700 - - Merge array_non_zero into zigzag_sub - Faster lossless, cleaner code. - SSSE3 version of zigzag_sub_4x4_field, faster lossless interlaced coding. - -commit 5394872fcdde4c29a66d3e7902dca03c3b941947 -Author: James Darnley -Date: Thu Jul 9 11:25:55 2009 -0700 - - Fix bug in reference frame autoadjustment - For some types of input file, x264 did the adjustment before width/height were known. - -commit c08cdc866735168732d7cf731e5171e2f3ff04a9 -Author: Fiona Glaser -Date: Tue Jul 7 11:13:39 2009 -0700 - - Fix fprofile settings to match changes in defaults - Also add b-adapt 2 to fprofile. - -commit 1be01cb3fb9efec44be81714ae25bc272fe6c6cf -Author: Fiona Glaser -Date: Fri Jul 3 02:33:44 2009 -0700 - - Slightly faster dequant_flat assembly - Eliminate some redundant shifts. - -commit 71b9d885aacd1cc86851248af6824ed0cd965d98 -Author: Fiona Glaser -Date: Wed Jul 1 21:14:57 2009 -0700 - - Totally new preset system for x264.c (not libx264), new defaults - Other new features include "tune" and "profile" settings; see --help for more details. - Unlike most other settings, "preset" and "tune" act before all other options. - However, "profile" acts afterwards, overriding all other options. - Our defaults have also changed: new defaults are --subme 7 --bframes 3 --8x8dct --no-psnr --no-ssim --threads auto --ref 3 --mixed-refs --trellis 1 --weightb --crf 23 --progress. - Users will hopefully find these changes to greatly improve usability. 
- -commit 8878778c59c0417e40521e0b42412bc314ece487 -Author: Fiona Glaser -Date: Wed Jul 1 16:33:12 2009 -0700 - - Update Gabriel's email address in AUTHORS - -commit 205a032c22467c90c26d33ed9ab23d60461e57c1 -Author: Fiona Glaser -Date: Tue Jun 30 15:20:32 2009 -0700 - - Early termination for chroma encoding - Faster chroma encoding by terminating early if heuristics indicate that the block will be DC-only. - This works because the vast majority of inter chroma blocks have no coefficients at all, and those that do are almost always DC-only. - Add two new helper DSP functions for this: dct_dc_8x8 and var2_8x8. mmx/sse2/ssse3 versions of each. - Early termination is disabled at very low QPs due to it not being useful there. - Performance increase is ~1-2% without trellis, up to 5-6% with trellis=2. - Increase is greater with lower bitrates. - -commit 8a96d510fd0aef8ccf73717754482c03c4063c0d -Author: David Conrad -Date: Fri Jun 26 13:09:44 2009 -0700 - - Fix bug in checkasm - frame_init_lowres_core check didn't check the C plane. - However, all x86 and PPC assembly was correct regardless of the unit test being incorrect. - -commit e0d1cad14c5251fd21aef99c92734d461200b779 -Author: Fiona Glaser -Date: Wed Jun 24 14:39:15 2009 -0700 - - Add subpartition cost for sub-8x8 blocks - Improves sub-p8x8 mode decision. - -commit 1b3a43306c8a8efa9e45380d58c1acd488069c2a -Author: Fiona Glaser -Date: Wed Jun 24 13:24:18 2009 -0700 - - Yet more CABAC and CAVLC optimizations - Also clean up a lot of pointless code duplication in CAVLC MV coding. - -commit 90bec46ba524c3e1a4facaeb3ea21b9ef08e614b -Author: Fiona Glaser -Date: Fri Jun 19 18:49:55 2009 -0700 - - Various CABAC optimizations and cleanups - Faster CABAC CBF context calculation for inter blocks. - Add x264_constant_p(), will probably be useful in the future as well. - Simpler subpartition functions. - Clean up and optimize mvd_cpn a bit more. - Various other minor optimizations. 
- -commit 3a61047871d39ddaecfb58f78ce5235ca9786a2d -Author: David Wolstencroft -Date: Sat Jun 20 21:42:55 2009 +0200 - - AltiVec version of frame_init_lowres_core. 22.4x faster than C on PPC7450 and 25x on PPC970MP. - -commit 42e179e84b8563eff62efcfbee0d947f09100fd4 -Author: Fiona Glaser -Date: Fri Jun 19 16:03:18 2009 -0700 - - MMX CABAC mvd sum calculation - Faster CABAC mvd coding. - -commit 46b107980bdc234c3bff9aae10d99e7b65551426 -Author: Fiona Glaser -Date: Fri Jun 19 16:02:39 2009 -0700 - - Faster MV prediction - Smaller code size, plus I get to use goto. - -commit 84fc0be90329bffd3c3f4515463cc7348bccc366 -Author: Fiona Glaser -Date: Wed Jun 10 10:37:01 2009 -0700 - - Fix potential crash in checkasm - ssim_end4_sse2 requires aligned sums - -commit 892dad35970375e99da6e047f677964b8eb69fc8 -Author: Fiona Glaser -Date: Wed Jun 10 10:11:00 2009 -0700 - - SSSE3, faster SSE2/MMX integral_init4v - The real reason I wrote this was an excuse to use shufpd. - -commit ebd85507c8c6eadb77c360ce3966a6ad4a5341d9 -Author: Mike Frysinger -Date: Thu Jun 11 08:29:27 2009 +0000 - - configure check for uclinux - -commit b67ef31c400d107e52a7592ef19a3f62b6267920 -Author: Loren Merritt -Date: Thu Jun 11 08:27:46 2009 +0000 - - fix a crash on frame width <= 48 pixels - -commit 20889345f7d6c13a8628e45f54c48df3e6793f97 -Author: Loren Merritt -Date: Wed May 27 20:47:18 2009 +0000 - - configure check for cc, rather than reporting lack of compiler as an asm error. - configure check for -mno-cygwin, since it's removed from gcc4. - -commit 3e6b5309229856eb80d7dde016cc33ac9afa5869 -Author: Loren Merritt -Date: Sun May 24 05:01:26 2009 +0000 - - a better way to keep track of mv candidates. - 2-4% faster dia, hex, and umh. - -commit 803482488c0d220929e5338b76249b511a034204 -Author: Loren Merritt -Date: Sun May 24 05:01:19 2009 +0000 - - reorder some motion estimation patterns. - this change is useless on its own, but segregates the bitstream-changing part out of my next optimization. 
- -commit b53f25fa3ff4a7445386c926eb018b0cb630f59e -Author: Loren Merritt -Date: Mon May 25 19:16:05 2009 -0400 - - Fix VBV warning broken in r915 - x264 will now correctly warn about maxrate specified without bufsize even when a level is not set. - -commit ba39abd8f25ac9a094bbd9e89692e9f52de9d1ce -Author: Loren Merritt -Date: Mon May 25 07:03:10 2009 +0000 - - configure check for ssse3-capable binutils - -commit eb3759477da1397153a6be504d1caf45eea8a080 -Author: Fiona Glaser -Date: Sun May 24 16:58:08 2009 -0400 - - Fix 10L in r1155 - Broke --me esa/tesa due to forgetting to add handling for x264_cost_mv_fpel. - -commit ded0dcd5806440eda4f7ffb072f8e13f8b185171 -Author: Fiona Glaser -Date: Fri May 22 21:28:15 2009 -0700 - - Fix bug where satd was incorrectly used with subme<=1 - Faster subme<=1 with i4x4 enabled. - -commit 4078e4be9a3640c9f5b33a6734071c7009cf96f7 -Author: Fiona Glaser -Date: Fri May 22 20:40:27 2009 -0700 - - Remove some pointless error handling code in cabac/cavlc - -commit 1aed7cd36955e1dcd2ed3e5cd1605b0978e7e9c1 -Author: Fiona Glaser -Date: Fri May 22 18:40:12 2009 -0700 - - Save some memory on mv cost arrays - Have quantizers that use the same lambda share the same cost array. - -commit d6261b812226bc61c0a55531501c51ea172cda9c -Author: Fiona Glaser -Date: Fri May 22 16:57:33 2009 -0700 - - Various CABAC and CAVLC optimizations - Backport CAVLC partial-inlining early termination to CABAC (~2-4% faster CABAC residual coding) - -commit 303e985d09f6562cf3a52327d30e3120fa481008 -Author: Loren Merritt -Date: Tue May 19 02:47:15 2009 +0000 - - fix a race condition at the end of thread_input - -commit 17b86284821abc7762eea22dfcf1a72e58c6b0e8 -Author: Fiona Glaser -Date: Mon May 18 22:40:45 2009 -0400 - - Various trellis speed optimizations - -commit 8dbe5d467d218484376650b1349b5350639ea5fb -Author: Fiona Glaser -Date: Sat May 16 12:16:34 2009 -0700 - - Make i686 the default arch on x86_32 - Disabling asm will default to a generic arch. 
- Also fix configure for gcc 4.4. - -commit 39f9a29f31293098fceed3e7d07bc860bc03b6ad -Author: Fiona Glaser -Date: Fri May 15 20:07:59 2009 -0700 - - Faster signed golomb coding - 3% faster CAVLC RDO and bitstream writing. - -commit ba5ef93da41010d213a81d4f4c0d4db8e6fcc2d6 -Author: Fiona Glaser -Date: Thu May 14 04:11:15 2009 -0700 - - Faster spatial direct MV prediction - unroll/tweak col_zero_flag - -commit 094a4edf89facbfbba50a7578fb824bace9eaebe -Author: Fiona Glaser -Date: Mon May 4 04:19:28 2009 -0700 - - More CABAC and CAVLC optimizations - Simplified function calling for block_residual_write_(cabac|cavlc) and improved sigmap coding. - Tried making 0/1-bit specific versions of CABAC asm, but benefit was minimal under GCC 4.3. - Helped a decent bit under 3.4, but you shouldn't be using such old versions anyways. - -commit a61eab5a3d14981162805cb279c12d4ccf6302d4 -Author: Fiona Glaser -Date: Wed Apr 29 22:54:52 2009 -0700 - - Various optimizations in frametype lookahead - -commit 1f57251003aa2fa82000ba86fbb04d6911505bd8 -Author: Fiona Glaser -Date: Sun Apr 26 22:13:17 2009 -0700 - - Some cosmetics/cleanup - Move some macros to x86util.asm that should have been there to begin with. - Fix a typo that didn't cause any issues. - -commit 57505e301e81ea6fbeaa1a5503f05250335ab1d1 -Author: Guillaume Poirier -Date: Tue Apr 21 21:18:44 2009 +0000 - - fix "incompatible types in initialization" compilation issues with GCC 4.3 (which is stricter than previous compiler version) - -commit b8745339e244e3b404a0023fb1d106fdccde509c -Author: Guillaume Poirier -Date: Tue Apr 21 17:32:21 2009 +0200 - - fix conversions between vectors with differing element types or numbers of subparts errors - -commit 448ea68827a3e16fd7c8c90880fefe1d85a17c5a -Author: Fiona Glaser -Date: Sat Apr 18 16:07:53 2009 -0700 - - Add "coded blocks" stat to output information. - This measures the total percentage of blocks, intra and inter, which have nonzero coefficients. 
- "y,uvAC,uvDC" refers to luma, chroma DC, and chroma AC blocks. - Note that skip blocks are included in this stat. - -commit 6eb29353d0a64b719305555d0c2c0727e0efa797 -Author: Fiona Glaser -Date: Fri Apr 17 23:38:29 2009 -0700 - - Enable asm predict_8x8_filter - I'm not entirely sure how this snuck its way out of holger's intra pred patch. - -commit 840f7a5e6322c5598bd9801f06d0ed83f83fbe41 -Author: Fiona Glaser -Date: Fri Apr 17 06:00:39 2009 -0700 - - Remove various bits of dead code found by CLANG. - -commit 6217838338477b4d37110398e86f5031790ae703 -Author: Fiona Glaser -Date: Tue Apr 14 14:47:02 2009 -0700 - - Slightly faster SSE4 SA8D, SSE4 Hadamard_AC, SSE2 SSIM - shufps is the most underrated SSE instruction on x86. - -commit 2bcc39fd4cb14bb5d8776d2dc560ebdce4eaf20a -Author: Fiona Glaser -Date: Thu Apr 9 02:14:41 2009 -0700 - - Various CABAC optimizations - Move calculation of b_intra out of the core residual loop and hardcode it where applicable. - Inlining cabac_mb_mvd was unnecessary and wasted tremendous amounts of code size. Inlining only cache_mvd is faster and significantly smaller. - -commit bf749f764aa24ad77502907eaeb1ba9e0d82d035 -Author: Fiona Glaser -Date: Wed Apr 8 05:45:03 2009 -0700 - - CAVLC optimizations - faster bs_write_te, port CABAC context selection optimization to CAVLC. - -commit be3c3d21a188ed5e96d1ed146a282f156be4b677 -Author: Fiona Glaser -Date: Sun Apr 5 13:01:42 2009 -0700 - - Faster CABAC RDO - Since the bypass case is quite unlikely, especially when doing merged sigmap/level coding, - it's faster to use a branch than a cmov. 
- -commit 18494e61ce99907a8826bd45eba75a88e6762fea -Author: Fiona Glaser -Date: Tue Mar 31 10:36:57 2009 -0700 - - Activate intra_sad_x3_8x8c in lookahead - -commit c8fb152fd1debce5bf88173fb4b794c6b006099e -Author: Fiona Glaser -Date: Tue Mar 31 10:34:35 2009 -0700 - - MBAFF interlaced coding is not allowed in baseline profile - -commit 55ccc4ef93952285ac0d609015751110b111e2a8 -Author: Fiona Glaser -Date: Mon Mar 30 19:30:59 2009 -0700 - - intra_sad_x3_8x8 assembly - -commit 104511d6e13a2d6628ba321fe7f0cb25ac545b6f -Author: Fiona Glaser -Date: Mon Mar 30 16:37:46 2009 -0700 - - intra_sad_x3_4x4 assembly - -commit 82aef940468385dbff6e32b77477a0c80124aca9 -Author: Fiona Glaser -Date: Mon Mar 30 04:07:50 2009 -0700 - - intra_sad_x3_8x8c assembly - Also fix intra_sad_x3_16x16's use of "n" as a loop variable (broke SWAP) - -commit 291b6ab1cb56c15a0169312b1c7ee8be7a1b594b -Author: Fiona Glaser -Date: Sun Mar 29 18:27:32 2009 -0700 - - Shave one instruction off CABAC encode_decision - range_lps>>6 ranges from 4-7, so (range_lps>>6)-4 == (range_lps>>6) & 3 - -commit a937afbe27515379f40085e6c663b6f6bc4c5191 -Author: Fiona Glaser -Date: Thu Mar 26 22:22:23 2009 -0700 - - Faster probe_skip - Add a second chroma threshold after the DC transform. - -commit 861d0b1c22de140724c91fe181208ec9debf848f -Author: Fiona Glaser -Date: Thu Mar 19 12:28:21 2009 -0700 - - Add missing "static" qualifier to two arrays - Should slightly improve performance. - -commit d25d50c9ffb02571c12e13c09356fa08fe97b0b4 -Author: Fiona Glaser -Date: Tue Mar 17 11:01:57 2009 -0700 - - SSE2 zigzag_interleave - Replace PHADD with FastShuffle (more accurate naming). - This flag represents asm functions that rely on fast SSE2 shuffle units, and thus are only faster on Phenom, Nehalem, and Penryn CPUs. - -commit acd4b2641c662bbe29795e986c58a3c47de675e9 -Author: Fiona Glaser -Date: Mon Mar 9 23:37:53 2009 -0700 - - Faster integral_init - palignr to avoid unaligned loads is worth it in inith, but not initv. 
- -commit 1b627cce8226a45980ac0b8fa70aa3a85ad5617f -Author: Holger Lubitz -Date: Mon Mar 9 14:05:16 2009 -0700 - - Faster SSSE3 hpel_filter_v - ~10% faster hpel_filter on 64-bit Penryn. - 32-bit version by Fiona Glaser. - -commit 4030a8bdc18e2380eabd921d7cf559b40f047013 -Author: Fiona Glaser -Date: Sat Mar 7 16:43:09 2009 -0800 - - Faster SSE2 pixel_var - Optimized using the DEINTB method from r1122. ~32% faster var_16x16 on Conroe. - -commit f701ebc84812eeab34735964a84f706ef2aa9625 -Author: Fiona Glaser -Date: Sat Mar 7 00:27:27 2009 -0800 - - SSSE3 hpel_filter_v - Optimized using the same method as in r1122. Patch partially by Holger. - ~8% faster hpel filter on 64-bit Nehalem - -commit 936f76e00fb4eb35efeb1a505dd6b935d1cc3199 -Author: Fiona Glaser -Date: Fri Mar 6 18:57:15 2009 -0800 - - Update some asm copyright headers - -commit 54e38917b413e80b474d3ed7ba344e7c489b020c -Author: Holger Lubitz -Date: Fri Mar 6 18:16:30 2009 -0800 - - Vastly faster SATD/SA8D/Hadamard_AC/SSD/DCT/IDCT - Heavily optimized for Core 2 and Nehalem, but performance should improve on all modern x86 CPUs. - 16x16 SATD: +18% speed on K8(64bit), +22% on K10(32bit), +42% on Penryn(64bit), +44% on Nehalem(64bit), +50% on P4(32bit), +98% on Conroe(64bit) - Similar performance boosts in SATD-like functions (SA8D, hadamard_ac) and somewhat less in DCT/IDCT/SSD. - Overall performance boost is up to ~15% on 64-bit Conroe. - -commit 7501d9505a10d17d8cc238fd87af6330d2c1804c -Author: Fiona Glaser -Date: Fri Mar 6 15:28:47 2009 -0800 - - Update x264 copyright date - -commit 79704fa50d50a6ae756643ad69f0170e5af831fd -Author: Fiona Glaser -Date: Wed Mar 4 03:16:06 2009 -0800 - - Remove pre-scenecut from fprofile commands as well - Also add psy-trellis to fprofile - -commit b77ea4db6faa06d9120defe6fa1a5f6803d224d4 -Author: Fiona Glaser -Date: Tue Mar 3 16:21:52 2009 -0800 - - Slightly faster 8x16 SAD on Penryn Core 2 - Same as MMX 8x16 cacheline SAD, but calls SSE2 8x16 SAD in non-cacheline case. 
- Only Nehalem benefits from sizes smaller than 8x16, and Nehalem doesn't use cacheline functions, so no smaller versions are included. - -commit dfe8f732e6c0b4d97218b8417bda8034524eecb8 -Author: Fiona Glaser -Date: Thu Feb 26 19:50:09 2009 -0800 - - Fix scenecut and VBV with videos of width/height <= 32 - Also remove an unused variable - -commit 42f27d04b8fe0f9fb7e978edd38252d9d8a5af3d -Author: Fiona Glaser -Date: Thu Feb 26 14:29:50 2009 -0800 - - Remove non-pre scenecut - Add support for no-b-adapt + pre-scenecut (patch by BugMaster) - Pre-scenecut was generally better than regular scenecut in terms of accuracy and regular scenecut didn't work in threaded mode anyways. - Add no-scenecut option (scenecut=0 is now no scenecut; previously it was -1) - Fix an incorrect bias towards P-frames near scenecuts with B-adapt 2. - Simplify pre-scenecut code. - -commit 2d5dcf8c216cdf053ad55b29a60f941b055d2325 -Author: Guillaume Poirier -Date: Tue Mar 3 07:44:18 2009 -0800 - - Add AltiVec version of hadamard_ac. 2.4x faster than the C version. - Note that this implementation is pretty naive and should be improved - by implementing what's discussed in this ML thread: - date: Mon, Feb 2, 2009 at 6:58 PM - subject: Re: [x264-devel] [PATCH] AltiVec implementation of hadamard_ac routines - -commit 2669f7ddf56b34240248ea02a8c7f8309e2b4610 -Author: Fiona Glaser -Date: Thu Feb 26 12:07:56 2009 -0800 - - Fix regression in r1085 - Deblocking was very slightly incorrect with partitions=all. - Bug found by BugMaster.
- -commit 56967517b7003192e9ac9e3110d566b2a05839f9 -Author: Fiona Glaser -Date: Mon Feb 16 05:56:12 2009 -0800 - - Optimize neighbor CBP calculation and fix related regression - r1105 introduced array overflow in cbp handling - -commit ce4de643fc96b89148a99281d3ef2edbf03e72f9 -Author: Tal Aloni -Date: Fri Feb 13 16:30:14 2009 -0800 - - Show FPS when importing a raw YUV file - -commit c6e72b86ece1b49126f1d53ff67df7e0f6f85148 -Author: Anton Mitrofanov -Date: Wed Feb 11 10:38:56 2009 -0800 - - Windows 64-bit support - A "make distclean" is probably required after updating to this revision. - -commit ef48e51d8f1cdc6d9ef30d3e3a1455d91a13d0f0 -Author: Fiona Glaser -Date: Wed Feb 11 10:35:56 2009 -0800 - - Minor fixes and cosmetics - Suppress a GCC warning, fix a non-problematic array overflow, one REP->REP_RET. - -commit 65304078db6e69f7e47505c0518c6a913cf2bc9f -Author: Manuel Rommel -Date: Tue Feb 10 12:06:47 2009 -0800 - - fix 10l in 75b495f2723fcb77f - Original thread: - date: Mon, Feb 9, 2009 at 9:37 PM - subject: [x264-devel] commit: Spare a vec_perm and a vec_mergeh though using a LUT of permutation vectors . (Guillaume Poirier ) - -commit f34ce950a7d0eb89adb052f0f96e36a55c587dde -Author: Guillaume Poirier -Date: Mon Feb 9 21:17:33 2009 +0100 - - Spare a vec_perm and a vec_mergeh through using a LUT of permutation vectors. - -commit 3c0cb9f0dd0730bebde169e29dd766ea56065c3a -Author: Guillaume Poirier -Date: Mon Feb 9 21:12:23 2009 +0100 - - Promote chroma planes to 16 byte alignment. - This will allow simplifying vector loads that can only load 16-bytes - aligned data (such as AltiVec). - -commit 0f386fe1fb626e654b7ca94fb27fbd727c0b4e97 -Author: Fiona Glaser -Date: Mon Feb 9 11:30:54 2009 -0800 - - Fix 10L in intra pred - Forgetting a %define resulted in SIGILL on 32-bit systems without SSE (e.g. Athlon XP).
- -commit 6dc8b9ad888faf6da8d88d5a2a82f39636a94fef -Author: Fiona Glaser -Date: Sun Feb 8 23:36:40 2009 -0800 - - Add decimation in i16x16 blocks - Up to +0.04db with CAVLC, generally a lot less with CABAC. - -commit c656d68ff441d2925afcb40ccfaf49279fd95656 -Author: Fiona Glaser -Date: Sat Feb 7 02:27:16 2009 -0800 - - Much faster CABAC residual context selection - Up to ~17% faster CABAC RDO, ~36% faster intra-only CABAC RDO. - Up to 7% faster overall in extreme cases. - -commit 5a7a1d14e461c431370d9111f3e6eb4efc15737f -Author: Fiona Glaser -Date: Sat Feb 7 01:57:43 2009 -0800 - - Faster coeff_last64 on 32-bit - -commit 0743869d0743d47dc93c22b6d55ca84e1851ebc2 -Author: Fiona Glaser -Date: Fri Feb 6 02:59:36 2009 -0800 - - More intra pred asm optimizations - SSSE3 version of predict_8x8_hu - SSE2 version of predict_8x8c_p - SSSE3 versions of both planar prediction functions - Optimizations to predict_16x16_p_sse2 - Some unnecessary REP_RETs -> RETs. - SSE2 version of predict_8x8_vr by Holger. - SSE2 version of predict_8x8_hd. - Don't compile MMX versions of some of the pred functions on x86_64. - Remove now-useless x86_64 C versions of 4x4 pred functions. - Rewrite some of the x86_64-only C functions in asm. - -commit 3c5cb4f10833c84fdce192c01c92b1a15145c85b -Author: Manuel Rommel -Date: Sun Feb 8 21:35:51 2009 +0100 - - Speed-up mc_chroma_altivec by using vec_mladd cleverly, and unrolling. - Also put width == 2 variant in its own scalar function because it's faster - than a vectorized one. - -commit 5f5fa1e9dc6a7dd51fa6c2da243e27fae845887d -Author: Holger Lubitz -Date: Wed Feb 4 12:46:17 2009 -0800 - - Merging Holger's GSOC branch part 2: intra prediction - Assembly versions of most remaining 4x4 and 8x8 intra pred functions. - Assembly version of predict_8x8_filter. - A few other optimizations. - Primarily Core 2-optimized. 
- -commit d35fe864df3c5dcff7a97cf4cc9ec8cd70f6ccb1 -Author: Guillaume Poirier -Date: Wed Feb 4 10:04:55 2009 +0000 - - 10l: fix compilation with GCC 4.3+ - -commit ded3e28cf1f593cbd1ad7c5255ba4ec82635574c -Author: Fiona Glaser -Date: Sat Jan 31 05:00:39 2009 -0800 - - Faster 8x8dct+CAVLC interleave - Integrate array_non_zero with the CAVLC 8x8dct interleave function. - Roughly 1.5-2x faster than the original separate array_non_zero method. - -commit 741e1f99b988390a7016c9e6d5b9cac01a1eab87 -Author: Fiona Glaser -Date: Sat Jan 31 01:00:26 2009 -0800 - - Measure CBP cost in i8x8 RD refinement - ~0.02-0.05db PSNR gain at high quants in intra-only encoding, pretty small otherwise. - Allows a small optimization in i8x8 encoding. - -commit 3c5f281ec05ef563e2371083105a10c2c2a84c2a -Author: Guillaume Poirier -Date: Sun Feb 1 20:58:00 2009 +0100 - - Take advantage of saturated signed horizontal sum instructions in - the variance computation epilogue since there won't be any overflow - triggering saturation. - Suggested by Loren Merritt - -commit e394bd600ba9b1a3cee24e7d0b01dfb0acc5d1ad -Author: Fiona Glaser -Date: Fri Jan 30 03:40:54 2009 -0800 - - Massive overhaul of nnz/cbp calculation - Modify quantization to also calculate array_non_zero. - PPC assembly changes by gpoirier. - New quant asm includes some small tweaks to quant and SSE4 versions using ptest for the array_non_zero. - Use this new feature of quant to merge nnz/cbp calculation directly with encoding and avoid many unnecessary calls to dequant/zigzag/decimate/etc. - Also add new i16x16 DC-only iDCT with asm. - Since intra encoding now directly calculates nnz, skip_intra now backs up nnz/cbp as well. - Output should be equivalent except when using p4x4+RDO because of a subtlety involving old nnz values lying around. - Performance increase in macroblock_encode: ~18% with dct-decimate, 30% without at CRF 25. - Overall performance increase 0-6% depending on encoding settings.
- -commit 9c55521590a2afe496394e51f6a42dc30939f8ad -Author: Guillaume Poirier -Date: Thu Jan 29 01:28:12 2009 -0800 - - Add PowerPC support for "checkasm --bench", reading the time base register. - This isn't ideal since the `time base' register is running at a fraction - of the processor cycle speed, so the measurement isn't as precise as x86's - rdtsc. - It's better than nothing though... - -commit bf81694e7e38723513740d5d312a5866a5b59215 -Author: Brad Smith -Date: Thu Jan 29 04:35:34 2009 +0000 - - fix detection of pthread and isfinite on OpenBSD - -commit 6ec3ec06cc7433a2cae2c84eb8eaa450506b5cdf -Author: Loren Merritt -Date: Tue Jan 27 05:42:51 2009 +0000 - - remove $ECHON kludge, which broke on SunOS. bring back `gcc -MT`. - remove auto-reconfigure on svn update, which has done nothing since we stopped using svn. - fix $AS on sparc (was disabled by mmx check). - fix --extra-asflags (was ignored). - mark bash scripts as bash, not sh - - patch partly by Greg Robinson and Jugdish. - -commit 0e43d5d995bb436a63934d70792e481770f406d3 -Author: Loren Merritt -Date: Mon Jan 26 14:28:48 2009 +0000 - - 1.6x faster satd_c (and sa8d and hadamard_ac) with pseudo-simd. - 60KB smaller binary. - -commit d4ca70f8398bdba2391fbcea4886ee0577494b08 -Author: Fiona Glaser -Date: Tue Jan 27 23:27:56 2009 -0800 - - Hack around a potential failure point in VBV - pred_b_from_p can become absurdly large in static scenes, leading to rare collapses of quality with VBV+B-frames+threads. - This isn't a final fix, but should resolve the problem in most cases in the meantime. - -commit 83d805fe95b5dcf0493ecf0efa77ac6d0bc43a1d -Author: Fiona Glaser -Date: Mon Jan 26 23:43:25 2009 -0800 - - Much faster chroma encoding and other opts - ~15% faster chroma encode by reorganizing CBP calculation and adding special-case idct_dc function, since most coded chroma blocks are DC-only. 
- Small optimization in cache_save (skip_bp) - Fix array_non_zero to not violate strict aliasing (should eliminate miscompilation issues in the future) - Add in automatic substitutions for some asm instructions that have an equivalent smaller representation. - -commit 360946d0f56a26b8b46c81088426557a628513cc -Author: Guillaume Poirier -Date: Mon Jan 26 06:28:23 2009 -0800 - - add AltiVec implementation of x264_mc_copy_w16_aligned - -commit 521bbdd13131a8e95de99779182cfe7b5fa9ecd1 -Author: Guillaume Poirier -Date: Fri Jan 23 13:53:06 2009 -0800 - - add AltiVec implementation of x264_pixel_var_16x16 and x264_pixel_var_8x8 - -commit 9e5d49f0b1599035b42d0fc0385e2d52e7f43be1 -Author: Guillaume Poirier -Date: Fri Jan 23 01:11:20 2009 -0800 - - add AltiVec 16 <-> 32 bits conversions macros - -commit 7a1bfdd1f11a2da2ec6b9c473ed0baf9047a5460 -Author: Guillaume Poirier -Date: Mon Jan 19 21:29:27 2009 +0100 - - Replace 16x16=>32 mul + pack + add by a simple 16x16=>16 multiply-add. - Suggested by Loren. - -commit 1f0e78d8ea5b0d260f8497d4817b1962f6b0894d -Author: Fiona Glaser -Date: Mon Jan 19 15:17:53 2009 -0800 - - Eliminate support for direct_8x8_inference=0 - The benefit in the most extreme contrived situation was at most 0.001db PSNR, at the cost of slower decoding. - As this option was basically useless, it was a waste of code and prevented some other useful optimizations. - Remove some unused mc code related to sub-8x8 partitions. - Small deblocking speedup when p4x4 is used. - Also remove unused x264_nal_decode prototype from x264.h. - -commit 71e87faecf863ed7776e8d8c7eb339bdd7842877 -Author: Brad Smith -Date: Mon Jan 19 05:14:53 2009 -0800 - - Add AltiVec and CPU numbers detection on OpenBSD. - -commit 7aa5a4e694f38aac7f217baa8998f197be77cbec -Author: Guillaume Poirier -Date: Sun Jan 18 22:44:14 2009 +0100 - - Add AltiVec implementation of predict_8x8c_p. 2.6x faster than scalar C. 
- -commit 8e485c6d2bb463d44ac6047f018c272343d07e17 -Author: Fiona Glaser -Date: Sat Jan 17 15:16:37 2009 -0500 - - Warn if direct auto wasn't set on the first pass - And, if it wasn't, run direct auto as if it was the first pass, rather than simply forcing temporal direct mode on all frames. - Also a small tweak to coeff_level_run asm. - -commit 0f822746e1427f8b237f4dc368d9ea2271b20644 -Author: Brad Smith -Date: Sat Jan 17 12:52:28 2009 +0000 - - Changes the PowerPC ppccommon.h header so it no longer checks for a particular - OS such as Linux but instead looks for HAVE_ALTIVEC_H being set. - Fixes all *BSD/PowerPC builds. - -commit da9787a42606ddf7c211e9860bd6a585fbe8a803 -Author: Guillaume Poirier -Date: Wed Jan 14 21:56:31 2009 +0100 - - update x264_hpel_filter_altivec's prototype to match the one of the C version. - It changed in commit 045ae4045a1827555b3eaab4fbf3c9809e98c58f (factorization of mallocs) - (NB: Altivec implementation wasn't allocating and writing to any scratch memory.) - -commit ed91c877df9ffcfceba8a387e7f5bf9302dc2276 -Author: Guillaume Poirier -Date: Wed Jan 14 21:49:42 2009 +0100 - - rename vector+array unions to closer match the vector typedefs names. - -commit 7f0dc1a6048fabfed78ea8d29fde226453aad328 -Author: Guillaume Poirier -Date: Wed Jan 14 21:13:58 2009 +0100 - - Add Altivec implementation of all the remaining 16x16 predict routines. - -commit 7ecbd9ea21867a269cb7a2afa0984d8c0bb6aa0e -Author: Fiona Glaser -Date: Tue Jan 13 21:11:50 2009 -0500 - - Cache ref costs and use more accurate MV costs - New MV costs should improve quality slightly by improving the smoothness of the field of MV costs (and they're closer to CABAC's actual costs). - Despite being optimized for CABAC, they still help under CAVLC, albeit less. 
- MV cost change by Loren Merritt - -commit 6b4b85f1e5d26a0314bbb621c3e72fb4bd43bfc6 -Author: Fiona Glaser -Date: Tue Jan 13 20:22:36 2009 -0500 - - Support forced frametypes with scenecut/b-adapt - This allows an input qpfile to be used to force I-frames, for example. - The same can be done through the library interface. - Document the format of the qpfile in --longhelp and the forcing of frametypes in x264.h - Note that forcing B-frames and B-refs may not always have the intended result. - Patch partially by Steven Walters . - -commit 3a2a2a4c29a5c835f97498885754f2be37617b22 -Author: Fiona Glaser -Date: Tue Jan 13 19:58:44 2009 -0500 - - Remove an IDIV from i8x8 analysis - Only one IDIV is left in macroblock level code (transform_rd) - -commit d7d1d37f7eb27940001ff436666da3744a1236be -Author: Fiona Glaser -Date: Thu Jan 8 15:07:16 2009 -0500 - - Fix regression in r1066 - With some combinations of video width and other settings, the scratch buffer was slightly too small. - This caused heap corruption on some systems. - Also prevent merange from being raised during encoding with esa/tesa through encoder_reconfig, as this no longer works. - -commit d52d44b319c30142903fceb09c52c9c8b64f22da -Author: Fiona Glaser -Date: Tue Jan 6 16:55:44 2009 -0500 - - Disable B-frames in lossless mode - They hurt compression anyways, and direct auto was bugged with lossless. - -commit cac42177137a54acfa6bf8b1368a9978fc9cf562 -Author: Brad Smith -Date: Mon Jan 5 22:53:11 2009 +0000 - - Factorize in ppccommon.h the conditional inclusion of altivec.h on Linux systems. - -commit 438ca2d81828f3fcc72b25a792d64b3649627a42 -Author: Brad Smith -Date: Mon Jan 5 15:58:32 2009 -0500 - - Disable __builtin_clz() intrinsic on gcc versions prior to 3.4. - The function did not exist before that version. 
- -commit d1f4f0c7cd3502acdda273df55143de701cebc6a -Author: Fiona Glaser -Date: Thu Jan 1 21:44:00 2009 -0500 - - Small tweaks to coeff asm - Factor out a few redundant pxors - Related cosmetics - -commit 6c15d57cfc40e0d1bd3529cdc82f2e8ac92734fb -Author: Steven Walters -Date: Tue Dec 30 22:20:37 2008 -0500 - - Use the correct strtok under MSVC - Also change one malloc -> x264_malloc - -commit 30b14a489b3983426dfb2beceb6f76cc485067c6 -Author: Fiona Glaser -Date: Tue Dec 30 22:14:45 2008 -0500 - - Add stack alignment for lookahead functions - Should allow libx264 to be called from non-gcc-compiled applications without adding force_align_arg_pointer. - -commit cb688111fb28225a4d1fe2a45472ac0cd093a08f -Author: Fiona Glaser -Date: Tue Dec 30 20:47:45 2008 -0500 - - Add support for SSE4a (Phenom) LZCNT instruction - Significantly speeds up coeff_last and coeff_level_run on Phenom CPUs for faster CAVLC and CABAC. - Also a small tweak to coeff_level_run asm. - -commit 9e1f300078a010d07a7331c796d08d78c624c772 -Author: Steven Walters -Date: Mon Dec 29 05:14:26 2008 +0000 - - factor mallocs out of hpel, ssim, and esa. - there should now be no memory allocation outside of init-time. - -commit ffd73767089b2db2ca9a06891e10883fe2bcb3e2 -Author: Fiona Glaser -Date: Tue Dec 30 03:12:17 2008 +0000 - - Much faster CAVLC RDO and bitstream writing - Pure asm version of level/run coding. Over 2x faster than C. - Up to 40% faster CAVLC RDO. Overall benefit up to ~7.5% with RDO or ~5% with fast encoding settings. 
- -commit f33ba9e2bc8d344c515f2d5f662958d323f3a074 -Author: Loren Merritt -Date: Mon Dec 29 21:52:25 2008 -0500 - - Cosmetics: cleaner syntax for defining temporary registers in asm - Globally define t#[qdwb], so that only t# needs to be locally defined when reorganizing registers - -commit 406a40dc41438edac3f60d231eb9196b3d33008f -Author: Fiona Glaser -Date: Sat Dec 27 21:36:14 2008 -0500 - - Much faster CABAC RDO - Since RDO doesn't care about what order bit costs are calculated, merge sigmap and level coding into the same loop in RDO. - This is bit-exact for 4x4dct but slightly incorrect for 8x8dct due to the sigmap containing duplicated contexts. - However, the PSNR penalty of this is extremely small (~0.001db). - Speed benefit is about 15% in 4x4dct and 30% in 8x8dct residual bit cost calculation at QP20. - Overall encoding speed benefit is up to 5%, depending on encoding settings. - Also remove an old unnecessary CABAC table that hasn't been used for years. - -commit 131d066e4c79f4fe29ce9e70926ffd7faaf9b833 -Author: Fiona Glaser -Date: Fri Dec 26 07:35:49 2008 -0500 - - VLC table optimizations - Slightly reorganize VLC tables for ~2% faster block_residual_write_cavlc. - Also a small optimization in p8x8 CAVLC. - -commit 0ad4944b15f0de78a686e49353d61205aac6526f -Author: Loren Merritt -Date: Wed Dec 24 22:58:17 2008 -0500 - - Fix crash in --me esa/tesa introduced in r1058 - Also suppress the last mingw warning message - -commit 9fe6e5e6fbeb045787bc47fd1fd073855510d427 -Author: Fiona Glaser -Date: Tue Dec 23 22:33:28 2008 -0500 - - Optimize variance asm + minor changes - Remove SAD argument from var, not needed anymore. - Speed up var asm a bit by eliminating psadbw and instead HADDWing at end. 
- Eliminate all remaining warnings on gcc 3.4 on cygwin - Port another minor optimization from lavc (pskip) - -commit 8761805b8240c0da5f9d6d79b1a2affe3b5213ad -Author: Fiona Glaser -Date: Tue Dec 23 18:31:48 2008 -0500 - - Minor CABAC cleanups and related optimizations - Merge the two list tables to allow cleaner MC/CABAC/CAVLC code - Remove lots of unnecessary {s - Port some very minor opts from lavc - -commit bc29c635327d79f6a5372df30477db28635e3846 -Author: Loren Merritt -Date: Thu Dec 11 19:47:17 2008 +0000 - - faster ESA init - reduce memory if using ESA and not p4x4 - -commit 8e5d63a544efb6eb0f6677f718033f049c1ccd56 -Author: Fiona Glaser -Date: Mon Dec 15 23:02:49 2008 -0800 - - More macroblock_cache optimizations - Patch partially by Loren Merritt - -commit f9307df88e39cafa30db47249895fdd1745cc1aa -Author: Fiona Glaser -Date: Mon Dec 15 13:15:29 2008 -0800 - - Faster macroblock_cache_rect - Explicit loop unrolling - -commit 69dc9f4dbe3283c4069bd4d1ddd4685510714375 -Author: Fiona Glaser -Date: Sun Dec 14 18:30:51 2008 -0800 - - Optimizations in predict_mv_direct - Add some early terminations and minor optimizations - This change may also fix the extremely rare direct+threading MV bug. - -commit 9b8370b395349918b8e4171b2bcbeacf83d67231 -Author: David Wolstencroft -Date: Sun Dec 14 10:47:28 2008 +0000 - - Fix visual corruption when picture width was not mod 32. - The previous Altivec implementation of mc_chroma assumed that i_src_stride was always mod 16. - -commit 664a4e41959dcecf5030196a91a48aac667b8c35 -Author: Guillaume Poirier -Date: Mon Dec 8 21:11:45 2008 +0100 - - Add support for FSF GCC version >= 4.3 on OSX. - So far, only Apple GCC version was supported. - -commit fa6728b60d2ab3bef9e9ab29635672e0c6697c3d -Author: Fiona Glaser -Date: Thu Dec 11 17:31:52 2008 -0800 - - More accurate refcost for p8x8 CAVLC - Slightly better quality, especially in non-RD mode, with CAVLC.
- -commit 6abf5d67010f8c3889f3184769e09f12fbe473c2 -Author: Loren Merritt -Date: Wed Dec 10 20:54:17 2008 -0800 - - use lookup tables instead of actual exp/pow for AQ - Significant speed boost, especially on CPUs with atrociously slow floating point units (e.g. Pentium 4 saves 800 clocks per MB with this change). - Add x264_clz function as part of the LUT system: this may be useful later. - Note this changes output somewhat as the numbers from the lookup table are not exact. - -commit b219d4fcfbb0ac904ff0e7e4ed67c3511e6596a8 -Author: Fiona Glaser -Date: Wed Dec 10 20:53:13 2008 -0800 - - Suppress saveptr warnings on Windows GCC - -commit e0779152f7c9489ba89481272537e9ac0a1f733a -Author: Fiona Glaser -Date: Wed Dec 10 20:52:06 2008 -0800 - - More small speed tweaks to macroblock.c - -commit 99448f6c98289e74f1234e38b9ed2c945f2bdfca -Author: Fiona Glaser -Date: Mon Dec 8 13:44:23 2008 -0800 - - Much faster CAVLC residual coding - Use a VLC table for common levelcodes instead of constructing them on-the-spot - Branchless version of i_trailing calculation (2x faster on Nehalem) - Completely remove array_non_zero_count and instead use the count calculated in level/run coding. Note: this slightly changes output with subme > 7 due to different nonzero counts being stored during qpel RD. - -commit 89a893a0b153cbc9fa5143aad15b17581b9b448b -Author: Guillaume Poirier -Date: Fri Dec 5 22:26:55 2008 +0100 - - fix compilation with GCC-4.3+ - -commit fa800b23cc0e1eb5ca603e845d977952ee63ddd6 -Author: Fiona Glaser -Date: Sat Nov 29 23:13:58 2008 -0800 - - High Profile allows 25% higher maxbitrate/cpb - Correct level detection to take this into account. - -commit bf65c5583d7582d2f0446a2848b37f4663369135 -Author: BugMaster -Date: Sat Nov 29 14:04:29 2008 -0800 - - s/nasm/yasm in VS project file - -commit 19ebada1c9a67ac0837936eb9269224cb3ce8dd7 -Author: Fiona Glaser -Date: Sat Nov 29 04:49:18 2008 -0800 - - Cosmetic: update various file headers. 
- -commit 85c217958c677b164305582e3c6304bf42f1bac5 -Author: Loren Merritt -Date: Sat Nov 29 11:54:02 2008 +0000 - - add date and compiler to `x264 --version` - -commit df72b08c60856a71f4a15634a6a87e0fe34bca15 -Author: Fiona Glaser -Date: Fri Nov 28 14:32:11 2008 -0800 - - 10L in r1041 - -commit c1d73389eaaebb29ca69f7436ea5a8a707a555c9 -Author: Fiona Glaser -Date: Thu Nov 27 19:37:56 2008 -0800 - - Significantly faster CABAC and CAVLC residual coding and bit cost calculation - Early-terminate in residual writing using stored nnz counts - To allow the above, store nnz counts for luma and chroma DC - Add assembly functions to find the last nonzero coefficient in a block - Overall ~1.9% faster at subme9+8x8dct+qp25 with CAVLC, ~0.7% faster with CABAC - Note this changes output slightly with CABAC RDO because it requires always storing correct nnz values during RDO, which wasn't done before in cases it wasn't useful. - CAVLC output should be equivalent. - -commit ecb04a3ba99324dd6a319224a0dae4e3fa962b40 -Author: Fiona Glaser -Date: Wed Nov 26 23:42:55 2008 -0800 - - dequant_4x4_dc assembly - About 3.5x faster DC dequant on Conroe - -commit 6ce71ce7b935bdd7efb0c843dafd0c208194ab65 -Author: Loren Merritt -Date: Thu Nov 27 02:37:46 2008 +0000 - - fix an overflow in dct4x4dc_mmx - (unlikely to have occurred in any real video) - -commit c5c0a7fd77b039fcec891ab97e34d9d40fec6839 -Author: Fiona Glaser -Date: Tue Nov 25 16:30:39 2008 -0800 - - Remove nasm support - Nasm won't correctly parse the SSE4 code introduced a few revisions ago, so we're removing support. - Users should upgrade to yasm 0.6.1 or later. - -commit 0e58d0373bb8586f78eb1b95221b347123689e3c -Author: BugMaster -Date: Tue Nov 25 15:11:24 2008 -0800 - - Fix rare warning messages in ratecontrol due to r1020 - -commit 632e09999c9cf5828d80aa98ec357181607d4447 -Author: BugMaster -Date: Tue Nov 25 15:10:43 2008 -0800 - - Fix MSVC compilation and clean up MSVC build file - Remove Release64 which never worked anyways. 
- -commit 69e69197c424bff9e4b90eb5d608f15b59ca77b4 -Author: Fiona Glaser -Date: Tue Nov 25 01:04:26 2008 -0800 - - Faster width4 SSD+SATD, SSE4 optimizations - Do satd 4x8 by transposing the two blocks' positions and running satd 8x4. - Use pinsrd (SSE4) for faster width4 SSD - Globally replace movlhps with punpcklqdq (it seems to be faster on Conroe) - Move mask_misalign declaration to cpu.h to avoid warning in encoder.c. - These optimizations help on Nehalem, Phenom, and Penryn CPUs. - -commit e76caf368c7044fdd1eff6a423d9518e9818a4ba -Author: Guillaume Poirier -Date: Tue Nov 25 17:27:27 2008 +0100 - - fix indentation, whitespace cleanup, more consistent indentation of macro backslashes - -commit 49b16f1d1ca3ebe90c43ec950ca279b842892061 -Author: David Wolstencroft -Date: Sat Nov 22 17:54:38 2008 +0100 - - Change some macros to be more sensitive to memory alignment, thus avoiding - useless loads/stores and calculations of permutation vectors. - Affected functions are all of mc_luma, mc_chroma, 'get_ref', SATD, SA8D and deblock. - Gains globally vary from ~5% - 15% depending on settings running on a 1.42 ghz G4. - -commit e56a842d2e5ae5e4cdc412adb73e7c952e0f29cb -Author: Loren Merritt -Date: Fri Nov 7 05:31:24 2008 +0000 - - refactor satd. 20KB smaller binary. - refactor sa8d. slightly faster. - more checkasm for hadamard. - -commit d2c6e84dcafff29944ddb43ac58dbb7c23b33605 -Author: Fiona Glaser -Date: Mon Nov 24 21:56:24 2008 -0800 - - Fix crash with threads and SSEMisalign on Phenom - Misalign mask needed to be set separately for each encoding thread. - -commit 80ea99c001eaab58a0ff54f0b2c4815cb2e63076 -Author: Fiona Glaser -Date: Fri Nov 21 03:39:11 2008 -0800 - - Phenom CPU optimizations - Faster hpel_filter by using unaligned loads instead of emulated PALIGNR - Faster hpel_filter on 64-bit by using the 32-bit version (the cost of emulated PALIGNR is high enough that the savings from caching intermediate values is not worth it).
- Add support for misaligned_mask on Phenom: ~2% faster hpel_filter, ~4% faster width16 multisad, 7% faster width20 get_ref. - Replace width12 mmx with width16 sse on Phenom and Nehalem: 32% faster width12 get_ref on Phenom. - Merge cpu-32.asm and cpu-64.asm - Thanks to Easy123 for contributing a Phenom box for a weekend so I could write these optimizations. - -commit 7df060bedbc72232fdf48869cea47bcd480e8eda -Author: Fiona Glaser -Date: Thu Nov 20 20:11:14 2008 -0800 - - A few tweaks to decimate asm - A little bit faster on both 32-bit and 64-bit - -commit a99183d3685c26d6d7815d6be3fe28fcd77c94bf -Author: Fiona Glaser -Date: Wed Nov 12 16:50:31 2008 -0800 - - Nehalem optimization part 2: SSE2 width-8 SAD - Helps a bit on Phenom as well - ~25% faster width8 multiSAD on Nehalem - -commit 4975e8187193c5f0bcc6b91b88c43e50482b8a1e -Author: Fiona Glaser -Date: Mon Nov 10 23:34:02 2008 -0800 - - Add subme=0 (fullpel motion estimation only) - Only for experimental purposes and ultra-fast encoding. Probably not a good idea for firstpass. - -commit ebe1103b4c8e2e7f61e294b65c70756c645fee49 -Author: Fiona Glaser -Date: Mon Nov 10 15:34:48 2008 -0800 - - Fix minor memory leak in r1022 - -commit ac675e30c3c47a409d015b2d4f6d6f495f53e417 -Author: Fiona Glaser -Date: Mon Nov 10 15:32:06 2008 -0800 - - r1024 borked checkasm - Remove idct/dct2x2 from checkasm as they are no longer in dctf - -commit be1211807c0725803253d44f47cd6305ffbaddf9 -Author: Fiona Glaser -Date: Sun Nov 9 17:39:21 2008 -0800 - - Faster chroma encoding - 9-12% faster chroma encode. - Move all functions for handling chroma DC that don't have assembly versions to macroblock.c and inline them, along with a few other tweaks. 
- -commit ae51235dd5ad1f9b6396a857f478b4f391cffcff -Author: Fiona Glaser -Date: Sun Nov 9 17:34:31 2008 -0800 - - Various cosmetics and minor fixes - Disable hadamard_ac sse2/ssse3 under stack_mod4 - Fix one MSVC compilation warning - Fix compilation in debug mode in certain cases on x64 - Remove eval.c from MSVC project - Fix crash when VBV is used in CQP mode - Patches by MasterNobody - -commit 0c841de6810678f3da1c06a34595cb490d59eeb6 -Author: Fiona Glaser -Date: Sat Nov 8 20:16:17 2008 -0800 - - Faster b-adapt + adaptive quantization - Factor out pow to be only called once per macroblock. Speeds up b-adapt, especially b-adapt 2, considerably. - Speed boost is as high as 24% with b-adapt 2 + b-frames 16. - -commit f2a12915c1df7df87a816d07d724e4f1f7b00729 -Author: Fiona Glaser -Date: Fri Nov 7 11:39:43 2008 -0800 - - Faster CABAC residual encoding - 6% faster block_residual_write_cabac in RD mode. - -commit a7831e46278fed3f7f907b7b1687b1f877e9fb1e -Author: Fiona Glaser -Date: Wed Nov 5 19:51:59 2008 -0800 - - Fix potential crash in the case that the input statsfile is too short - Also resolve various other potential weirdness (such as multiple copies of the same error message in threaded mode). - -commit 1bf7228f7e975e9220daae5a439797aaea2aa511 -Author: Fiona Glaser -Date: Wed Nov 5 03:11:45 2008 -0800 - - Initial Nehalem CPU optimizations - movaps/movups are no longer equivalent to their integer equivalents on the Nehalem, so that substitution is removed. - Nehalem has a much lower cacheline split penalty than previous Intel CPUs, so cacheline workarounds are no longer necessary. - Thanks to Intel for providing Avail Media with the pre-release Nehalem CPU needed to prepare these (and other not-yet-committed) optimizations. - Overall speed improvement with Nehalem vs Penryn at the same clock speed is around 40%. 
- -commit fc321fd6ae4425eb2fba677eba5dc5ce36e98dd4 -Author: Gabriel Bouvigne -Date: Tue Nov 4 09:56:03 2008 -0800 - - Fix potential infinite loop in VBV under GCC 4.2 - -commit 16e3ef85b4d1162ac46e4b6b384bc61481dbaf7a -Author: Fiona Glaser -Date: Mon Nov 3 22:59:49 2008 -0800 - - Encoder_reconfig: esa/tesa can only be enabled if they were on to begin with - Bug report by kemuri-_9. - -commit ca49901f75ba26ec4e1c7e0c448bfc759e78f961 -Author: Loren Merritt -Date: Thu Oct 30 00:47:09 2008 -0700 - - Fix bug in hadamard_ac SSE assembly - Some extreme inputs could cause overflows. - -commit fb1af79ef8b0c18a317a0582077f12ac63d6c9f0 -Author: Fiona Glaser -Date: Tue Oct 28 20:35:15 2008 -0700 - - Full sub8x8 RD mode decision - Small speed penalty with p4x4 enabled, but significant quality gain at subme >= 6 - As before, gain is proportional to the amount of p4x4 actually useful in a given input at the given bitrate. - -commit e09f55ccc3b9ffee42d6ed6a86cbe88ef603b05b -Author: Fiona Glaser -Date: Sat Oct 25 01:50:08 2008 -0700 - - Optimize CABAC bit cost calculation - Speed up cabac mvd and add new precalculated transition/entropy table. - Add "noup" function for cabac operations to not update the state table when it isn't necessary. - 1-3% faster macroblock_size_cabac. 
- Cosmetics - -commit b875aa64ce1e416347af55cac7326ab72456eb68 -Author: Anders Ossowicki -Date: Thu Oct 23 22:36:11 2008 -0700 - - Replace "git-command" with "git command" in version.sh for git 1.6 support - -commit e9a6bd75f7203790a256d2cfb8838f2c06404410 -Author: Loren Merritt -Date: Thu Oct 23 13:45:04 2008 -0700 - - Add assembly version of CAVLC 8x8dct interleave - Faster CAVLC encoding and RDO with 8x8dct - -commit f151cc4b9bd513f06511519ddb89b1ee80a722eb -Author: Alexander Strange -Date: Wed Oct 22 15:55:30 2008 -0700 - - Add support for psy-rd/trellis to encoder_reconfig - -commit f7cc3064f5edd0e75afed26904b57e6704dcaabf -Author: Alexander Strange -Date: Wed Oct 22 15:00:43 2008 -0700 - - Fix Darwin speed regression - -commit 5254d366adc8b0a926df197cd96a689c8583a370 -Author: Gabriel Bouvigne -Date: Wed Oct 22 14:48:47 2008 -0700 - - Further improve prediction of bitrate and VBV in threaded mode - -commit 5993b7e968fc1154a4fae417fb9dbb0157c60cf8 -Author: Fiona Glaser -Date: Wed Oct 22 13:37:09 2008 -0700 - - Sub-8x8 Qpel-RD in P-frames - Improves quality when using p8x4/p4x8/p4x4 subpartitions - Benefit is proportional to how many sub-8x8 partitions are used; helps most at high bitrates and low resolutions. - -commit fe5f0a473508ef3154c7f0809b8500c3ddd5eee2 -Author: Fiona Glaser -Date: Wed Oct 22 02:20:06 2008 -0700 - - Faster qpel-RD - 3-4% faster qpel-RD; avoid re-checking bmv/pmv during the hex search. 
- -commit d17e81e26ae3fc3093182c00af254ceac9bed21f -Author: Fiona Glaser -Date: Wed Oct 22 00:37:00 2008 -0700 - - Some minor optimizations in RD refinement - Don't write b subpartition in CABAC RDO - Calculate nonzero count in i4x4 CAVLC RDO - -commit 91522693403f9bc06985f8e4e9aebb6d4b43fc5a -Author: Fiona Glaser -Date: Tue Oct 21 20:17:18 2008 -0700 - - Faster deblocking when p4x4 isn't used - Most of the MV checks can be skipped, resulting in faster strength calculation - -commit 09c6a0b2812d2f60c69658b62c22ac8a71ba39a9 -Author: Fiona Glaser -Date: Tue Oct 21 19:38:21 2008 -0700 - - Print profile and level information upon starting encode - Previously level was only printed as part of autodetect, and only in verbose mode. - -commit 21afe78c85469ae11e7ad638c0a9f958c6573933 -Author: Fiona Glaser -Date: Tue Oct 21 17:10:46 2008 -0700 - - Fix possible crash in trellis at very low QPs - -commit d1fbc652674e9aed6c83ecb321b09891ea5c7e05 -Author: Fiona Glaser -Date: Tue Oct 21 14:59:07 2008 -0700 - - Add assembly versions of decimate_score - 3-7x faster decimation, 1-3% faster overall - -commit 8d6b262d3c806ae4e8380a6b0c6d31c6c105dba7 -Author: Fiona Glaser -Date: Sat Oct 18 03:40:59 2008 -0700 - - Fix typo in subme8/9 lossless qpel-RD - Slightly improves compression. - -commit a516e8e497f098985152aa047de95ffd20b578bb -Author: Fiona Glaser -Date: Thu Oct 16 03:17:53 2008 -0700 - - Extend trellis to support luma/chroma DC and chroma AC - Small speed loss in trellis 1, slightly larger in trellis 2, but significant quality improvement. - -commit e21bc3443d5717d0960130486f6b8b712d2be8df -Author: Loren Merritt -Date: Thu Oct 2 20:57:08 2008 -0600 - - rm gtk, avc2avi. - I don't remember why I allowed a gui into the repository in the first place. There's nothing that makes this one special relative to all the other x264 guis. - avc2avi doesn't compile since we removed the bitstream reader. And avc doesn't belong in avi. 
- -commit be4be30ff33ccf0cbe7ed5f275e89c87b5927c86 -Author: Fiona Glaser -Date: Thu Oct 2 18:11:13 2008 -0700 - - Resolve quality regression in r996 - Accidentally removed the wrong line of code. I think this classifies as a "10l". - Thanks to techouse for initial bug report and skystrife for helping me find it. - -commit 9df640c440e2a5b51683de01ba5a76e72ecc44f3 -Author: Ralf Terdic -Date: Thu Oct 2 08:52:33 2008 -0700 - - Fix minor memory leak accidentally added with the addition of b-adapt 2 - -commit 60455fff82906da0237a4f56b3686a588579e41f -Author: Fiona Glaser -Date: Tue Sep 30 18:34:56 2008 -0700 - - Rework subme system, add RD refinement in B-frames - The new system is as follows: subme6 is RD in I/P frames, subme7 is RD in all frames, subme8 is RD refinement in I/P frames, and subme9 is RD refinement in all frames. - subme6 == old subme6, subme7 == old subme6+brdo, subme8 == old subme7+brdo, subme9 == no equivalent - --b-rdo has, accordingly, been removed. --bime has also been removed, and instead enabled automatically at subme >= 5. - RD refinement in B-frames (subme9) includes both qpel-RD and an RD version of bime. - -commit 9b10152ffdc006d98a1ddea8ee19d1fdc70a0141 -Author: Fiona Glaser -Date: Mon Sep 29 00:11:38 2008 -0700 - - Fix potential miscompilation of some inline asm - Caused problems under some gcc 4.x versions with predictive lossless - -commit a9e86d248d8d5f1e892159a7d86dcea2f884a859 -Author: Fiona Glaser -Date: Sat Sep 27 16:37:27 2008 -0700 - - Replace High 4:4:4 profile lossless with High 4:4:4 Predictive. - This improves lossless compression by about 4-25% depending on source. - The benefit is generally higher for intra-only compression. - Also add support for 8x8dct and i8x8 blocks in lossless mode; this improves compression very slightly. - In some rare cases 8x8dct can hurt compression in lossless mode, but it's usually helpful, albeit marginally. - Note that 8x8dct is only available with CABAC as it is never useful with CAVLC.
- High 4:4:4 Predictive replaced the previous profile in a 2007 revision to the H.264 standard. - The only known compliant decoder for this profile is the latest version of CoreAVC. - As I write this, JM does not actually correctly decode this profile. - Hopefully this lack of support will soon change with this commit, as x264 will be (to my knowledge) the first compliant encoder. - -commit adccf49a631a9e424dc0e86476752d511065582d -Author: Fiona Glaser -Date: Fri Sep 26 09:19:56 2008 -0700 - - Fix typo in progress indicator when using piped input - -commit cb173c5044fcc4792b7978720884cea7aa2e3848 -Author: Loren Merritt -Date: Mon Sep 22 04:17:35 2008 -0600 - - avg_weight_ssse3 - -commit 3e5b130aba1ae8e1cc49b9a7ddf138abe6d78934 -Author: Loren Merritt -Date: Sat Sep 20 08:41:17 2008 -0600 - - fix bitstream writer on bigendian 64bit (regression in r903) - -commit 8292f8945ade54d7ac1e171c94bfe67409c41b20 -Author: Loren Merritt -Date: Fri Sep 19 23:52:11 2008 -0600 - - remove authors whose code no longer exists - -commit a70f8802d945d4fdd061a661b9af0a432362903e -Author: Loren Merritt -Date: Mon Sep 15 05:00:26 2008 -0600 - - more diagnostics when configure finds an unsuitable assembler - -commit 2103b3579b617a7d51d9b5e61953bc5531232948 -Author: Fiona Glaser -Date: Fri Sep 26 09:19:56 2008 -0700 - - Make x264 progress indicator more concise - Now the % indicator should be readable on the header of a minimized window on Windows systems. - -commit cd5919121f613431f483b52452f92e8195217974 -Author: Fiona Glaser -Date: Sun Sep 21 22:17:34 2008 -0700 - - Fix deblocking + threads + AQ bug - At low QPs, with threads and deblocking on, deblocking could be improperly disabled. - Revision in which this bug was introduced is unknown; it may be as old as b_variable_qp in x264 itself. 
- -commit c7d9960a8d91e1fdd207fc7ad7c6f130f573e53f -Author: Fiona Glaser -Date: Sun Sep 21 13:35:00 2008 -0700 - - Resolve possible crash in bime, improve the fix in r985 - -commit fab9d57a8c1327b97c01b83f5e9d58315622250c -Author: Fiona Glaser -Date: Sat Sep 20 19:36:07 2008 -0700 - - Fix rare crash issue in b-adapt - Regression *probably* in r979 - -commit 78798908acafad0ec536bcf6a81a95f50f5461a4 -Author: Holger Lubitz -Date: Sat Sep 20 02:36:55 2008 -0700 - - Merging Holger's GSOC branch part 1: hpel_filter speedups - -commit 20c01eefb707ad5c7291cd882d118ba8f6cf9d9a -Author: Loren Merritt -Date: Sat Sep 20 12:31:10 2008 -0600 - - r980 borked weighted bime - -commit 57c472ce6a12188041a04213543f2394d74962af -Author: Fiona Glaser -Date: Sat Sep 20 01:39:16 2008 -0700 - - Disable I_PCM with psy-RD - psy-RD seems to put the PCM threshold a bit lower than it should be, so PCM is now disabled under psy-RD. - -commit 42d57caaaf2de55d131e677a7fa8231148432435 -Author: Fiona Glaser -Date: Fri Sep 19 09:21:34 2008 -0700 - - Merge avg and avg_weight - avg_weight no longer has to be special-cased in the code; faster weightb - -commit b7d27eaab35a6fdffc66ffff51bd287b0f67bb3e -Author: Fiona Glaser -Date: Wed Sep 17 21:25:05 2008 -0700 - - Rewrite avg/avg_weight to take two source pointers - This allows the use of get_ref instead of mc_luma almost everywhere for bipred - -commit c4f3dabecde673fabcefa832fff490af9d738641 -Author: Fiona Glaser -Date: Wed Sep 17 00:33:37 2008 -0700 - - Use low-resolution lookahead motion vectors as an extra predictor - Improves quality considerably (0-5%) in 1pass/CRF mode, especially with lower --me values and complex motion. - Reverses the order of lowres lookahead search to improve the usefulness of the extra predictors. 
- -commit f8f5313909c20420a6e6efd69cbe8ec5147a12ac -Author: Fiona Glaser -Date: Tue Sep 16 22:44:10 2008 -0700 - - Add missing free() for f_qp_offset in frame.c - -commit d8163ffd10fb290520d096d8b05cae2f727ac9bf -Author: Gabriel Bouvigne -Date: Tue Sep 16 01:54:37 2008 -0700 - - Correct misprediction of bitrate in threaded mode - Improves bitrate accuracy in cases with large numbers of threads. - Loosely based on a patch by BugMaster. - -commit 08e737d12b3eaf4c2d6c1b8bbcd18628684221eb -Author: Gabriel Bouvigne -Date: Tue Sep 16 01:53:02 2008 -0700 - - Fix a case in which VBV underflows can occur - Fix a potential case where a frame might be initially allocated too low a QP, which would then have to be raised a lot during row-based ratecontrol. - In some cases, this could even produce VBV underflows in 2pass mode. - -commit bdb435f73dda80e54ae6b4f5c861bd62ed99ed3d -Author: Panagiotis Issaris -Date: Mon Sep 15 20:47:50 2008 +0200 - - Use correct format specifier for uint64_t - -commit c299b7d87e9ee6e7fca6b7d234847cc20eacc688 -Author: Fiona Glaser -Date: Tue Sep 16 00:31:26 2008 -0700 - - Cache motion vectors in lowres lookahead - This vastly speeds up b-adapt 2, especially at large bframes values. - This changes output because now MV prediction in lookahead only uses L0/L1 MVs, not bidir. This isn't a problem, since the bidir prediction wasn't really correct to begin with, so the change in output is neither positive nor negative. - This also allowed the removal of some unnecessary memsets, which should also give a small speed boost. - Finally, this allows the use of the lowres motion vectors for predictors in some future patch. - -commit 44d3d5ba678ba24d90720d0883dddcda8832de03 -Author: Fiona Glaser -Date: Mon Sep 15 12:22:48 2008 -0700 - - Fix regression in b-adapt patch: encoder_open failed for multipass encodes without bframes.
- -commit 58a770fe6a6f6777851a6fbb1c14043b9c0eff2a -Author: Fiona Glaser -Date: Mon Sep 15 10:53:29 2008 -0700 - - Stop SAR in y4m input from overriding --sar on commandline - -commit a8cb7662d9ffdab42c83074aee3835b3b0104c73 -Author: Loren Merritt -Date: Mon Sep 15 02:24:12 2008 -0600 - - hadamard_ac for psy-rd - c version is 1.7x faster than satd+sa8d+sad - ssse3 version is 2.3x faster than satd+sa8d+sad - -commit ecc9bfab548f464d4c2be899055f7ba567c1ed8e -Author: Fiona Glaser -Date: Sun Sep 14 21:36:45 2008 -0700 - - Psychovisually optimized rate-distortion optimization and trellis - The latter, psy-trellis, is disabled by default and is reserved as experimental; your mileage may vary. - Default subme is raised to 6 so that psy RD is on by default. - -commit 95ed2720b7772199f04cc9a657632107bb1c548c -Author: Fiona Glaser -Date: Sun Sep 14 18:18:15 2008 -0700 - - Add optional more optimal B-frame decision method - This method (--b-adapt 2) uses a Viterbi algorithm somewhat similar to that used in trellis quantization. - Note that it is not fully optimized and is very slow with large --bframes values. - It also takes into account weightb, which should improve fade detection. - Additionally, changes were made to cache lowres intra results for each frame to avoid recalculating them. This should improve performance in both B-frame decision methods. - This can also be done for motion vectors, which will dramatically improve b-adapt 2 performance when it is complete. - This patch also reads b_adapt and scenecut settings from the first pass so that the x264 header information in the output file will have correct information (since frametype decision is only done on the first pass). - -commit 80458ffcd62f0852e7092176b7b155bdfd3d5a82 -Author: Fiona Glaser -Date: Sat Sep 13 14:03:12 2008 -0700 - - Move adaptive quantization to before ratecontrol, eliminate qcomp bias - This change improves VBV accuracy and improves bit distribution in CRF and 2pass. 
- Instead of being applied after ratecontrol, AQ becomes part of the complexity measure that ratecontrol uses. - This allows for modularity for changes to AQ; a new AQ algorithm can be introduced simply by introducing a new aq_mode and a corresponding if in adaptive_quant_frame. - This also allows quantizer field smoothing, since quantizers are calculated beforehand rather than during encoding. - Since there is no more reason for it, aq_mode 1 is removed. The new mode 1 is in a sense a merger of the old modes 1 and 2. - WARNING: This change redefines CRF when using AQ, so output bitrate for a given CRF may be significantly different from before this change! - -commit f89e0d06700620d4e2f1467e80995f8192182496 -Author: Fiona Glaser -Date: Tue Sep 9 23:51:17 2008 -0700 - - Fix crash when using b-adapt at resolutions 32x32 or below. - Original patch by BugMaster, but was mostly rewritten in order to make b-adapt actually *work* at such resolutions, not merely stop crashing. - -commit d24f8a9153e67f2f9529283aeb523806fab17fe1 -Author: Fiona Glaser -Date: Tue Sep 9 23:12:20 2008 -0700 - - Add title-bar progress indicator under WIN32 - Also add bitrate-so-far output when piping data to x264 (total frames not known) - Patch mostly by recover from Doom9. - -commit 654e549862a0bf56671de13d38ad5c512d2a9efe -Author: Fiona Glaser -Date: Fri Sep 5 23:14:23 2008 -0700 - - Revert part of r963 - In some rare (but significant) cases, the optimized nal_encode algorithm gave incorrect results.
- -commit cc0c3d4d1e639512e2b9003a68597fdb6ce00d4f -Author: Fiona Glaser -Date: Thu Sep 4 20:13:38 2008 -0700 - - Predict 4x4_DC asm - Also remove 5-year-old unnecessary #define that reduced speed unnecessarily under MSVC-compiled builds - -commit 5993fccac7646d84710b4ffc6feb3f3b4fd736d8 -Author: Fiona Glaser -Date: Thu Sep 4 00:43:54 2008 -0700 - - Faster NAL unit encoding and remove unused nal_decode - Small speedup at very high bitrates - -commit 5d0904bfda094b6243d9d8596c50edd4f0fe5528 -Author: Fiona Glaser -Date: Wed Sep 3 22:12:23 2008 -0700 - - CAVLC cleanup and optimizations - Also move some small functions in macroblock.c to a .h file so they can be inlined. - -commit 277d2da8958b5e08d119c0068a54842bc5c3af71 -Author: Fiona Glaser -Date: Wed Sep 3 21:43:06 2008 -0700 - - Faster avg_weight assembly - Unrolling the loop a bit improves performance - -commit 1af195341d0f210382827b43051c79e33d900989 -Author: Fiona Glaser -Date: Wed Sep 3 15:35:22 2008 -0700 - - Faster H asm intra prediction functions - Take advantage of the H prediction method invented for merged intra SAD and apply it to regular prediction, too. - -commit 4d84a45d7e505e4929a0110e047aa29a752e3253 -Author: Fiona Glaser -Date: Wed Sep 3 15:32:16 2008 -0700 - - Add merged SAD for i16x16 analysis - Roughly 30% faster i16x16 analysis under subme=1 - -commit 2bff50702978bf2af30ef2b58264bd71549bc702 -Author: Fiona Glaser -Date: Wed Sep 3 15:15:17 2008 -0700 - - Add sad_aligned for faster subme=1 mbcmp - Distinguish between unaligned and aligned uses of mbcmp - SAD_aligned, for MMX SADs, uses non-cacheline SADs. 
- -commit fc36067b632e611f7b0e056381dd641d469376e6 -Author: Fiona Glaser -Date: Tue Sep 2 11:49:55 2008 -0700 - - Improve progress indicator - Show average bitrate so far during encoding - Decrease update interval for longer encodes (max of 10 frames encoded between updates) - -commit ce21e79df8197abaa35d7a6838a1aedeb4411578 -Author: Fiona Glaser -Date: Mon Sep 1 10:35:41 2008 -0700 - - Fix speed regression in r951 - Row SATDs are only necessary in VBV mode, so don't need to be checked if VBV is off. - -commit 8957bad80bb17eb23a39b86e903f8058d79d7364 -Author: Holger Lubitz -Date: Sun Aug 31 20:55:50 2008 -0600 - - zigzag asm - -commit 44d9c160bf45b827be9f99a91d8f53062246873d -Author: Guillaume Poirier -Date: Sun Aug 31 21:46:31 2008 +0200 - - fix SOFLAGS used when building gtk frontend - patch by Markus Kanet %darkvision A gmx P eu% - -commit 1e393b8c8139a39e8ff9bfc782e866598f3e2615 -Author: Loren Merritt -Date: Wed Aug 20 20:56:56 2008 -0600 - - remove the distinction between itex and ptex - (changes 2pass statsfile format) - -commit 9ccd80faeec2b8baa565b5f2d577cf3f79efd2e7 -Author: Loren Merritt -Date: Wed Aug 20 20:51:39 2008 -0600 - - hardcode the ratecontrol equation, and remove the rceq option - -commit 79f4e3e270ebffc8663f35e9959abdd369b6f914 -Author: Fiona Glaser -Date: Wed Aug 27 13:14:36 2008 -0400 - - Fix some uses of uninitialized row_satd values in VBV - Resolves some issues with QP51 in I-frames with scenecut - -commit 8de7dbbec1bc754826227c67cba74ad8a225cfde -Author: Fiona Glaser -Date: Tue Aug 26 14:51:29 2008 -0400 - - Activate trellis in p8x8 qpel RD - Also clean up macroblock.c with some refactoring - Note that this change significantly reduces subme7+trellis2 performance, but improves quality. - Issue originally reported by Alex_W. 
- -commit 59de6938d16da6e79e572a41c2bbd9afc29e0a35 -Author: Gabriel Bouvigne -Date: Mon Aug 25 10:50:45 2008 -0400 - - Improve VBV accuracy - Don't use the previous frame's row SATD as a predictor if it is too different from this frame's row SATD. - -commit 7421c8cf8587c6fa0ac8cd61ba1ffb9c20099c2d -Author: Guillaume Poirier -Date: Fri Aug 22 21:05:37 2008 +0200 - - improve generation of Darwin libraries - Patch by vmrsss %vmrsss A gmail P com% - -commit 7086a2037ebb9bd45eec3cfa3372e0d04b7a2c31 -Author: Fiona Glaser -Date: Thu Aug 21 21:23:08 2008 -0400 - - Fix compilation in gcc 3.4.x (issue in r946) - Due to a bug in gcc 3.4.x, in certain cases of inlining, the array_non_zero_int_mmx inline assembly is miscompiled and causes a crash with --subme 7 --8x8dct. - This minor hack fixes this issue. - -commit 20e8982e3196bf8d0820772571e75a50cd07aabe -Author: Loic Le Loarer -Date: Thu Aug 21 04:19:24 2008 -0600 - - shut up various gcc warnings - -commit 782740d5e4865f9bff83f8ac4f9b23fcf0f492f6 -Author: Loren Merritt -Date: Thu Aug 21 04:15:49 2008 -0600 - - fix a crash with invalid args and --thread-input (introduced in r921) - -commit 5a8727adddf4fc0f282c233c3a175f63ed41f211 -Author: Loren Merritt -Date: Wed Aug 20 05:36:32 2008 -0600 - - drop support for x86_32 PIC.
- -commit 6dd4c075d8a1654ae928ea30ef7bdaf19a239cd9 -Author: Loren Merritt -Date: Tue Aug 19 01:55:57 2008 -0600 - - use permute macros in satd - move some more shared macros to x264util.asm - -commit 08d39756a08e00ae39196b125e2cecdb08136e17 -Author: Loren Merritt -Date: Wed Aug 20 20:32:13 2008 -0600 - - cosmetics - -commit c47120f04f7c805955556d3466418d9eb347af52 -Author: Loren Merritt -Date: Wed Aug 20 19:00:52 2008 -0600 - - r940 broke threads - -commit 968609dc2c8c09f6a11f1a47755667d34b3736b0 -Author: Fiona Glaser -Date: Wed Aug 20 13:28:15 2008 -0400 - - Cleanups in macroblock_cache_save/load - A bit more loop unrolling, and moving some constant code to the global init function - -commit 3b60ca85fb3bb8632a50378aac7fc21fce888cc5 -Author: Fiona Glaser -Date: Tue Aug 19 14:18:24 2008 -0600 - - Deblocking code cleanup and cosmetics - Convert the style of the deblocking code to the standard x264 style - Eliminate some trailing whitespace - -commit 8cbe60572ed19382f315af2c9f3ff267f91ccdd2 -Author: Fiona Glaser -Date: Mon Aug 18 23:03:37 2008 -0600 - - 4% faster deblock: special-case macroblock edges - Along with a bit of related code reorganization and macroification - -commit 45e367903b6af53319035d677f96d30514aa26ea -Author: David Pethes -Date: Sat Aug 16 09:43:26 2008 -0600 - - Add dedicated variance function instead of using SAD+SSD - Faster variance calculation - -commit 2597644146f2469380b4c7073831b0a09116d79f -Author: Loren Merritt -Date: Fri Aug 15 03:04:28 2008 -0600 - - 6% faster deblock: remove some clips, earlier termination on low qps. - -commit ddee314e91a679c9934d7482524a835b7c74fe1e -Author: Fiona Glaser -Date: Thu Aug 14 19:31:42 2008 -0600 - - Faster deblocking - Early termination for bS=0, alpha=0, beta=0 - Refactoring, various other optimizations - About 30% faster deblocking overall.
- -commit 144001ed6bafc658ff6212981c01c5480286af8e -Author: Loren Merritt -Date: Sat Aug 2 08:19:50 2008 -0600 - - asm cosmetics - -commit 95b2dd9926f2ca4722610f1d46d907a170539d51 -Author: Daniel Vergien -Date: Wed Aug 6 08:10:53 2008 -0600 - - yet another posix-emulating define on solaris - -commit 79c9a1d230f15f5afd041a469bda33ace9a86d50 -Author: Gabriel Bouvigne -Date: Wed Aug 6 07:45:05 2008 -0600 - - update msvc projectfile - -commit 56b3baeccec26d2498d1dd86f0852848a820b7a0 -Author: Loren Merritt -Date: Wed Aug 6 07:34:42 2008 -0600 - - drop support for msvc6 - -commit d17f473df108af93c7696c6a144717db9cc8c71c -Author: Fiona Glaser -Date: Sat Aug 9 09:36:04 2008 -0600 - - Prevent VBV from lowering quantizer too much - This code seemed to act up unexpectedly sometimes, creating a situation where in 1-pass VBV mode, a frame's quantizer would drop all the way to qpmin and then shoot back upwards to qpmax, causing serious visual issues. - This change may decrease bitrate in VBV mode, but that is preferable to the artifacting produced by this code. 
- -commit 1eb8b071a232873e40e001ec7379a917265bf372 -Author: Fiona Glaser -Date: Sat Aug 9 09:34:37 2008 -0600 - - Improve subme7 at low QPs and add subme7 support in lossless mode - -commit 01d7deaf4e1129fc5037740c18c4c199ca2ad275 -Author: Loren Merritt -Date: Wed Jul 30 22:35:20 2008 -0600 - - cosmetics: merge x86inc*.asm - -commit 3b6d783faba57af702040b47eb2585cf5b216356 -Author: Fiona Glaser -Date: Wed Jul 30 15:29:46 2008 -0600 - - Add missing x264util.asm - -commit 5914efe709e8be23a05253d96bf1eb2cfaa0c83c -Author: Fiona Glaser -Date: Wed Jul 30 15:28:21 2008 -0600 - - Basic sanity checking of qpmax/qpmin options - -commit ff7639b042d066be7e6a26ba23bdb9804457d644 -Author: Fiona Glaser -Date: Wed Jul 30 14:42:29 2008 -0600 - - Fix regression in r922 - set the chroma DC coefficients to zero for residual coding in qpel-rd - fix C99ism - -commit 543601b8ae441c60b27001bc03db4e7ff8db4fef -Author: Holger Lubitz -Date: Tue Jul 29 21:36:01 2008 -0600 - - Refactor asm macros part 2: DCT - -commit 60f7c47de10a240cb50568996ff8232726c19881 -Author: Holger Lubitz -Date: Tue Jul 29 21:26:58 2008 -0600 - - Refactor asm macros part 1: DCT - -commit 63b84fa435de4355abc5e80fdc78a5d3081addc6 -Author: Fiona Glaser -Date: Tue Jul 29 17:08:38 2008 -0600 - - Improve intra RD refine, speed up residual_write_cabac - a do/while loop can be used for residual_write, but i8x8 had to be fixed so that it wouldn't call residual_write with zero coeffs - proper nnz handling added to cabac intra rd refine - chroma cbp added to 8x8 chroma rd - cbp was tested, but wasn't useful - -commit 9da8410dfd8877576438c909a3311688c19d6104 -Author: Fiona Glaser -Date: Tue Jul 29 13:42:41 2008 -0600 - - Fix a few more minor memleaks - -commit 1b078852bc942a773baf371f460aa6e471076d44 -Author: Loren Merritt -Date: Fri Jul 25 18:14:31 2008 -0600 - - stats summary: print distribution of numbers of consecutive B-frames - -commit 6a85cf3434816ac7e7f8772f78f07f9b3934e2ee -Author: Loic Le Loarer -Date: Fri Jul 25 
16:08:32 2008 -0600 - - add interlacing to the list of stuff checked by x264_validate_levels - -commit 5a9231a860ea07aab3c5405fda2371903a4b2b93 -Author: Fiona Glaser -Date: Thu Jul 24 07:58:50 2008 -0600 - - Fix C99-ism in r907 - -commit 502baa8a5f4271b99a35e79f0604f4bf6f541d22 -Author: Fiona Glaser -Date: Thu Jul 17 18:17:22 2008 -0600 - - Faster temporal predictor calculation - Split into a separate commit because this changes rounding, and thus changes output slightly. - -commit a6cee0ab6d2e6a9fb6580827dc854c09567c74f0 -Author: Fiona Glaser -Date: Thu Jul 17 07:55:24 2008 -0600 - - Align lowres planes for improved cacheline split performance - -commit 579e930f34e88196fa96ca576b78891f3df69c87 -Author: Loren Merritt -Date: Tue Jul 15 20:16:16 2008 -0600 - - autodetect level based on resolution/bitrate/refs/etc, rather than defaulting to L5.1 - if vbv is not enabled (and especially in crf/cqp), we have to guess max bitrate, so we might underestimate the required level. - -commit 95e859854267062a9f48143faf334cdff7f564e4 -Author: Loren Merritt -Date: Thu Jul 17 20:25:03 2008 -0600 - - fix bs_write_ue_big for values >= 0x10000. - (no immediate effect, since nothing writes such values yet) - -commit 7070f098260c188b2f138b44def409108e8f2449 -Author: BugMaster -Date: Wed Jul 16 11:54:51 2008 -0600 - - Fix lossless mode borked in r901 - -commit 6916c39a51544070dec5b59fd03e571f74af06a1 -Author: Fiona Glaser -Date: Sat Jul 12 14:37:58 2008 -0600 - - Relax QPfile restrictions - Allow a QPfile to contain fewer frames than the total number of frames in the video and have ratecontrol fill in the rest. - Patch by kemuri9. - -commit 299820827918a7586e40dac9f9dd62221350506d -Author: Fiona Glaser -Date: Sat Jul 12 14:10:38 2008 -0600 - - Limit MVrange correctly in interlaced mode - Bug report by Sigma Designs, Inc. 
- -commit 0e0904d2841960cd2f57d8ec1e873545dccf3522 -Author: Fiona Glaser -Date: Fri Jul 11 22:53:27 2008 -0600 - - Fix bug with PCM and adaptive quantization - In rare cases CABAC desync could occur, causing bitstream corruption - -commit 1b7446bf3f35e3f680824371c00cd1a1d98eaf76 -Author: BugMaster -Date: Fri Jul 11 16:00:02 2008 -0600 - - Fix memory leak upon x264 closing - Doesn't affect the CLI, but potentially important for programs which call x264 as a shared library. - -commit 59f016f9590de2cea68b82661599061a044840af -Author: Fiona Glaser -Date: Fri Jul 11 15:45:54 2008 -0600 - - Fix compilation on PPC systems (borked in r903) - Bigendian systems didn't have endian_fix32 defined - -commit 13575fcdc791b05c26d48c04050f956620cc41e5 -Author: Fiona Glaser -Date: Fri Jul 11 14:16:18 2008 -0600 - - Add L1 reflist and B macroblock types to x264 info - Also remove display of "PCM" if PCM mode is never used in the encode. - L1 reflist information will only show if pyramid coding is used. - -commit 6b4ad5f53899a3eafff4307e98fae18998677568 -Author: Fiona Glaser -Date: Thu Jul 10 08:36:45 2008 -0600 - - Fix and enable I_PCM macroblock support - In RD mode, always consider PCM as a macroblock mode possibility - Fix bitstream writing for PCM blocks in CAVLC and CABAC, and a few other minor changes to make PCM work. - PCM macroblocks improve compression at very low QPs (1-5) and in lossless mode. 
- -commit 05d7fb66d6aaaad00d833c810aee05b0c89948f9 -Author: Loren Merritt -Date: Fri Jul 4 21:03:26 2008 -0600 - - de-duplicate vlc tables - -commit 91e0ff6b490a01f5f5438639ed517cb4b09802f0 -Author: Loren Merritt -Date: Fri Jul 4 18:56:30 2008 -0600 - - faster ue/se/te write - -commit ab90da748df305101b720f932736dd6d7f990214 -Author: Fiona Glaser -Date: Fri Jul 4 18:32:32 2008 -0600 - - faster bs_write - -commit c61a1df1db0226cae8bd0b1b5be7e0856e0cb26c -Author: Loren Merritt -Date: Thu Jul 3 00:37:16 2008 -0600 - - cosmetics in ssd asm - -commit c9c7edf3e6fa8fbdd4d7bf2beccb448bdcac9aa4 -Author: Fiona Glaser -Date: Sun Jul 6 12:59:15 2008 -0600 - - Various optimizations and cosmetics - Update AUTHORS file with Gabriel and me - update XCHG macro to work correctly in if statements - Add new lookup tables for block_idx and fdec/fenc addresses - Slightly faster array_non_zero_count_mmx (patch by holger) - Eliminate branch in analyse_intra - Unroll loops in and clean up chroma encode - Convert some for loops to do/while loops for speed improvement - Do explicit write-combining on --me tesa mvsad_t struct - Shrink --me esa zero[] array - Speed up bime by reducing size of visited[][][] array - -commit 653249521805b21564c00148f7db1e4b28e6e15c -Author: Fiona Glaser -Date: Sun Jul 6 11:15:19 2008 -0600 - - Resolve floating point exception with frame_init_lowres mmx - In some cases, the mmx version of frame_init_lowres could leave the FPU uninitialized for use in ratecontrol, resulting in floating point exceptions. - Since frame_init_lowres is such a time-consuming function, an emms was just put at the end, since it costs almost nothing compared to the total time of frame_init_lowres. 
- -commit 552a04ea3c56317046686bdc41d31e15490f6b85 -Author: Eric Petit -Date: Fri Jul 4 11:31:32 2008 +0200 - - Update my email address - -commit bdbd4fe7709e129f90cf3d7d59b500e915c6b187 -Author: Fiona Glaser -Date: Thu Jul 3 20:05:00 2008 -0600 - - Update file headers throughout x264 - Update "Authors" lists based on actual authorship; highest is most important - Update copyright notices and remove old CVS tags from file headers - Add file headers to GTK and other sections missing them - Update FSF address - Other header-related cosmetics - -commit fb660325d99298ab6cd2285d76f2fddf83fe34cb -Author: Fiona Glaser -Date: Wed Jul 2 20:59:24 2008 -0600 - - denoise_dct asm - -commit 223eedb04b9d97f2b20bde8136959e101bb3e0c9 -Author: Loren Merritt -Date: Wed Jul 2 20:55:10 2008 -0600 - - cosmetics in permutation macros - SWAP can now take mmregs directly, rather than just their numbers - -commit 5b92682dcee03054ef6f033c9dde6ecd251674fa -Author: Fiona Glaser -Date: Wed Jul 2 10:43:57 2008 -0600 - - Fix bug in adaptive quantization - In some cases adaptive quantization did not correctly calculate the variance. - Bug reported by MasterNobody - -commit 04dc25367d218c92ba85c4cae34cc8b36bab05a3 -Author: Loren Merritt -Date: Sun Jun 29 00:00:03 2008 -0600 - - lowres_init asm - rounding is changed for asm convenience. this makes the c version slower, but there's no way around that if all the implementations are to have the same results. - -commit a59f4a7b6bfc12bcd8763de6b008f1bb753b2dae -Author: Fiona Glaser -Date: Tue Jul 1 23:42:39 2008 -0600 - - Optimizations and cosmetics in macroblock.c - If an i4x4 dct block has no coefficients, don't bother with dequant/zigzag/idct. Not useful for larger sizes because the odds of an empty block are much lower. - Cosmetics in i16x16 to be more consistent with other similar functions. - Add an SSD threshold for chroma in probe_skip to improve speed and minimize time spent on chroma skip analysis. 
- Rename lambda arrays to lambda_tab for consistency. - -commit ed9a9f313240c887a7a3b330ceabe25fccbf47db -Author: Gabriel Bouvigne -Date: Thu Jun 26 21:09:55 2008 -0600 - - some asm functions require aligned stack. disable these when compiling with msvc/icc. - -commit e9369576747d339078b57fc227302f8c6e79011a -Author: Fiona Glaser -Date: Tue Jun 24 15:27:41 2008 -0600 - - Move bitstream end check to macroblock level - Additionally, instead of silently truncating the frame upon reaching the end of the buffer, reallocate a larger buffer instead. - -commit ec3d09554addbcecb8cf82f3ff33ac737a6f996b -Author: Fiona Glaser -Date: Tue Jun 24 12:23:50 2008 -0600 - - Convert NNZ to raster order and other optimizations - Converting NNZ to raster order simplifies a lot of the load/store code and allows more use of write-combining. - More use of write-combining throughout load/save code in common/macroblock.c - GCC has aliasing issues in the case of stores to 8-bit heap-allocated arrays; dereferencing the pointer once avoids this problem and significantly increases performance. - More manual loop unrolling and such. - Move all packXtoY functions to macroblock.h so any function can use them. - Add pack8to32. 
- Minor optimizations to encoder/macroblock.c - -commit d97bcbcbebcbe37d9e36b414a3eb371fdc0f4450 -Author: Loren Merritt -Date: Thu Jun 12 03:00:23 2008 -0600 - - mc_chroma_sse2/ssse3 - -commit 6ec1bd732c5eb73c9e303b8e7e3963c80044aa94 -Author: Loren Merritt -Date: Thu Jun 12 08:43:41 2008 -0600 - - checkasm --bench=function_name - -commit 473140b265c8865c4089cc8a78352dff4b4bc1f6 -Author: Loren Merritt -Date: Thu Jun 12 01:39:22 2008 -0600 - - interleave psnr/ssim computation with reference frame filtering, to improve cache coherency - -commit 2a7dd58c68fda378a5e8b68184ff56daee9f9019 -Author: Fiona Glaser -Date: Sun Jun 15 11:59:25 2008 -0600 - - Add more inline asm and a runtime check for MMXEXT support - x264 will now terminate gracefully rather than SIGILL when run on a machine with no MMXEXT support. - A configure option is now available to build x264 without assembly support for support on such old CPUs as the Pentium 2, K6, etc. - -commit 56108cb63848d4a553bccb7389226910f3f25e2e -Author: Fiona Glaser -Date: Sun Jun 15 11:51:36 2008 -0600 - - Use aligned memcpy for x264_me_t struct and cosmetics - -commit dba0e5a2e089cd675e201cdf4e3358eb7a0e22cc -Author: Fiona Glaser -Date: Sun Jun 15 11:50:17 2008 -0600 - - Cosmetics and loop unrolling - GCC is not very good at loop unrolling in cases where it can perform constant propagation, so the unrolling unfortunately has to be done manually. - -commit d108f91937cdb67b2bfa4f6e7fc1cf6b776febbf -Author: Fiona Glaser -Date: Thu Jun 12 09:17:49 2008 -0600 - - Fix regression in 64-bit in r882 - i_mvc needs to be 64-bit when used with a 64-bit memory pointer - -commit 5204112861581df847a4a892ea63b8a0d72f2e6c -Author: Fiona Glaser -Date: Thu Jun 12 08:09:22 2008 -0600 - - More tweaks to me.c - Added inline MMX version of UMH's predictor difference test - Various cosmetics throughout me.c - Removed a C99-ism introduced in r878. 
- -commit d4e077867f79a555efb83e45d93dc6f170b1fb3e -Author: Fiona Glaser -Date: Wed Jun 11 18:23:00 2008 -0600 - - Fix regression in r736 - r736 added intra RD refinement to B-frames; however, it is possible for subme=7 to be used without b-rdo. - This means intra RD isn't run, and therefore it is possible for intra chroma analysis to not have been run, since update_cache was never called for an intra block, and chroma ME is not required even at subme=7. - r801, which removed a memset, made this worse because previously the chroma prediction mode was at least initialized to zero; now it was not initialized at all. - Therefore, --no-chroma-me, --subme 7, and no --b-rdo had the potential to crash. - This change restricts intra RD refinement to only be run when --b-rdo is enabled (sensible to begin with), thus preventing a crash in this case. - -commit 3a095b2ce5c30eea665f0e6fb44ba2b3510adf65 -Author: Fiona Glaser -Date: Tue Jun 10 21:37:57 2008 -0600 - - Fix regression in r850 - Bug resulted in rare incorrect chroma encoding - -commit 5e59162c0ab7cafec85ba7c8cf648d7300cdc860 -Author: Gabriel Bouvigne -Date: Tue Jun 10 18:40:52 2008 -0600 - - Cosmetics in VBV handling - -commit d4a4b3f168251a9474f2a945e859e7813a7a3120 -Author: Fiona Glaser -Date: Tue Jun 10 18:34:46 2008 -0600 - - Tweaks and cosmetics in me.c - Use write-combining for predictor checking and other tweaks. - -commit 9cc180ac4a79cae85790c1eeefa692d4f12b5232 -Author: Fiona Glaser -Date: Fri Jun 6 14:59:10 2008 -0600 - - Partially inline trellis quantization - Inlining trellis into the 4x4/8x8 trellis wrappers increases trellis speed by about 5-10% through constant propagation. - -commit 3d9b6b3ce55dce861d8b64f832fa40dfe67d6bca -Author: Fiona Glaser -Date: Fri Jun 6 12:32:57 2008 -0600 - - Various cosmetic changes. 
- -commit 49ce3ac63b5305ca28f65bd75e6a4e6540d5954a -Author: Fiona Glaser -Date: Fri Jun 6 22:57:33 2008 -0600 - - avg_weight_sse2 - -commit c0c0e1f48de74acec0b681bfa842d3c8cddb4a32 -Author: Loren Merritt -Date: Fri Jun 6 23:31:22 2008 -0600 - - many changes to which asm functions are enabled on which cpus. - with Phenom, 3dnow is no longer equivalent to "sse2 is slow", so make a new flag for that. - some sse2 functions are useful only on Core2 and Phenom, so make a "sse2 is fast" flag for that. - some ssse3 instructions didn't become useful until Penryn, so yet another flag. - disable sse2 completely on Pentium M and Core1, because it's uniformly slower than mmx. - enable some sse2 functions on Athlon64 that always were faster and we just didn't notice. - remove mc_luma_sse3, because the only cpu that has lddqu (namely Pentium 4D) doesn't have "sse2 is fast". - don't print mmx1, sse1, nor 3dnow in the detected cpuflags, since we don't really have any such functions. likewise don't print sse3 unless it's used (Pentium 4D). - -commit f9ad5ee2564bb272635f0c69fefa28e0b1b47f37 -Author: Loren Merritt -Date: Fri Jun 6 23:30:37 2008 -0600 - - enable ssse3 phadd satd on Penryn. - -commit b8670681bbe2312f1b2d1842bbd473223f005c69 -Author: Loren Merritt -Date: Fri Jun 6 22:59:37 2008 -0600 - - benchmark most of the asm functions (checkasm --bench). - -commit c24df7dae689d86e1d55137d343fc3589a75887d -Author: Fiona Glaser -Date: Thu Jun 5 11:32:05 2008 -0600 - - Cosmetic: fix C99-ism - -commit a6c98f6f5798e31634b47aee0a18d7ecd5eff3e1 -Author: Fiona Glaser -Date: Wed Jun 4 21:28:48 2008 -0600 - - Use a gaussian window for cplxblur - Cplxblur was originally intended to use a gaussian window, but in its current form did not. This change provides a tiny improvement to 2pass ratecontrol. 
- -commit 970d61004fe559aa3e89e64185ee3b9efea53954 -Author: Loren Merritt -Date: Mon Jun 2 09:47:50 2008 -0600 - - cosmetics - -commit b5053542a8e89beff4fb4ac13c8030f35c2fd79d -Author: Loren Merritt -Date: Mon Jun 2 09:40:49 2008 -0600 - - nasm compatible NX stack - -commit 8c1ec12a747e8b9e5c4eebf786b8262286b9c965 -Author: Loren Merritt -Date: Mon Jun 2 08:57:59 2008 -0600 - - CQP is incompatible with AQ - -commit 9bdf19c2f114a439cc0f4d27ab8493912918584d -Author: Fiona Glaser -Date: Sat May 24 13:10:21 2008 -0600 - - memzero_aligned_mmx - -commit 579857968ab579b378f96d96c02aba68a2450367 -Author: BugMaster -Date: Sat May 24 01:09:07 2008 -0600 - - binmode stdin on mingw, not just msvc - -commit 71a919d44670fb4e1c4777d770c112a2f23f9b23 -Author: Fiona Glaser -Date: Fri May 23 21:22:29 2008 -0600 - - omit redundant mc after non-rdo dct size decision, and in b-direct rdo - -commit 53712c4bb01f9cc5c22d7eeb408f20d4f4d9520e -Author: Fiona Glaser -Date: Wed Apr 9 16:46:51 2008 -0600 - - allow fractional CRF values with AQ. - -commit 8a1d6cb266b8fa4f29725bca31c265253134fcc9 -Author: Noboru Asai -Date: Mon Jun 2 09:12:29 2008 -0600 - - fix some uninitialized partitions in rdo - -commit 56f2bc8950f5abaf20b1241511d1b02db3945f3d -Author: Gabriel Bouvigne -Date: Mon Jun 2 12:53:01 2008 -0600 - - 2-pass VBV support and improved VBV handling - Dramatically improves 1-pass VBV ratecontrol (especially CBR) and provides support for VBV in 2-pass mode. This consists of a series of functions that attempts to find overflows and underflows in the VBV from the first-pass statsfile and fix them before encoding. - 1-pass VBV code partially by Fiona Glaser. - -commit 344cb1693dbe1471a3a94fef3156e94d684350de -Author: Alexander Strange -Date: Mon Jun 2 12:16:51 2008 -0600 - - Fix noise reduction in threaded mode. - Previously enabling noise reduction with threads had no effect. 
- Note that this is not an optimal solution; each thread still tracks noise reduction separately (unlike in single-threaded mode). - -commit 708b9862103947e687424dce8cbd9fade3e094b6 -Author: Loren Merritt -Date: Tue May 20 20:15:41 2008 -0600 - - fix a crash on win32 with threads. - r852 introduced an assumption in deblock that the stack is aligned. - -commit 1851df553d6a5983d9db7a83e6cf922e7be0b5bb -Author: Loren Merritt -Date: Tue May 20 03:58:08 2008 -0600 - - remove nasm version check. a feature check is all that's needed. - silence stderr in yasm version check. - -commit d4e6d802a4fd36dd1b4c0d15907660a20233ff47 -Author: Loren Merritt -Date: Sun May 18 08:33:34 2008 -0600 - - cosmetics in cabac - -commit 764a012365e23d35f54f103fb174a5b4319d5fed -Author: Fiona Glaser -Date: Sun May 18 07:14:28 2008 -0600 - - faster residual_write_cabac - -commit 92b3ea8c24f5932a3535cac71e8d9260e5a8e198 -Author: Loren Merritt -Date: Sun May 18 06:23:57 2008 -0600 - - change DEBUG_DUMP_FRAME to run-time --dump-yuv - -commit cb4dc4aee33f679ba4f73010c9b88f5d54799740 -Author: Loren Merritt -Date: Sat May 17 03:39:59 2008 -0600 - - x264_median_mv_mmxext - this is the first non-runtime-detected use of mmxext, but it has to be inlined - -commit b594e8f9e4ff44d797820b3020e9d3a179843e50 -Author: Loren Merritt -Date: Sun Apr 20 03:18:19 2008 -0600 - - factor duplicated code out of deblock chroma mmx - -commit ffd9196b0f62edd09b3a581c8acc1072c1ddfaf0 -Author: Loren Merritt -Date: Tue Apr 15 17:52:32 2008 -0600 - - deblock_luma_intra_mmx - -commit 20f7ae51a4ac13b6e38a0edb1717b56733ae68c7 -Author: vmrsss -Date: Sat May 17 00:50:22 2008 -0600 - - write aspect ratio in mp4 - -commit d5d07b1823292e65572466577689819cf3bb98ec -Author: Fiona Glaser -Date: Thu May 15 22:44:12 2008 -0600 - - omit delta_quant in i16x16 blocks with no residual - (all other block types were already covered, but i16x16 cbp is special) - -commit bfa2eac7fdc92eaf27004ef66e93898ec27f61f1 -Author: Fiona Glaser -Date: Thu
May 15 06:01:01 2008 -0600 - - explicit write combining, because gcc fails at optimizing consecutive memory accesses - -commit 32bd2d645c63c7cf55a2f9b33e39e63144c3e835 -Author: Fiona Glaser -Date: Thu May 15 05:41:43 2008 -0600 - - force unroll macroblock_load_pic_pointers - and a few other minor optimizations - -commit 2d816a51e2ed594f3e98515ea5f427d08a4df638 -Author: Fiona Glaser -Date: Thu May 15 05:14:53 2008 -0600 - - quant_2x2_dc_ssse3 - -commit 0bb9b6b8bb2853ccf18553f29e220e583f712f42 -Author: Fiona Glaser -Date: Sat May 17 00:47:31 2008 -0600 - - r836 borked lossless cabac nnz - -commit 08ad421f4b79eafafa100e95a28d861f07dfaed4 -Author: Henry Bent -Date: Wed May 7 19:49:14 2008 -0600 - - use elf instead of a.out on netbsd - -commit a0194ef6806ac3249c9add16f82dc4a38cf2680e -Author: Ning Xin -Date: Wed May 7 17:18:44 2008 -0600 - - fix x264_realloc when not using libc realloc. - -commit 1baca94c8da547ebc3363678da6fe09556b97658 -Author: Loren Merritt -Date: Mon May 5 16:28:24 2008 -0600 - - don't pretend to support win64. remove all related code. - it hasn't worked since probably some time in 2005, and won't ever be fixed unless someone steps up to maintain it. 
- -commit d09b8e9155b67dd5554b211035e34ac54f8b24c1 -Author: Loren Merritt -Date: Mon May 5 16:25:19 2008 -0600 - - cosmetics: replace last instances of parm# asm macros with r# - -commit 709093dfde92120131f69dd3780a66e01e5d3d67 -Author: Loren Merritt -Date: Mon Apr 28 03:12:29 2008 -0600 - - remove DEBUG_BENCHMARK - -commit 108897fde1a1713ff2546a1dc4e998e1b8b95f44 -Author: Fiona Glaser -Date: Sun Apr 27 03:10:28 2008 -0600 - - faster probe_skip - -commit ad6c91f064e6e6ceab3b876713006e5e1fb3f574 -Author: Loren Merritt -Date: Tue Apr 22 17:16:25 2008 -0600 - - drop support for pre-SSE3 assemblers - -commit 27ae7576cf0a978317fc9c1be3fc3b562338a7c4 -Author: Loren Merritt -Date: Fri Apr 25 00:33:12 2008 -0600 - - s/x264_cpu_restore/x264_emms/ - no point in giving it a generic name when it's not generic - -commit 495463e3f7ddbd643a4bfb8475bf3dfbe4fb4bf9 -Author: Fiona Glaser -Date: Sun Apr 27 02:37:37 2008 -0600 - - faster cabac_mb_cbp_luma - ported from ffmpeg - -commit 36f80085d73652cbddfeb9de92ec6e41e6b6d34f -Author: Fiona Glaser -Date: Fri Apr 25 21:41:40 2008 -0600 - - remove some redundant nnz counts - move some nnz counts from macroblock_encode to cavlc if cabac doesn't need them - -commit 79f03a3ba1fa908b9044845d8b52b376997c74e9 -Author: Fiona Glaser -Date: Fri Apr 25 20:43:57 2008 -0600 - - compute missing nnz count in subme7 cavlc - -commit 20720d6b7e2e0406e29a121d5340cd5199083f44 -Author: Fiona Glaser -Date: Fri Apr 25 01:47:47 2008 -0600 - - remove a division in macroblock-level bookkeeping - -commit 03da01e43fbccb14e054bab2464e594991e5108f -Author: Fiona Glaser -Date: Thu Apr 24 18:55:30 2008 -0600 - - omit P/B-skip mc from macroblock_encode if the pixels haven't been overwritten since probe_skip - -commit e0f13712fd496702f3f7c0cecfb043f0a6af9b3e -Author: Loren Merritt -Date: Thu Apr 24 05:17:04 2008 -0600 - - earlier termination in SEA if mvcost exceeds residual - -commit 2fe89852d1789865e8bea8fee18438d14ddeaf4e -Author: Loren Merritt -Date: Tue Apr 22 
04:00:24 2008 -0600 - - remove void* arithmetic from r821 - -commit 8b6df37d8f1882bba3702aa3dafe7ae38bbd6b23 -Author: Guillaume Poirier -Date: Fri Apr 25 11:29:09 2008 +0200 - - Fix define of illegal function identifiers (as defined in section "7.1.3 Reserved identifiers" of C99 spec) - -commit e0d72e3d4963e671626ed31e57221ebe45283d75 -Author: Guillaume Poirier -Date: Fri Apr 25 10:50:48 2008 +0200 - - Fix define of illegal identifier (as defined in section "7.1.3 Reserved identifiers" of C99 spec) "__UNUSED__", and use the one defined in common/osdep.h, i.e. "UNUSED" - based on a patch by Diego Biurrun - -commit a146ee3d8bb003e66ccf908ca83239c693068f48 -Author: Guillaume Poirier -Date: Thu Apr 24 14:46:11 2008 +0200 - - more consistent include name (in line with other PPC includes) - -commit 4ee9642aef80b64f7b726ad245e6ab2ea631e896 -Author: Guillaume Poirier -Date: Thu Apr 24 14:44:24 2008 +0200 - - fix illegal identifiers in multiple inclusion guards - patch by Diego Biurrun % diego A biurrun P de % - -commit 34ed67475e0f08fc502ed287f918cf7d676f20bf -Author: Fiona Glaser -Date: Tue Apr 22 00:38:37 2008 -0600 - - AQ now treats perfectly flat blocks as low energy, rather than retaining previous block's QP. - fixes occasional blocking in fades. 
- -commit 28a2d7af87ceb29b93e73c99406316c04b2c9f23 -Author: Loren Merritt -Date: Sun Apr 20 12:19:46 2008 -0600 - - checkasm cabac - -commit 1877d1c430ff6a167b76736cc3527efe19dc330c -Author: Loren Merritt -Date: Sun Apr 20 02:39:31 2008 -0600 - - s/movdqa/movaps/g - -commit 6df41d50d936c428e1e5239a2eded54ee13e9156 -Author: Loren Merritt -Date: Sun Apr 20 18:25:53 2008 -0600 - - --asm to allow testing of different versions of asm without recompile - -commit 87132ed66fd3887a0a16618272fcf97ed244f6af -Author: Loren Merritt -Date: Sat Apr 12 01:40:28 2008 -0600 - - copy left neighbor pixels directly from previous mb instead of main plane - -commit 6eb5483505f40bb319ce0afa052ee41543993fc1 -Author: Fiona Glaser -Date: Wed Apr 9 16:30:34 2008 -0600 - - cacheline split workaround for mc_luma - -commit c1e43f094095265a77c9584fbfc25209b62efc78 -Author: Guillaume Poirier -Date: Wed Apr 16 10:46:15 2008 +0200 - - add "SECTION_RODATA" before "SECTION .text" to setup the fakegot label used in macho binaries. 
- This fixes compilation with --enable-pic - Requires Yasm 0.7.0 or newer - Patch by Dave Lee % davelee P com A gmail P com % - -commit 67813bbfbbb43aa65a15e659a5ea668c8d8cb26c -Author: Loren Merritt -Date: Sun Apr 13 10:29:15 2008 -0600 - - more hpel fixes - -commit 0acaad1b446fbe76e7dc6924f3005d3bf88f73ce -Author: Gabriel Bouvigne -Date: Thu Apr 10 08:59:19 2008 -0600 - - update msvc projectfile - -commit 25558461eae393552684aab8d847a065c73667e5 -Author: Loren Merritt -Date: Fri Apr 11 18:48:30 2008 -0600 - - r810 borked hpel_filter_sse2 on unaligned buffers - -commit 0f453853989c33919769b1a8376d7daf6acfccd2 -Author: Loren Merritt -Date: Thu Apr 10 03:17:53 2008 -0600 - - threads=auto on multicore now implies thread input, just like explicit thread numbers already did - -commit 539103a59525903e82a013d7a300d2a786d35d22 -Author: Loren Merritt -Date: Tue Apr 8 20:16:50 2008 -0600 - - dct4 sse2 - -commit 9168abfaf453622d8297ee049dc5951f93ad196c -Author: Loren Merritt -Date: Tue Apr 8 12:19:23 2008 -0600 - - faster x86_32 dct8 - -commit 56bf7565f2743e4fe85763388cb74b75b9bf41c5 -Author: Loren Merritt -Date: Mon Apr 7 10:22:03 2008 -0600 - - macros to deal with macros that permute their arguments - -commit 32ef8652729945bc7bcaf2b1d3e112f5a7530bc6 -Author: Loren Merritt -Date: Mon Apr 7 08:24:40 2008 -0600 - - mmx cachesplit sad of non-square sizes checked height instead of width - -commit afbcfdc2d2b74f318eb1cb1db7a6a711d4d115e5 -Author: Loren Merritt -Date: Fri Apr 4 01:07:40 2008 -0600 - - sfence after nontemporal stores - -commit 7bdaab607070f6c30eb919d9e22c073650ee2f70 -Author: Loren Merritt -Date: Wed Apr 2 11:22:43 2008 -0600 - - simplify hpel filter asm (move control flow to C) and add sse2, ssse3 versions - -commit 29899d84c3ca0e11f70a0aea8e6adf721e6bbfb2 -Author: Loren Merritt -Date: Thu Apr 3 20:46:36 2008 -0600 - - more mmx/xmm macros (mova, movu, movh) - -commit 937b792529e12884ec9a6e094f93628640f60ad2 -Author: Fiona Glaser -Date: Wed Apr 2 05:06:02 2008 
-0600 - - improve handling of cavlc dct coef overflows - support large coefs in high profile, and clip to allowed range in baseline/main - -commit bdfec13d6ac6e70f9a01b1a36b1398b800c58e0f -Author: Guillaume Poirier -Date: Mon Mar 31 10:50:45 2008 +0200 - - fix shared libs on MacOSX - based on a patch by İsmail Dönmez - -commit 658b058609f6ae51b18d4b022d0398d4d11ff134 -Author: Loren Merritt -Date: Mon Mar 31 02:27:53 2008 -0600 - - typo in r803 - -commit def7e3aaf85502eba82efa9a13c176e87cff8268 -Author: Loren Merritt -Date: Sun Mar 30 18:08:28 2008 -0600 - - fix a crash on mp4 muxing with invalid params - -commit b59440f09b7eb7e6f30c1131d56843ee92e3751d -Author: Fiona Glaser -Date: Sun Mar 30 17:58:41 2008 -0600 - - variance-based psy adaptive quantization - new options: --aq-mode --aq-strength - AQ is enabled by default - -commit 8d8f3ea41ddb1c11baf018c9db58df3747f8697f -Author: Zuxy Meng -Date: Sat Mar 29 18:04:23 2008 -0600 - - fix naming of .dll on mingw - -commit 7e3ef7ce24f223ea1734a64585f747131d1d7b6b -Author: Loren Merritt -Date: Sat Mar 29 17:53:36 2008 -0600 - - don't distinguish between mingw and cygwin - -commit 05e91fb1c26bb42d1c124e7812fc2be0533eae6e -Author: Fiona Glaser -Date: Sat Mar 29 16:27:54 2008 -0600 - - remove a memset - -commit 6f44349a9a703cd673283810fe56febec9f76783 -Author: Loren Merritt -Date: Sat Mar 29 16:27:08 2008 -0600 - - typo. don't evaluate rd pskip when p16x16 found ref>0. - -commit 27b73b3b86524ec9b0bdf8310a55081898b408c0 -Author: Loren Merritt -Date: Sat Mar 29 20:42:51 2008 -0600 - - r784 borked lossless dc zigzag - -commit c1c00e6cc02f500de1b955897e60b0f16ebb0ddf -Author: Loren Merritt -Date: Tue Mar 25 07:31:51 2008 -0600 - - fix an arithmetic overflow that disabled SEA threshold after finding a mv with SAD < mvcost. 
- -commit 1c72e71929a6eabded391ec6f16ba66d3cac75f3 -Author: Guillaume Poirier -Date: Tue Mar 25 16:30:04 2008 +0000 - - fix hpel_filter_altivec picked up by checkasm - Patch by Manuel %maaanuuu A gmx.net % and Noboru Asai % noboru P asai A gmail P com % - -commit 66a0c19d3659bbbc69decb88465f5957cf3611ef -Author: Loren Merritt -Date: Tue Mar 25 00:59:50 2008 -0600 - - faster residual - -commit 41cd480c2b0b83a939effe01e855b9099d1124eb -Author: Loren Merritt -Date: Mon Mar 24 21:31:46 2008 -0600 - - nasm doesn't like align(nop) in structs - -commit 727377bf7582bf4c69083f2bf94b4e6c5965032c -Author: Loren Merritt -Date: Mon Mar 24 19:25:19 2008 -0600 - - reduce the size of some cabac arrays - -commit c9e8cfed1fec1a3e69db676725178c27b80c9c90 -Author: Fiona Glaser -Date: Mon Mar 24 19:21:24 2008 -0600 - - use cabac context transition table from trellis in normal residual coding too - -commit a3e11cbf36d78cb6b4147e8f1a73ee7c9387397e -Author: Fiona Glaser -Date: Mon Mar 24 19:12:07 2008 -0600 - - rearrange cabac struct to reduce code size - -commit 9289e80611d89ad4050fa738dec9a530c8f4e3d4 -Author: Fiona Glaser -Date: Mon Mar 24 03:25:25 2008 -0600 - - higher precision RD lambda - improves quality at QP<=12. - -commit aaced0861e76767a5c0ce24a94214a261d9eb459 -Author: Loren Merritt -Date: Mon Mar 24 01:56:31 2008 -0600 - - faster cabac_encode_ue_bypass - -commit 23e52ef3bbc0690fe55e49bf32c595adc0404878 -Author: Loren Merritt -Date: Sun Mar 23 22:14:18 2008 -0600 - - cabac asm. - mostly because gcc refuses to use cmov. - 28% faster than c on core2, 11% on k8, 6% on p4. 
- -commit ecc95abde1efbd7d3ea9a42475a628f31cb572ea -Author: Loren Merritt -Date: Sun Mar 23 22:08:07 2008 -0600 - - cosmetics in cabac - -commit 9e7cfc35e955c3693b5690c233cc0049be222bce -Author: Loren Merritt -Date: Sat Mar 22 20:25:06 2008 -0600 - - inline cabac_size_decision - -commit 542027fac9212ca1f6d24a39ebea779bfec91123 -Author: Loren Merritt -Date: Sat Mar 22 03:25:03 2008 -0600 - - cosmetics in DECLARE_ALIGNED - -commit 52fb83347c17f88ea523763223b555ff5f475698 -Author: Loren Merritt -Date: Sat Mar 22 03:06:18 2008 -0600 - - don't distinguish between luma4x4 and luma4x4ac - -commit b437d2d4c90056b1dcb4f3220234d06d03f3e9b4 -Author: Loren Merritt -Date: Sat Mar 22 02:46:31 2008 -0600 - - faster lossless zigzag - -commit 489555ed890812e16b0f6a14b86abe0e819ab513 -Author: Loren Merritt -Date: Sat Mar 22 03:14:33 2008 -0600 - - more alignment - -commit 7b0e2bde0aedbd174942bf8c2dc7378ef1f8418a -Author: Loren Merritt -Date: Sat Mar 22 01:49:52 2008 -0600 - - add tesa and lossless to fprofile - -commit 41a1a09f4e6f0bf5d30dbaa745adf554d5597b56 -Author: Loren Merritt -Date: Sat Mar 22 01:46:43 2008 -0600 - - cosmetics in residual_write - -commit 91b126573702ac2689462d857958a9a7579956b0 -Author: Loren Merritt -Date: Fri Mar 21 23:24:33 2008 -0600 - - remove unused bitstream reader - -commit 7822bab33a0471f01b8ff7d14eb7d843602953bd -Author: Loren Merritt -Date: Fri Mar 21 18:58:46 2008 -0600 - - cosmetics in quant asm - -commit 5d972bf3f220404f7e7bd7595f8c3a804191b35c -Author: Loren Merritt -Date: Fri Mar 21 18:46:29 2008 -0600 - - special case dequant for flat matrix - -commit f63770aa6540b9ced04043ae474b1d3b99f024f9 -Author: Loren Merritt -Date: Fri Mar 21 00:04:46 2008 -0600 - - faster dequant - -commit 263abc67c9c37d8cfddcb5bc9e9ddf768366429b -Author: Loren Merritt -Date: Thu Mar 20 22:08:07 2008 -0600 - - simplify hpel_filter_c - -commit ead697cad4c2090255fccecbded84346fd398075 -Author: Loren Merritt -Date: Thu Mar 20 19:35:54 2008 -0600 - - use 
x264_mc_copy_w16_sse2 in mc.copy, it was previously only in mc_luma - -commit 14b45a81c25be808e3da6d7b3e78051f6c5b5308 -Author: Loren Merritt -Date: Thu Mar 20 14:00:08 2008 -0600 - - new ssd_8x*_sse2 - align ssd_16x*_sse2 - unroll ssd_4x*_mmx - -commit 72869f7648732a0d39398068e6288730ad009135 -Author: Manuel Rommel -Date: Thu Mar 20 13:21:16 2008 -0600 - - update altivec zigzags - -commit dbfd2cc73b01dc3256a4b7bf65b8b0632fd73f96 -Author: Loren Merritt -Date: Thu Mar 20 10:41:50 2008 -0600 - - r768 borked cavlc - -commit ac49761ae664e86598609301d7ca25063a003151 -Author: Loren Merritt -Date: Thu Mar 20 00:52:11 2008 -0600 - - cosmetics in intra predict - -commit 5442dafdab1f18eb5d6f27813dbd0d8b1f37a300 -Author: Fiona Glaser -Date: Thu Mar 20 00:31:42 2008 -0600 - - faster intra predict 8x8 hu/hd - -commit 30da25a99e24e5c1ff5972b7f5c22c4be2a944b1 -Author: Loren Merritt -Date: Wed Mar 19 23:43:19 2008 -0600 - - reduce zigzag arrays from int to int16_t - -commit 7a125e4a89b6c1cfd5066706939b7dee5a755254 -Author: Loren Merritt -Date: Wed Mar 19 23:42:20 2008 -0600 - - reduce the size of some arrays - -commit 1d56ef44748dd3ae36751f27263ccefc22d5f543 -Author: Fiona Glaser -Date: Wed Mar 19 15:01:05 2008 -0600 - - skip intra pred+dct+quant in cases where it's redundant (analyse vs encode) - large speedup with trellis=2, small speedup with trellis=0 and/or subme>=6 - -commit b10ee560293bfff1ac2e72d8a6a61fae4812a9a6 -Author: Loren Merritt -Date: Wed Mar 19 14:03:34 2008 -0600 - - cosmetics in asm - -commit a3b97adbcc84448c9c24520ede8188d6c99bf5cb -Author: Fiona Glaser -Date: Wed Mar 19 14:00:34 2008 -0600 - - satd_4x4_ssse3 - -commit 8773988471e5469ebd00841cccb4eee8bbdb54dd -Author: Fiona Glaser -Date: Wed Mar 19 13:40:41 2008 -0600 - - get_ref_sse2 - -commit 8727a01bf21a52224c5de130e1173de31062ab87 -Author: Fiona Glaser -Date: Tue Mar 18 19:17:22 2008 -0600 - - continue instead of crash when the threading mv constraint is violated. 
- doesn't fix the underlying bug, but hopefully less annoying until we find it. - -commit b2d5df5fdf80e42fe03526f9d73983477f2013af -Author: Loren Merritt -Date: Tue Mar 18 18:24:01 2008 -0600 - - remove remaining reference to clip1.h - -commit 73b3fd48e592de96c05bdfe0cf7144c0da6ac650 -Author: Loren Merritt -Date: Tue Mar 18 12:34:10 2008 -0600 - - fix name mangling again. - apparently it's not just a convention, dll build fails if you try to export a non-prefixed name. - -commit 1e829dbf23af6bda0103c15de11115461e9bc504 -Author: Gabriel Bouvigne -Date: Mon Mar 17 15:44:40 2008 -0600 - - update msvc projectfile - -commit 08c7dd46963096ba4d3e9187fdf14ec1338fc959 -Author: Loren Merritt -Date: Mon Mar 17 15:41:59 2008 -0600 - - missing #ifdef HAVE_SSE3 - -commit 0f51933971f6bb695ef88c03e2c3e76c61d1c95f -Author: Loren Merritt -Date: Mon Mar 17 15:41:30 2008 -0600 - - don't define offsetof since it's standard - -commit cfa08dc7ba39460a777b5161aa8024d85197b0d3 -Author: Loren Merritt -Date: Mon Mar 17 01:23:35 2008 -0600 - - shut up gcc warning in offsetof - -commit e56ea0861b650e1ee5f3951d786bfc5297183574 -Author: Håkan Hjort -Date: Mon Mar 17 01:20:02 2008 -0600 - - increase alignment of mv arrays - -commit 5469a4baaf379ab30119c625067be6dd23cb3bfe -Author: Fiona Glaser -Date: Sun Mar 16 23:58:04 2008 -0600 - - memcpy_aligned_sse2 - -commit 9d0c0a90254e39adb581158d37a6946064fce4e2 -Author: Loren Merritt -Date: Sun Mar 16 22:40:43 2008 -0600 - - checkasm check whether callee-saved regs are correctly saved - x86_32 only for now since x86_64 varargs are annoying - -commit 1b1e12482da963334290bd157088427f37f4e2d3 -Author: Loren Merritt -Date: Sun Mar 16 22:28:20 2008 -0600 - - fix x86_32 ads which failed to preserve a register - -commit c82674fe244b8cd7828117f267a8fca2b62f7cfd -Author: Loren Merritt -Date: Sun Mar 16 16:34:41 2008 -0600 - - fix some name mangling issues introduced by the merge - -commit 20b4106bb404de6be488a979ffafad35b7b8f691 -Author: Loren Merritt -Date: Sun 
Mar 16 15:30:40 2008 -0600 - - remove x264_mc_clip1. - it's wrong for sufficiently perverse inputs, and clip_uint8 is faster anyway. - -commit c17218e8a37ca1ed93a0852b73acc5d4cc046bb8 -Author: Loren Merritt -Date: Sun Mar 16 13:54:58 2008 -0600 - - merge x86_32 and x86_64 asm, with macros to abstract calling convention and register names - -commit 3445cca40e490cd11075051215c5d7c49477c7f7 -Author: Loren Merritt -Date: Sun Mar 9 05:58:55 2008 -0600 - - git compatible version script - -commit 8609ffa0dd7092509c0ec5c4c667ab6eea503fd7 -Author: Loren Merritt -Date: Sun Mar 2 17:53:01 2008 -0700 - - check for broken versions of yasm - -commit 3d5beaee8325c2788f17a632558759ec95ec76e6 -Author: Loren Merritt -Date: Sun Mar 2 17:27:38 2008 -0700 - - increase the alignment of the i8x8 edge cache, needed for sse2 intra prediction. - patch by Alexander Strange. - -commit 25fd257d988cf7f4e00fa674e1e99417e4f7ef6e -Author: Loren Merritt -Date: Sun Mar 2 16:12:57 2008 -0700 - - .gitignore - -commit 9dce08ac53aa22695d1934bc321122863ac3739e -Author: Loren Merritt -Date: Sun Mar 2 03:04:07 2008 +0000 - - pic macros now keep track of which register holds the GOT, so variable access doesn't have to care - - - git-svn-id: svn://svn.videolan.org/x264/trunk@745 df754926-b1dd-0310-bc7b-ec298dee348c - -commit 264f13aeaf52c7c8c38a35ab781561c4692e251e -Author: Loren Merritt -Date: Sun Mar 2 02:27:45 2008 +0000 - - remove x86_64 predict_8x8_ddl_mmxext because sse2 is faster even on amd - - - git-svn-id: svn://svn.videolan.org/x264/trunk@744 df754926-b1dd-0310-bc7b-ec298dee348c - -commit 315285741877f89c660b9cefc3114963e95cf56a -Author: Loren Merritt -Date: Sun Mar 2 02:26:00 2008 +0000 - - cosmetics in dsp init - - - git-svn-id: svn://svn.videolan.org/x264/trunk@743 df754926-b1dd-0310-bc7b-ec298dee348c - -commit 564cc252b099ad12e9c33dd9404ad64ed0bc5b8f -Author: Loren Merritt -Date: Sun Mar 2 02:11:12 2008 +0000 - - sse2 16x16 intra pred. 
- port the remaining intra pred functions from x86_64 to x86_32. - patch by Fiona Glaser. - - - git-svn-id: svn://svn.videolan.org/x264/trunk@742 df754926-b1dd-0310-bc7b-ec298dee348c - -commit c48882dd3f9a51b64c2d129f604bedb79d140626 -Author: Loren Merritt -Date: Sat Mar 1 13:47:05 2008 +0000 - - some simplifications to mmx intra pred that should have been done way back when we switched to constant fdec_stride. - and remove pic spills in functions that have a free caller-saved reg. - patch partly by Fiona Glaser. - - - git-svn-id: svn://svn.videolan.org/x264/trunk@741 df754926-b1dd-0310-bc7b-ec298dee348c - -commit 78ec787957d66996336224fde5e0bec38bf11b3c -Author: Loren Merritt -Date: Sat Mar 1 07:30:34 2008 +0000 - - faster array_non_zero - - - git-svn-id: svn://svn.videolan.org/x264/trunk@740 df754926-b1dd-0310-bc7b-ec298dee348c - -commit 405d9c668463f784301b705633d7723638e068e3 -Author: Loren Merritt -Date: Sat Mar 1 04:33:24 2008 +0000 - - x86_32 sse2 idct8 - ported from ffmpeg by Fiona Glaser - - - git-svn-id: svn://svn.videolan.org/x264/trunk@739 df754926-b1dd-0310-bc7b-ec298dee348c - -commit b6d6d2324acc160ac00267da14b21166d89cff92 -Author: Loren Merritt -Date: Sat Mar 1 04:13:55 2008 +0000 - - checkasm: relax the threshold for floating-point ssim - - - git-svn-id: svn://svn.videolan.org/x264/trunk@738 df754926-b1dd-0310-bc7b-ec298dee348c - -commit 68399d5f330bd7307d9b8fd4fc4a63f2c5540009 -Author: Loren Merritt -Date: Sat Mar 1 04:07:44 2008 +0000 - - checkasm: test idct with the range of coefficients what can really be encountered, as opposed to random numbers which might overflow. 
- - - git-svn-id: svn://svn.videolan.org/x264/trunk@737 df754926-b1dd-0310-bc7b-ec298dee348c - -commit bff0357aaea8eda709e46e9a3f8c38d110ecf8a6 -Author: Loren Merritt -Date: Mon Jan 28 14:33:42 2008 +0000 - - intra_rd_refine in B-frames - - - git-svn-id: svn://svn.videolan.org/x264/trunk@736 df754926-b1dd-0310-bc7b-ec298dee348c - -commit 0289fc10a7522d6d91ae2906f053ad26b775d8d7 -Author: Loren Merritt -Date: Sun Jan 27 16:29:54 2008 +0000 - - print average of macroblock QPs instead of frame's nominal QP - - - git-svn-id: svn://svn.videolan.org/x264/trunk@735 df754926-b1dd-0310-bc7b-ec298dee348c - -commit a783a84024df94041b9c2805bc94c0bc343f2b1a -Author: Loren Merritt -Date: Sun Jan 27 16:16:37 2008 +0000 - - update date - - - git-svn-id: svn://svn.videolan.org/x264/trunk@734 df754926-b1dd-0310-bc7b-ec298dee348c - -commit ecbc00bfb735824419101f8e2186bdae18893c89 -Author: Loren Merritt -Date: Sun Jan 27 16:06:31 2008 +0000 - - remove colorspace conversion support, because it has no business in any codec - - - git-svn-id: svn://svn.videolan.org/x264/trunk@733 df754926-b1dd-0310-bc7b-ec298dee348c - -commit 44f5e6bda9646eea70d57effdb93255721a475a3 -Author: Loren Merritt -Date: Sun Jan 27 14:01:40 2008 +0000 - - misc fixes in checkasm - - - git-svn-id: svn://svn.videolan.org/x264/trunk@732 df754926-b1dd-0310-bc7b-ec298dee348c - -commit 1b59c20789dd95c06074bd521d7b1f15fa139b07 -Author: Loren Merritt -Date: Sun Jan 27 13:39:09 2008 +0000 - - remove a useless bit of me=umh (originally copied from JM, where it was used for something) - - - git-svn-id: svn://svn.videolan.org/x264/trunk@731 df754926-b1dd-0310-bc7b-ec298dee348c - -commit 330e7364c764a18ebba95f26cd638b7992498e1e -Author: Loren Merritt -Date: Sun Jan 27 11:50:50 2008 +0000 - - fix a memleak in cqm - - - git-svn-id: svn://svn.videolan.org/x264/trunk@730 df754926-b1dd-0310-bc7b-ec298dee348c - -commit 9a8218ca82fe1027487651c4fc80c262ff935699 -Author: Loren Merritt -Date: Sun Jan 27 11:49:16 2008 +0000 - - fix a 
memleak in mkv muxer - patch by saintdev - - - git-svn-id: svn://svn.videolan.org/x264/trunk@729 df754926-b1dd-0310-bc7b-ec298dee348c - -commit 8d09ebe2e862688ce213d3f098ce7eca719fea23 -Author: Loren Merritt -Date: Sun Jan 27 11:36:11 2008 +0000 - - satd exhaustive motion search (--me tesa) - - - git-svn-id: svn://svn.videolan.org/x264/trunk@728 df754926-b1dd-0310-bc7b-ec298dee348c - -commit 12c833c863c0e119212977d25723431bc66088a4 -Author: Loren Merritt -Date: Sun Jan 27 11:09:52 2008 +0000 - - fix cabac context for nonzero delta_qp of the 2nd mb of a frame in interlaced mode - - - git-svn-id: svn://svn.videolan.org/x264/trunk@727 df754926-b1dd-0310-bc7b-ec298dee348c - -commit 3f54ed167a5546bd9b8639285090c71a82bbffc4 -Author: Loren Merritt -Date: Sun Jan 27 10:32:36 2008 +0000 - - fix mapping of mvs to partitions in p4x4_chroma - patch by Noboru Asai - - - git-svn-id: svn://svn.videolan.org/x264/trunk@726 df754926-b1dd-0310-bc7b-ec298dee348c - -commit fa8df0155128105e59155a38d3dcba8e5e765e16 -Author: Loren Merritt -Date: Sun Jan 27 10:12:24 2008 +0000 - - fix mvp for b16x8 and b8x16 L1 search - patch by Wei-Yin Chen - - - git-svn-id: svn://svn.videolan.org/x264/trunk@725 df754926-b1dd-0310-bc7b-ec298dee348c - -commit f98a8e20888847527edcff6a9244de2dc714e42c -Author: Loren Merritt -Date: Sun Jan 27 10:05:20 2008 +0000 - - shave a couple cycles off cabac functions - - - git-svn-id: svn://svn.videolan.org/x264/trunk@724 df754926-b1dd-0310-bc7b-ec298dee348c - -commit 75c77579a5e6386250aa85335408d5d8ed475df4 -Author: Loren Merritt -Date: Sun Jan 27 09:12:39 2008 +0000 - - faster and smaller x264_macroblock_cache_mv etc - - - git-svn-id: svn://svn.videolan.org/x264/trunk@723 df754926-b1dd-0310-bc7b-ec298dee348c - -commit dc95c0f4d30e5289442b1679bf674b92f18f5083 -Author: Loren Merritt -Date: Sun Jan 27 09:11:01 2008 +0000 - - configure test for endianness - - - git-svn-id: svn://svn.videolan.org/x264/trunk@722 df754926-b1dd-0310-bc7b-ec298dee348c - -commit 
e644e7eaece255a711a9a2eff5e708cd9168bb71 -Author: Loren Merritt -Date: Fri Jan 18 00:42:38 2008 +0000 - - change the meaning of --ref: it now selects DPB size (including B-frames), rather than L0 size (which B-frames are added to) - - - git-svn-id: svn://svn.videolan.org/x264/trunk@721 df754926-b1dd-0310-bc7b-ec298dee348c - -commit 8105162bf7fba8b7d467d5a185db25e4696d74d6 -Author: Guillaume Poirier -Date: Mon Jan 14 09:54:33 2008 +0000 - - add / fix support for FreeBSD, based on a patch by Igor Mozolevsky % igor A hybrid-lab P co P uk % - - - git-svn-id: svn://svn.videolan.org/x264/trunk@720 df754926-b1dd-0310-bc7b-ec298dee348c - -commit 85a9209e7b85e7a38fa3770f963dc9535fe4a19c -Author: Loren Merritt -Date: Wed Jan 9 11:25:09 2008 +0000 - - shut up some valgrind warnings - - - git-svn-id: svn://svn.videolan.org/x264/trunk@719 df754926-b1dd-0310-bc7b-ec298dee348c - -commit ab69c14b91fc4fcdd15b5449fb549c05fe94e9a8 -Author: Loren Merritt -Date: Tue Jan 8 18:10:51 2008 +0000 - - slightly wrong memory allocation in r717, fixes a potential crash with merange>32 - - - git-svn-id: svn://svn.videolan.org/x264/trunk@718 df754926-b1dd-0310-bc7b-ec298dee348c - -commit 2ed861c86065fd556a6f7b18718725c1fc04452b -Author: Loren Merritt -Date: Sun Jan 6 08:15:04 2008 +0000 - - convert absolute difference of sums from mmx to sse2 - convert mv bits cost and ads threshold from C to sse2 - convert bytemask-to-list from C to scalar asm - 1.6x faster me=esa (x86_64) or 1.3x faster (x86_32). (times consider only motion estimation. overall encode speedup may vary.) 
- - - git-svn-id: svn://svn.videolan.org/x264/trunk@717 df754926-b1dd-0310-bc7b-ec298dee348c - -commit c0fb035a73e93744ad443e0086c810e9bef38232 -Author: Loren Merritt -Date: Sun Jan 6 08:06:36 2008 +0000 - - round esa range to a multiple of 4 - - - git-svn-id: svn://svn.videolan.org/x264/trunk@716 df754926-b1dd-0310-bc7b-ec298dee348c - -commit 644878dc39b22f41c447705c0f70c0b728204068 -Author: Guillaume Poirier -Date: Thu Jan 3 22:24:38 2008 +0000 - - use define _WIN32 instead of __WIN32__ or WIN32 defines. - MSDN reference: http://msdn2.microsoft.com/en-us/library/b0084kay(VS.80).aspx - Patch by BugMaster %BugMaster A narod P ru% - Original thread: - date: Dec 27, 2007 3:18 AM - subject: [x264-devel] VS2008 compilation error (need of replacement __WIN32__ with _WIN32) - - - git-svn-id: svn://svn.videolan.org/x264/trunk@715 df754926-b1dd-0310-bc7b-ec298dee348c - -commit 04e389d4d9ed2b1ed853822e64bf9c4db71ac841 -Author: Loren Merritt -Date: Fri Dec 21 01:57:14 2007 +0000 - - tweak x264_pixel_sad_x4_16x16_sse2 horizontal sum. 168 -> 166 cycles on core2. - - - git-svn-id: svn://svn.videolan.org/x264/trunk@714 df754926-b1dd-0310-bc7b-ec298dee348c - -commit 665c02975800d1f6d44d2250f869ccfe78405c19 -Author: Loren Merritt -Date: Thu Dec 20 19:24:17 2007 +0000 - - fix a nondeterminism involving 8x8dct, rdo, and threads. 
- - - git-svn-id: svn://svn.videolan.org/x264/trunk@713 df754926-b1dd-0310-bc7b-ec298dee348c - -commit 55b152e408df09b011b1d60519f890746de64aa1 -Author: Guillaume Poirier -Date: Thu Dec 13 15:43:41 2007 +0000 - - also test arch-specific x264_zigzag_* implementations in checkasm.c - Patch by Noboru Asai % noboru P asai A gmail P com% - - - git-svn-id: svn://svn.videolan.org/x264/trunk@712 df754926-b1dd-0310-bc7b-ec298dee348c - -commit e2eb874c1dfd15e62a3801af7e900baebf33746e -Author: Guillaume Poirier -Date: Mon Dec 10 22:09:13 2007 +0000 - - Add AltiVec implementation of - - x264_zigzag_scan_4x4_frame_altivec() - - x264_zigzag_scan_4x4ac_frame_altivec() - - x264_zigzag_scan_4x4_field_altivec() - - x264_zigzag_scan_4x4ac_field_altivec() - each around 1.3 to 1.8x faster than C version - Patch by Noboru Asai % noboru P asai A gmail P com% - - - git-svn-id: svn://svn.videolan.org/x264/trunk@711 df754926-b1dd-0310-bc7b-ec298dee348c - -commit adba3e534e72eb585982f52ef0c521a26b9fb90a -Author: Guillaume Poirier -Date: Sun Dec 9 15:50:52 2007 +0000 - - adds AltiVec implementation of predict_16x16_p() - over 4x faster than C version - - - git-svn-id: svn://svn.videolan.org/x264/trunk@710 df754926-b1dd-0310-bc7b-ec298dee348c - -commit e241f028f34c84321213ad6436748e4769e289c5 -Author: Loren Merritt -Date: Tue Dec 4 21:56:18 2007 +0000 - - revert the x86_32 part of r708. elf shared libraries aren't important enough to be worth the extra lines of code to check for nasm. 
- - - git-svn-id: svn://svn.videolan.org/x264/trunk@709 df754926-b1dd-0310-bc7b-ec298dee348c - -commit 6d185b463056b2cc589aa25c5ed598e63d48dc03 -Author: Loren Merritt -Date: Mon Dec 3 01:17:23 2007 +0000 - - mark asm functions as hidden - - - git-svn-id: svn://svn.videolan.org/x264/trunk@708 df754926-b1dd-0310-bc7b-ec298dee348c - -commit 316150357332e6adcdaeea45950e1f5e94d0b7dc -Author: Loren Merritt -Date: Mon Dec 3 01:16:57 2007 +0000 - - check whether ld supports -Bsymbolic before using it - - - git-svn-id: svn://svn.videolan.org/x264/trunk@707 df754926-b1dd-0310-bc7b-ec298dee348c - -commit 9784fa625ff0ccc6c25e52c18d8858d15584ab44 -Author: Loren Merritt -Date: Sun Dec 2 15:57:43 2007 +0000 - - reduce the data type used in some tables. 16KB smaller exe. - - - git-svn-id: svn://svn.videolan.org/x264/trunk@706 df754926-b1dd-0310-bc7b-ec298dee348c - -commit e93dcb07eb6937550dee8cf4cd324a99fa7dae2c -Author: Loren Merritt -Date: Sat Dec 1 18:03:16 2007 +0000 - - faster removal of duplicate mv predictors - - - git-svn-id: svn://svn.videolan.org/x264/trunk@705 df754926-b1dd-0310-bc7b-ec298dee348c - -commit 5d49ebd28eab0597fe74ce4d6fdffe768b145442 -Author: Loren Merritt -Date: Sat Dec 1 15:17:19 2007 +0000 - - avoid a division in x264_mb_predict_mv_ref16x16. - patch by Fiona Glaser. - - - git-svn-id: svn://svn.videolan.org/x264/trunk@704 df754926-b1dd-0310-bc7b-ec298dee348c - -commit 98f69c4af8834fd7c05f4e3fe9154288610fd912 -Author: Loren Merritt -Date: Sat Dec 1 02:58:34 2007 +0000 - - avoid a division in umh. - patch by Fiona Glaser. 
- - - git-svn-id: svn://svn.videolan.org/x264/trunk@703 df754926-b1dd-0310-bc7b-ec298dee348c - -commit 0642e139bffbf180f293ea4a94e2576c6963f5dd -Author: Loren Merritt -Date: Mon Nov 26 11:44:37 2007 +0000 - - fix a memleak in h->mb.mvr - - - git-svn-id: svn://svn.videolan.org/x264/trunk@702 df754926-b1dd-0310-bc7b-ec298dee348c - -commit 290de9638e5364c37316010ac648a6c959f6dd26 -Author: Loren Merritt -Date: Sun Nov 25 12:38:19 2007 +0000 - - fix compilation as a shared library on x86_64 (regression in r696) - - - git-svn-id: svn://svn.videolan.org/x264/trunk@701 df754926-b1dd-0310-bc7b-ec298dee348c - -commit e5479d7c58110a219b875889449bb3dc027735ea -Author: Guillaume Poirier -Date: Wed Nov 21 18:30:49 2007 +0000 - - add support for x86_64 on Darwin9.0 (Mac OS X 10.5, aka Leopard) - Patch by Antoine Gerschenfeld %gerschen A clipper P ens P fr% - - - git-svn-id: svn://svn.videolan.org/x264/trunk@700 df754926-b1dd-0310-bc7b-ec298dee348c - -commit bc77b5b91801325b0e36ac300535811658104690 -Author: Loren Merritt -Date: Wed Nov 21 11:52:19 2007 +0000 - - cover some more options in fprofile. (esa, bime, cqm, nr, no-dct-decimate, trellis2) - previously, esa was slower with fprofile than without, since gcc thought it wasn't important. now esa benefits like anything else. 
- - - git-svn-id: svn://svn.videolan.org/x264/trunk@699 df754926-b1dd-0310-bc7b-ec298dee348c - -commit 0291a044fa4334173c3129d80bf0e893a6d143d7 -Author: Guillaume Poirier -Date: Tue Nov 20 18:22:03 2007 +0000 - - Add AltiVec implementation of x264_pixel_ssd_8x8, 3x faster than C version - Overall speed-up: 0.7% with --bframes 3 --ref 5 -m 7 --b-rdo - Patch by Noboru Asai %noboru P asai A gmail P com% - - - git-svn-id: svn://svn.videolan.org/x264/trunk@698 df754926-b1dd-0310-bc7b-ec298dee348c - -commit 9abaaaece8e7a4495d2b22bd47c39c3d81172610 -Author: Loren Merritt -Date: Tue Nov 20 08:53:26 2007 +0000 - - limit mvs to [-512,511.75] instead of [-512,512] - - - git-svn-id: svn://svn.videolan.org/x264/trunk@697 df754926-b1dd-0310-bc7b-ec298dee348c - -commit d4ebafa5d7db55e0d21c633c25d4835c5b94e3fd -Author: Loren Merritt -Date: Tue Nov 20 06:07:17 2007 +0000 - - avoid memory loads that span the border between two cachelines. - on core2 this makes x264_pixel_sad an average of 2x faster. other intel cpus gain various amounts. amd are unaffected. - overall speedup: 1-10%, depending on how much time is spent in fullpel motion estimation. - - - git-svn-id: svn://svn.videolan.org/x264/trunk@696 df754926-b1dd-0310-bc7b-ec298dee348c - -commit 125e0a84e04d04ac2dde69e091a75295f35120bc -Author: Loren Merritt -Date: Tue Nov 20 05:57:29 2007 +0000 - - add cache info to cpu_detect. also print sse3. 
- - - git-svn-id: svn://svn.videolan.org/x264/trunk@695 df754926-b1dd-0310-bc7b-ec298dee348c - -commit b5d6d038ce4c643c6e6b096890728aba8c8db35c -Author: Loren Merritt -Date: Mon Nov 19 17:10:57 2007 +0000 - - cosmetics: reorder mc_luma/mc_chroma/get_ref arguments for consistency with other functions - - - git-svn-id: svn://svn.videolan.org/x264/trunk@694 df754926-b1dd-0310-bc7b-ec298dee348c - -commit 94c55d8cc75ea88987ec6766d83ea0dd0aa7384f -Author: Loren Merritt -Date: Mon Nov 19 17:08:07 2007 +0000 - - separate pixel_avg into cases for mc and for bipred - - - git-svn-id: svn://svn.videolan.org/x264/trunk@693 df754926-b1dd-0310-bc7b-ec298dee348c - -commit fcbd7e0c80d30dc7f0b098e704038e015f536be5 -Author: Guillaume Poirier -Date: Sun Nov 18 23:58:18 2007 +0000 - - add AltiVec implementation of ssim_4x4x2_core, about 4x faster than C version. - Overall: 0.1-0.2% faster with default encoding settings - Patch by Noboru Asai %noboru P asai A gmail P com% - - - git-svn-id: svn://svn.videolan.org/x264/trunk@692 df754926-b1dd-0310-bc7b-ec298dee348c - -commit c0cd142e71575907c570ce6606e38b726e36d10e -Author: Guillaume Poirier -Date: Sun Nov 18 23:47:41 2007 +0000 - - Add AltiVec implementation of x264_hpel_filter. Provides a 10-11% overall speed-up with default encoding options - Patch by Noboru Asai %noboru P asai A gmail P com% - - - git-svn-id: svn://svn.videolan.org/x264/trunk@691 df754926-b1dd-0310-bc7b-ec298dee348c - -commit a6e82dfff168a77d7801c08e61918b3e91db84cc -Author: Loren Merritt -Date: Sun Nov 18 01:45:44 2007 +0000 - - cosmetics in dsp function selection - - - git-svn-id: svn://svn.videolan.org/x264/trunk@690 df754926-b1dd-0310-bc7b-ec298dee348c - -commit 2cd3c7b140e7bc60d902e4e7a049c17e82c02f6e -Author: Loren Merritt -Date: Sat Nov 17 10:21:46 2007 +0000 - - remove sad_pde. it's been unused ever since successive elimination replaced it. 
- - - git-svn-id: svn://svn.videolan.org/x264/trunk@689 df754926-b1dd-0310-bc7b-ec298dee348c - -commit d9891656c3c376eb84fec3f85ebdee385d4de1f2 -Author: Loren Merritt -Date: Fri Nov 16 10:27:14 2007 +0000 - - cosmetics: use symbolic constants for frame padding radius - - - git-svn-id: svn://svn.videolan.org/x264/trunk@688 df754926-b1dd-0310-bc7b-ec298dee348c - -commit 7b8c02d57b9b73a57ea7e58dd74b072197def2c7 -Author: Loren Merritt -Date: Fri Nov 16 09:17:58 2007 +0000 - - move hpel_filter cpu detection to a function pointer like everything else - - - git-svn-id: svn://svn.videolan.org/x264/trunk@687 df754926-b1dd-0310-bc7b-ec298dee348c - -commit a79aac149ff148a6e32c7a305bac224490866a8f -Author: Loren Merritt -Date: Thu Nov 15 10:50:37 2007 +0000 - - cosmetics: use separate variables for frame width and stride - - - git-svn-id: svn://svn.videolan.org/x264/trunk@686 df754926-b1dd-0310-bc7b-ec298dee348c - -commit a8650641e1006d9750cc97d3c1672871c4549296 -Author: Guillaume Poirier -Date: Mon Nov 12 20:36:33 2007 +0000 - - Add AltiVec implementation of add4x4_idct, add8x8_idct, add16x16_idct, 3.2x faster on average - 1.05x faster overall with default encoding options - Patch by Noboru Asai % noboru DD asai AA gmail DD com % - - - git-svn-id: svn://svn.videolan.org/x264/trunk@685 df754926-b1dd-0310-bc7b-ec298dee348c - -commit 3b6b4c412037f072d0511cf48524987f3b927428 -Author: Guillaume Poirier -Date: Mon Nov 12 20:28:30 2007 +0000 - - add AltiVec implementation of dequant_4x4 and dequant_8x8, 2.8x faster than C, - 1.01x faster than previous revision with default encoding options - Patch by Noboru Asai % noboru DD asai AA gmail DD com % - - - git-svn-id: svn://svn.videolan.org/x264/trunk@684 df754926-b1dd-0310-bc7b-ec298dee348c - -commit 09334c1a26d8b5485f12c233242d0aaf91003aea -Author: Guillaume Poirier -Date: Mon Nov 12 12:47:38 2007 +0000 - - Add AltiVec implementation of quant_2x2_dc, - fix Altivec implementation of quant_(4x4|8x8)(|_dc) wrt current C implementation - 
Patch by Noboru Asai % noboru DD asai AA gmail DD com % - - - git-svn-id: svn://svn.videolan.org/x264/trunk@683 df754926-b1dd-0310-bc7b-ec298dee348c - -commit 57461cb4cec75eff41f1ac5b0f136511c5ccad28 -Author: Loren Merritt -Date: Thu Nov 1 12:21:13 2007 +0000 - - fix a possible nondeterminism with me=umh + threads. - - - git-svn-id: svn://svn.videolan.org/x264/trunk@682 df754926-b1dd-0310-bc7b-ec298dee348c - -commit 22455694153d43a9f85837db6eee641ebc4dcdb6 -Author: Loren Merritt -Date: Mon Oct 29 14:48:46 2007 +0000 - - use hex instead of dia for rdo mv refinement. ~0.5% lower bitrate at subme=7. - patch by Fiona Glaser. - - - git-svn-id: svn://svn.videolan.org/x264/trunk@681 df754926-b1dd-0310-bc7b-ec298dee348c - -commit 35094bec4e0202cbdb710b98fa04ea24375531e0 -Author: Loren Merritt -Date: Mon Sep 24 13:37:44 2007 +0000 - - port sad_*_x3_sse2 to x86_64 - - - git-svn-id: svn://svn.videolan.org/x264/trunk@680 df754926-b1dd-0310-bc7b-ec298dee348c - -commit 673ce32a59310b5494049cce140e5420128331ed -Author: Loren Merritt -Date: Mon Sep 24 11:24:28 2007 +0000 - - don't overwrite pthread* namespace, because system headers might define those functions even if we don't want them - - - git-svn-id: svn://svn.videolan.org/x264/trunk@679 df754926-b1dd-0310-bc7b-ec298dee348c - -commit 5c49545c0277a8da1c29809118a734c461116c54 -Author: Loren Merritt -Date: Fri Sep 21 20:20:22 2007 +0000 - - faster 4x4 sad - - - git-svn-id: svn://svn.videolan.org/x264/trunk@678 df754926-b1dd-0310-bc7b-ec298dee348c - -commit a6edfd669f97154571f203cfa69d634a639800ff -Author: Loren Merritt -Date: Thu Sep 20 08:10:45 2007 +0000 - - fix an arithmetic overflow in trellis at high qp. 
- - - git-svn-id: svn://svn.videolan.org/x264/trunk@677 df754926-b1dd-0310-bc7b-ec298dee348c - -commit 463437926e73b3d18542804cbef31f277d115cc2 -Author: Loren Merritt -Date: Sat Sep 15 06:34:05 2007 +0000 - - implement multithreaded me=esa - - - git-svn-id: svn://svn.videolan.org/x264/trunk@676 df754926-b1dd-0310-bc7b-ec298dee348c - -commit cde5f334121ba1cf6ae13174337ae49008c1f2a4 -Author: Loren Merritt -Date: Wed Sep 12 05:42:23 2007 +0000 - - fix some integer overflows. now vbv size can exceed 2 Gbit. - - - git-svn-id: svn://svn.videolan.org/x264/trunk@675 df754926-b1dd-0310-bc7b-ec298dee348c - -commit d16a4da48b06671e85578ee022729bb2fb6f59c9 -Author: Loren Merritt -Date: Sun Sep 9 03:17:20 2007 +0000 - - allow --vbv-init to take absolute values (in kbit), in addition to the previous fractions of vbv-bufsize. - - - git-svn-id: svn://svn.videolan.org/x264/trunk@674 df754926-b1dd-0310-bc7b-ec298dee348c - -commit 98494077449c4a66ed55cd5ee5a89b7c62e12dd0 -Author: Loren Merritt -Date: Fri Sep 7 20:40:13 2007 +0000 - - remove a bashism - - - git-svn-id: svn://svn.videolan.org/x264/trunk@673 df754926-b1dd-0310-bc7b-ec298dee348c - -commit 759620535bccdb4d872fcc3b798eb6df68c672db -Author: Loren Merritt -Date: Sun Sep 2 04:32:17 2007 +0000 - - reorder headers so that largefile support is defined before the first copy of stdio - - - git-svn-id: svn://svn.videolan.org/x264/trunk@672 df754926-b1dd-0310-bc7b-ec298dee348c - -commit 71af28517442b4388c031d1ce74fa5096beca9bf -Author: Loren Merritt -Date: Mon Aug 20 16:44:42 2007 +0000 - - regression in r669: broke saving of configure args if make has to re-run configure - - - git-svn-id: svn://svn.videolan.org/x264/trunk@671 df754926-b1dd-0310-bc7b-ec298dee348c - -commit 393daac2d7578e0a9d7d541f25026f22f62d03bc -Author: Loren Merritt -Date: Sat Aug 18 01:13:22 2007 +0000 - - regression in r669: --enable-shared should imply --enable-pic on some archs. 
- - - git-svn-id: svn://svn.videolan.org/x264/trunk@670 df754926-b1dd-0310-bc7b-ec298dee348c - -commit 113800851647deafdacc9a35ee8a1a761f1c777d -Author: Loïc Minier -Date: Sun Aug 12 12:46:15 2007 +0000 - - * Add a --host flag to allow overriding config.guess; this is particularly - useful with a 64-bits kernel running a 32-bits userland to build 32-bits - apps. - * Normalize any host triplet into a quadruplet via config.sub. - * Move option parsing before any use of architecture information. - - - git-svn-id: svn://svn.videolan.org/x264/trunk@669 df754926-b1dd-0310-bc7b-ec298dee348c - -commit 3d51a5ba7966542355f0e3904f6e8319b9ba55c2 -Author: Loïc Minier -Date: Sun Aug 12 12:36:23 2007 +0000 - - * Update config.guess. - - - git-svn-id: svn://svn.videolan.org/x264/trunk@668 df754926-b1dd-0310-bc7b-ec298dee348c - -commit 0ea7d1b2ecb6d744c0af674e7b3b8f85eabf3aaa -Author: Loren Merritt -Date: Tue Jul 17 11:24:26 2007 +0000 - - mingw doesn't have strtok_r - - - git-svn-id: svn://svn.videolan.org/x264/trunk@667 df754926-b1dd-0310-bc7b-ec298dee348c - -commit 85f2fc3252f0fae8031fa9e942577d121cb75cd1 -Author: Loren Merritt -Date: Tue Jul 17 11:11:19 2007 +0000 - - move os/compiler specific defines to their own header - - - git-svn-id: svn://svn.videolan.org/x264/trunk@666 df754926-b1dd-0310-bc7b-ec298dee348c - -commit a18f3dab89e6786233c11723900f8b0126e1494d -Author: Loren Merritt -Date: Thu Jul 12 23:48:23 2007 +0000 - - extend zones to support (some) encoding parameters in addition to ratecontrol. 
- - - git-svn-id: svn://svn.videolan.org/x264/trunk@665 df754926-b1dd-0310-bc7b-ec298dee348c - -commit d5ddf40b1ae1c267ea3f9e998d6ac1e3a4004e07 -Author: Loren Merritt -Date: Fri Jul 6 17:08:26 2007 +0000 - - cosmetics - - - git-svn-id: svn://svn.videolan.org/x264/trunk@664 df754926-b1dd-0310-bc7b-ec298dee348c - -commit ee62378f91c3cfcc028e771b8bc998b4490cc8a1 -Author: Loren Merritt -Date: Thu Jun 28 21:26:21 2007 +0000 - - limit vertical motion vectors to +/-512, since some decoders actually depend on that limit. - - - git-svn-id: svn://svn.videolan.org/x264/trunk@663 df754926-b1dd-0310-bc7b-ec298dee348c - -commit 303175413b0f1f38d488c7decbce9d7ccb81f647 -Author: Guillaume Poirier -Date: Fri Jun 22 21:42:41 2007 +0000 - - Add vertical and horizontal luma deblocking accelerated with Altivec, - based on Graham Booker's code written for FFmpeg with slight modifications - to re-use x264's macros - - - git-svn-id: svn://svn.videolan.org/x264/trunk@662 df754926-b1dd-0310-bc7b-ec298dee348c - -commit 9acd233b21bb1c0808b2f4c4c100511b3d6d45a1 -Author: Loren Merritt -Date: Sat Jun 16 04:09:01 2007 +0000 - - cosmetics in cpu detection - - - git-svn-id: svn://svn.videolan.org/x264/trunk@661 df754926-b1dd-0310-bc7b-ec298dee348c - -commit 80090ccfb3493659451c21506d77dcacda6bcab2 -Author: Loren Merritt -Date: Sat Jun 16 04:02:48 2007 +0000 - - fix compilation without asm on x86_32 (r658 worked only on x86_64). - - - git-svn-id: svn://svn.videolan.org/x264/trunk@660 df754926-b1dd-0310-bc7b-ec298dee348c - -commit fedfacea656db5a327fba0c449fe70181539f876 -Author: Loren Merritt -Date: Sun Jun 10 23:46:31 2007 +0000 - - exempt 1080p from the non-mod16 warning. 
- - - git-svn-id: svn://svn.videolan.org/x264/trunk@659 df754926-b1dd-0310-bc7b-ec298dee348c - -commit 17dd119d8275224ad7f830dcdf4a5458150fcd31 -Author: Alex Izvorski -Date: Tue Jun 5 18:38:31 2007 +0000 - - allow compiling without yasm/nasm on x86 and x86-64 platforms - - git-svn-id: svn://svn.videolan.org/x264/trunk@658 df754926-b1dd-0310-bc7b-ec298dee348c - -commit 26bed72be4941353b4c79eddf500ce39aaf75490 -Author: Alex Izvorski -Date: Tue Jun 5 18:32:13 2007 +0000 - - updated MS VC8/VC7 build, patch by Gabriel Bouvigne - - git-svn-id: svn://svn.videolan.org/x264/trunk@657 df754926-b1dd-0310-bc7b-ec298dee348c - -commit a35548661f5f50c22516147b9b164f25d44a69db -Author: Alex Izvorski -Date: Sat May 26 03:13:08 2007 +0000 - - replace alloca with malloc everywhere. per manpage, use of alloca is discouraged. this may have a minor effect on the speed of ssim and esa, but that appears too small to measure. - - git-svn-id: svn://svn.videolan.org/x264/trunk@656 df754926-b1dd-0310-bc7b-ec298dee348c - -commit ffa4e76d4573303271c768f6ec03c21f0f2f4f02 -Author: Loren Merritt -Date: Wed May 2 21:33:43 2007 +0000 - - require a ratecontrol method to be specified, it no longer defaults to cqp=26. - - - git-svn-id: svn://svn.videolan.org/x264/trunk@655 df754926-b1dd-0310-bc7b-ec298dee348c - -commit b5ef788a7c6d2c7ba2cdb664ffd5544603979495 -Author: Loren Merritt -Date: Mon Apr 23 08:34:52 2007 +0000 - - fix nnz computation in cavlc+8x8dct+deblock. (regression in r607) - - - git-svn-id: svn://svn.videolan.org/x264/trunk@654 df754926-b1dd-0310-bc7b-ec298dee348c - -commit 08b4f6956135d54693b4e618f58fd7f68e654473 -Author: Loren Merritt -Date: Mon Apr 23 07:09:30 2007 +0000 - - fix the computation of bits used for vbv. 
(regression in r651) - - - git-svn-id: svn://svn.videolan.org/x264/trunk@653 df754926-b1dd-0310-bc7b-ec298dee348c - -commit fe85aca1c58ef4975aee8e6e32b4d57624960367 -Author: Loren Merritt -Date: Sun Apr 22 03:37:56 2007 +0000 - - c89 compile fix - - - git-svn-id: svn://svn.videolan.org/x264/trunk@652 df754926-b1dd-0310-bc7b-ec298dee348c - -commit b3076aef6c40f10260ef7386e3f2e028997da5d5 -Author: Loren Merritt -Date: Sat Apr 21 11:32:34 2007 +0000 - - cabac: use bytestream instead of bitstream. - 35% faster cabac, 20% faster overall lossless, ~1% faster overall at normal bitrates. - - - git-svn-id: svn://svn.videolan.org/x264/trunk@651 df754926-b1dd-0310-bc7b-ec298dee348c - -commit 8300d3344b73612ec449a2c2ee259c654fef9d0a -Author: Loren Merritt -Date: Wed Apr 11 22:21:15 2007 +0000 - - remove the restriction on number of threads as a function of resolution (it was wrong anyway in the presence of B-frames), and raise the max number of threads in general (though more will have to be done before it can really scale to lots of cores). - - - git-svn-id: svn://svn.videolan.org/x264/trunk@650 df754926-b1dd-0310-bc7b-ec298dee348c - -commit 8ecf4b912011e9a097d570086553f4685d5dddc5 -Author: Loren Merritt -Date: Tue Apr 10 22:37:18 2007 +0000 - - tweak ssse3 quant - - - git-svn-id: svn://svn.videolan.org/x264/trunk@649 df754926-b1dd-0310-bc7b-ec298dee348c - -commit c266480eb24e7886877f52436056e99fced02cdc -Author: Loren Merritt -Date: Sat Apr 7 04:53:16 2007 +0000 - - change some tables from int to int8_t. 13KB smaller executable. - - - git-svn-id: svn://svn.videolan.org/x264/trunk@648 df754926-b1dd-0310-bc7b-ec298dee348c - -commit 2f66c11a4eeb17950b3aee18cc105572e860ec44 -Author: Loren Merritt -Date: Fri Apr 6 21:45:33 2007 +0000 - - faster cabac rdo. up to 10% faster at q0, but negligible at normal bitrates. 
- - - git-svn-id: svn://svn.videolan.org/x264/trunk@647 df754926-b1dd-0310-bc7b-ec298dee348c - -commit 3e7b136c8525f73f6e01be260adbfc15c34503d7 -Author: Loren Merritt -Date: Fri Apr 6 21:17:34 2007 +0000 - - workaround gcc's inability to align variables on the stack. - this crash was introduced in r642, but only because previous versions didn't use sse2 on the stack. - - - git-svn-id: svn://svn.videolan.org/x264/trunk@646 df754926-b1dd-0310-bc7b-ec298dee348c - -commit 84676d2eba9fc18d62e168b60d7d1118d1c232d3 -Author: Loren Merritt -Date: Thu Apr 5 16:11:03 2007 +0000 - - 32bit version of ssse3 satd. - switch default assembler to yasm. it will still fallback to nasm if you don't have yasm. - - - git-svn-id: svn://svn.videolan.org/x264/trunk@645 df754926-b1dd-0310-bc7b-ec298dee348c - -commit 71c097b28e1405076f554d3b948885fd69c1774f -Author: Loren Merritt -Date: Wed Apr 4 19:34:02 2007 +0000 - - simplify trellis - - - git-svn-id: svn://svn.videolan.org/x264/trunk@644 df754926-b1dd-0310-bc7b-ec298dee348c - -commit 12681eea55239d21eb8ce0ca058c5d859afedc9f -Author: Loren Merritt -Date: Wed Apr 4 18:59:20 2007 +0000 - - fix an arithmetic overflow in trellis with QP >= 42 - - - git-svn-id: svn://svn.videolan.org/x264/trunk@643 df754926-b1dd-0310-bc7b-ec298dee348c - -commit 10265a0c2a0b29e6252ad3be6fad1569e7a04339 -Author: Loren Merritt -Date: Wed Apr 4 18:45:25 2007 +0000 - - 2x faster quant. 2% overall. - side effects: - not bit-identical to the previous algorithm. - while the new algorithm covers a wider range of cqms than the previous one did, - I couldn't find a good way to fallback to a general version for the extreme - cqms. so now it refuses to encode extreme cqms instead of just being slower. - lays a framework for custom deadzone matrices, though I didn't add an api. 
- - - git-svn-id: svn://svn.videolan.org/x264/trunk@642 df754926-b1dd-0310-bc7b-ec298dee348c - -commit b37ac36e8442dd8fd8f25933b6a0a119d471e8f7 -Author: Loren Merritt -Date: Wed Apr 4 18:35:51 2007 +0000 - - when encoding with a cqm, probe_skip now also uses the cqm, instead of the flat matrix - - - git-svn-id: svn://svn.videolan.org/x264/trunk@641 df754926-b1dd-0310-bc7b-ec298dee348c - -commit e3a07e098f96dfc2dbde8da6cad77ed012d4397e -Author: Loren Merritt -Date: Wed Apr 4 00:48:55 2007 +0000 - - cosmetics in asm macros - - - git-svn-id: svn://svn.videolan.org/x264/trunk@640 df754926-b1dd-0310-bc7b-ec298dee348c - -commit 71943e8acbfdb85d944f2800e49bcb7902afaaf3 -Author: Alex Izvorski -Date: Tue Apr 3 17:18:17 2007 +0000 - - use only c-style comments in public header (patch by Vincent Torres) - - git-svn-id: svn://svn.videolan.org/x264/trunk@639 df754926-b1dd-0310-bc7b-ec298dee348c - -commit dfb854775c7b52945a84ef756dc88a4ccb7c2d2c -Author: Loren Merritt -Date: Mon Apr 2 23:56:09 2007 +0000 - - in hpel search, merge two 16x16 mc calls into one 16x17. 15% faster hpel, .3% overall. - - - git-svn-id: svn://svn.videolan.org/x264/trunk@638 df754926-b1dd-0310-bc7b-ec298dee348c - -commit e63c3924ef2c9790aa6440fa11dddf1026862f23 -Author: Christophe Mutricy -Date: Mon Apr 2 19:17:28 2007 +0000 - - Compile fix - - - git-svn-id: svn://svn.videolan.org/x264/trunk@637 df754926-b1dd-0310-bc7b-ec298dee348c - -commit dd7e21c6cba26298eac190abcb702b490bd98c5e -Author: Loren Merritt -Date: Fri Mar 30 20:20:36 2007 +0000 - - remove private stuff from public headers. 
no more need for -D__X264__ - - - git-svn-id: svn://svn.videolan.org/x264/trunk@636 df754926-b1dd-0310-bc7b-ec298dee348c - -commit 87fdea89e007258c2d48156f14b883ef29c02831 -Author: Loren Merritt -Date: Sat Mar 24 12:58:27 2007 +0000 - - adjust bitstream buffer sizes for very large frames - - - git-svn-id: svn://svn.videolan.org/x264/trunk@635 df754926-b1dd-0310-bc7b-ec298dee348c - -commit 8b37cc6aa5e23bf4a529b79745f73dedab1fa4ff -Author: Loren Merritt -Date: Wed Mar 14 22:41:50 2007 +0000 - - conflate HAVE_MMXEXT with HAVE_SSE2, since they were never used distinctly. - - - git-svn-id: svn://svn.videolan.org/x264/trunk@634 df754926-b1dd-0310-bc7b-ec298dee348c - -commit 11ef32f432b8e055c30c99531e25320dbce8f656 -Author: Sam Hocevar -Date: Wed Mar 14 21:53:47 2007 +0000 - - * Made -DNEED_ALTIVEC unnecessary, thanks to Guillaume Poirier. - - - git-svn-id: svn://svn.videolan.org/x264/trunk@633 df754926-b1dd-0310-bc7b-ec298dee348c - -commit 37ad2377b8781330d400985ce12a1ec61067a5e4 -Author: Sam Hocevar -Date: Wed Mar 14 21:31:50 2007 +0000 - - * check x264_cpu_detect() before calling AltiVec functions. - - - git-svn-id: svn://svn.videolan.org/x264/trunk@632 df754926-b1dd-0310-bc7b-ec298dee348c - -commit 8aef0e941d986f10427cc2d3a848162065bdef3a -Author: Loren Merritt -Date: Wed Mar 14 21:11:11 2007 +0000 - - ssse3 detection. x86_64 ssse3 satd and quant. - requires yasm >= 0.6.0 - - - git-svn-id: svn://svn.videolan.org/x264/trunk@631 df754926-b1dd-0310-bc7b-ec298dee348c - -commit 1980de9bba111561be5ad3dde37b6f7a29a80a4e -Author: Sam Hocevar -Date: Wed Mar 14 20:40:24 2007 +0000 - - * Use -maltivec when building dependencies, or <altivec.h> cannot be used. - * Do not declare vectors in non-AltiVec files. - - - git-svn-id: svn://svn.videolan.org/x264/trunk@630 df754926-b1dd-0310-bc7b-ec298dee348c - -commit e1a4ae9ef58f461aa8ca1e0a1f88140a61d03680 -Author: Sam Hocevar -Date: Wed Mar 14 18:04:06 2007 +0000 - - * common/cpu.c: runtime AltiVec autodetection on Linux. 
- * configure, Makefile: do not build the whole project with -maltivec because - it generates AltiVec code in weird places. - - - git-svn-id: svn://svn.videolan.org/x264/trunk@629 df754926-b1dd-0310-bc7b-ec298dee348c - -commit f81c3eafa27ad90a02c1d87a9f74b509199ddb63 -Author: Loren Merritt -Date: Mon Mar 5 15:35:42 2007 +0000 - - fix a small memleak. - patch by Limin Wang. - - - git-svn-id: svn://svn.videolan.org/x264/trunk@628 df754926-b1dd-0310-bc7b-ec298dee348c - -commit 62fc8a9c507166baaf104e69fd4c055d41fb0dce -Author: Guillaume Poirier -Date: Sat Mar 3 12:59:23 2007 +0000 - - compile fix for GCC-3.3 on OSX, based on a patch by - Patrice Bensoussan % patrice P bensoussan A free P fr% - Note: regression tests still do not pass with GCC-3.3, - but they never did as far as I can remember. - - - git-svn-id: svn://svn.videolan.org/x264/trunk@627 df754926-b1dd-0310-bc7b-ec298dee348c - -commit 89930744ed6921c5dfb0ffdfad2ca6059b2e6c6a -Author: Loren Merritt -Date: Sat Mar 3 12:12:54 2007 +0000 - - cosmetics in regression test - - - git-svn-id: svn://svn.videolan.org/x264/trunk@626 df754926-b1dd-0310-bc7b-ec298dee348c - -commit 6e1bbd2ff961ee51d8a5bd566784f19fb194c028 -Author: Alex Izvorski -Date: Sat Mar 3 11:44:01 2007 +0000 - - regression testing, run similar to fprofiled: VIDS='vid_720x480.yuv' make test - - git-svn-id: svn://svn.videolan.org/x264/trunk@625 df754926-b1dd-0310-bc7b-ec298dee348c - -commit 435b675bac7e89d34644affd69bf88a82ddc9242 -Author: Alex Izvorski -Date: Wed Feb 28 18:47:04 2007 +0000 - - add ability to generate doxygen documentation; make dox - - git-svn-id: svn://svn.videolan.org/x264/trunk@624 df754926-b1dd-0310-bc7b-ec298dee348c - -commit dac2be0cc94b897749381135e248cd1844f58fda -Author: Loren Merritt -Date: Thu Feb 22 05:01:38 2007 +0000 - - oops, scenecut detection failed to activate when using threads and not using B-frames - - - git-svn-id: svn://svn.videolan.org/x264/trunk@623 df754926-b1dd-0310-bc7b-ec298dee348c - -commit 
49d16d48a132b3de8b80a59752e95dbe896e7477 -Author: Loren Merritt -Date: Mon Jan 29 14:42:42 2007 +0000 - - extras/getopt.c was BSD licensed. replace with a LGPL version (from glibc). - - - git-svn-id: svn://svn.videolan.org/x264/trunk@622 df754926-b1dd-0310-bc7b-ec298dee348c - -commit 972172560df8efb3d2162938526b3a3c812710a4 -Author: Guillaume Poirier -Date: Thu Jan 25 08:32:16 2007 +0000 - - Fix build issues on Linux. Only gcc-4.x is supported, as on OSX. - Cleans up a few inconsistencies in the code too. - - - git-svn-id: svn://svn.videolan.org/x264/trunk@621 df754926-b1dd-0310-bc7b-ec298dee348c - -commit c9cd0fce3fcdb83fee9a49987abaa9983b4d1cf4 -Author: Loren Merritt -Date: Sun Jan 21 12:12:04 2007 +0000 - - tweak block_residual_write_cavlc. - up to 1% faster lossless, no difference at normal bitrates. - - - git-svn-id: svn://svn.videolan.org/x264/trunk@620 df754926-b1dd-0310-bc7b-ec298dee348c - -commit 4c949c31a111c461b0362409baaff4550b7f664f -Author: Loren Merritt -Date: Sat Jan 20 05:07:44 2007 +0000 - - don't assume int is exactly 4 bytes - - - git-svn-id: svn://svn.videolan.org/x264/trunk@619 df754926-b1dd-0310-bc7b-ec298dee348c - -commit cf9e6c5c0ea4771da523d1a95517b2276cca6cf1 -Author: Loren Merritt -Date: Thu Jan 11 23:55:51 2007 +0000 - - make array_non_zero() compatible with -fstrict-aliasing - - - git-svn-id: svn://svn.videolan.org/x264/trunk@618 df754926-b1dd-0310-bc7b-ec298dee348c - -commit 285f98e197be56efde6b2c42832625193c432c54 -Author: Christophe Mutricy -Date: Tue Jan 9 20:25:32 2007 +0000 - - Honor CFLAGS and LDFLAGS set by the user - - - git-svn-id: svn://svn.videolan.org/x264/trunk@617 df754926-b1dd-0310-bc7b-ec298dee348c - -commit 0fe97f423591939454ac7d272fe5fb5dde837b3f -Author: Eric Petit -Date: Tue Jan 2 14:51:10 2007 +0000 - - Check whether 'echo -n' works, otherwise try printf (fixes build on current OS X 10.5) - - - git-svn-id: svn://svn.videolan.org/x264/trunk@616 df754926-b1dd-0310-bc7b-ec298dee348c - -commit 
bbc68bea7c30988d9f59e2ac99f88f9188f65654 -Author: Eric Petit -Date: Mon Jan 1 22:41:44 2007 +0000 - - Check version of nasm on OS X / Intel - - - git-svn-id: svn://svn.videolan.org/x264/trunk@615 df754926-b1dd-0310-bc7b-ec298dee348c - -commit b630af6c4af3118453d91aae75d2e61c513f2010 -Author: Loren Merritt -Date: Wed Dec 20 04:22:59 2006 +0000 - - wrong reference frames were used with refs>=14 + pyramid (regression in r607) - - - git-svn-id: svn://svn.videolan.org/x264/trunk@614 df754926-b1dd-0310-bc7b-ec298dee348c - -commit 01e1db245d0c1c6119a8316994f2083c6096811a -Author: Loren Merritt -Date: Tue Dec 19 21:24:47 2006 +0000 - - enable thread synchronization primitives on linux too - - - git-svn-id: svn://svn.videolan.org/x264/trunk@613 df754926-b1dd-0310-bc7b-ec298dee348c - -commit 34c6fb35ac799907caed6ad1cbbc287136c6b6c6 -Author: Loren Merritt -Date: Tue Dec 19 09:35:45 2006 +0000 - - fix a crash with x264_encoder_headers() + threads - - - git-svn-id: svn://svn.videolan.org/x264/trunk@612 df754926-b1dd-0310-bc7b-ec298dee348c - -commit dfe7bb017bface15c9ed47ab5924062300a79e5c -Author: Loren Merritt -Date: Sat Dec 16 00:46:37 2006 +0000 - - don't skip autodetection on configure --enable-pthread - - - git-svn-id: svn://svn.videolan.org/x264/trunk@611 df754926-b1dd-0310-bc7b-ec298dee348c - -commit ab3b6602d733c9382a17b9e20a4478efd7ae5994 -Author: Loren Merritt -Date: Sat Dec 16 00:32:38 2006 +0000 - - more win32threads -> pthreads - - - git-svn-id: svn://svn.videolan.org/x264/trunk@610 df754926-b1dd-0310-bc7b-ec298dee348c - -commit cc753d6ba6634cfeee9c8b461bc996adf7e1aee6 -Author: Loren Merritt -Date: Fri Dec 15 23:08:57 2006 +0000 - - cosmetics: rename list operators to be consistent with Perl, and move them to common/ - - - git-svn-id: svn://svn.videolan.org/x264/trunk@609 df754926-b1dd-0310-bc7b-ec298dee348c - -commit 87f1430384a1c61035a4ca595e5defecf4b64cb8 -Author: Loren Merritt -Date: Fri Dec 15 23:06:21 2006 +0000 - - win32: use pthreads instead of win32threads. 
for some reason, pthreads is much faster. - - - git-svn-id: svn://svn.videolan.org/x264/trunk@608 df754926-b1dd-0310-bc7b-ec298dee348c - -commit 7b4f6a1fd95c7e0ab479e116fe59e66e5d1fd107 -Author: Loren Merritt -Date: Fri Dec 15 23:03:36 2006 +0000 - - New threading method: - Encode multiple frames in parallel instead of dividing each frame into slices. - Improves speed, and reduces the bitrate penalty of threading. - - Side effects: - It is no longer possible to re-encode a frame, so threaded scenecut detection - must run in the pre-me pass, which is faster but less precise. - It is now useful to use more threads than you have cpus. --threads=auto has - been updated to use cpus*1.5. - Minor changes to ratecontrol. - - New options: --pre-scenecut, --mvrange-thread, --non-deterministic - - - git-svn-id: svn://svn.videolan.org/x264/trunk@607 df754926-b1dd-0310-bc7b-ec298dee348c - -commit fa2c1e5430619c6011dfe2ffbebbd59557afa228 -Author: Sam Hocevar -Date: Tue Dec 12 02:17:44 2006 +0000 - - * Do not assume anything about sizeof(cpu_set_t). - - - git-svn-id: svn://svn.videolan.org/x264/trunk@606 df754926-b1dd-0310-bc7b-ec298dee348c - -commit 167abec138939a7475b481c6cf4a3738601f279a -Author: Sam Hocevar -Date: Mon Dec 11 16:01:49 2006 +0000 - - * Add support for kFreeBSD (FreeBSD kernel with GNU userland). 
- - - git-svn-id: svn://svn.videolan.org/x264/trunk@605 df754926-b1dd-0310-bc7b-ec298dee348c - -commit e8cc72c7acc75df584cece7da9176eb4be6c9d36 -Author: Guillaume Poirier -Date: Mon Nov 27 21:40:21 2006 +0000 - - Add Altivec implementations of add8x8_idct8, add16x16_idct8, sa8d_8x8 and sa8d_16x16 - Note: doesn't take advantage of some possible aligned memory accesses, so there's still room for improvement - - - git-svn-id: svn://svn.videolan.org/x264/trunk@604 df754926-b1dd-0310-bc7b-ec298dee348c - -commit 575b238cb5e5bdb18b5541456d6eb4781bcd36d5 -Author: Eric Petit -Date: Sat Nov 25 16:31:24 2006 +0000 - - Force alignment of the fake .rodata on MacIntel - - - git-svn-id: svn://svn.videolan.org/x264/trunk@603 df754926-b1dd-0310-bc7b-ec298dee348c - -commit 4659f518e453250783fd13bedda8a8560e8d32a5 -Author: Loren Merritt -Date: Thu Nov 23 03:13:18 2006 +0000 - - don't treat vbv_maxrate as a minrate too if it's higher than target average bitrate. - - - git-svn-id: svn://svn.videolan.org/x264/trunk@602 df754926-b1dd-0310-bc7b-ec298dee348c - -commit c494e9c5e22d0fbe30deb7f2c1b932ac9fb449b5 -Author: Eric Petit -Date: Sat Nov 18 14:38:07 2006 +0000 - - Merges Guillaume Poirier's AltiVec changes: - * Adds optimized quant and sub*dct8 routines - * Faster sub*dct routines - ~8% overall speed-up with default settings - - - git-svn-id: svn://svn.videolan.org/x264/trunk@601 df754926-b1dd-0310-bc7b-ec298dee348c - -commit 41c111bc13badf2f7fa71165c94d4b528213a8cd -Author: Loren Merritt -Date: Mon Nov 6 22:49:41 2006 +0000 - - 10% faster deblock mmx functions. ported from ffmpeg. 
- - - git-svn-id: svn://svn.videolan.org/x264/trunk@600 df754926-b1dd-0310-bc7b-ec298dee348c - -commit 6eeb78f6a8bc0541ea68fb84ba41c456e56a2589 -Author: Loren Merritt -Date: Mon Nov 6 22:38:42 2006 +0000 - - checkasm: ignore insignificant differences in floating-point ssim - - - git-svn-id: svn://svn.videolan.org/x264/trunk@599 df754926-b1dd-0310-bc7b-ec298dee348c - -commit 485172deeb75ca1eb88ac24b6ab6719b8a16825b -Author: Loren Merritt -Date: Mon Oct 30 02:31:48 2006 +0000 - - display final ratefactor in abr when a loose vbv is applied. (still disabled in true cbr) - - - git-svn-id: svn://svn.videolan.org/x264/trunk@598 df754926-b1dd-0310-bc7b-ec298dee348c - -commit a9d754a37787daccdd88e4d08ec95f9b9ae59a8c -Author: Loren Merritt -Date: Mon Oct 30 00:09:21 2006 +0000 - - fix parsing of --deblock %d,%d (beta was ignored) - - - git-svn-id: svn://svn.videolan.org/x264/trunk@597 df754926-b1dd-0310-bc7b-ec298dee348c - -commit ccac553deccf9646fdc53728f91caa988ee09176 -Author: Loren Merritt -Date: Sun Oct 29 05:48:57 2006 +0000 - - compute chroma_qp only once per mb - - - git-svn-id: svn://svn.videolan.org/x264/trunk@596 df754926-b1dd-0310-bc7b-ec298dee348c - -commit 94a4aede9033614fa810ec88ca92dcd39822b544 -Author: Loren Merritt -Date: Sun Oct 29 01:17:33 2006 +0000 - - rd refinement of intra chroma direction (enabled in --subme 7) - patch by Alex Wright. 
- - - git-svn-id: svn://svn.videolan.org/x264/trunk@595 df754926-b1dd-0310-bc7b-ec298dee348c - -commit e7a4aba997a82cfd5c1d5217d413513d52b6af3e -Author: Loren Merritt -Date: Wed Oct 18 04:06:44 2006 +0000 - - fix a crash in avc2avi - - - git-svn-id: svn://svn.videolan.org/x264/trunk@594 df754926-b1dd-0310-bc7b-ec298dee348c - -commit 41b85c4fa872df880352024aa2d707b1813a4258 -Author: Loren Merritt -Date: Sun Oct 15 23:39:03 2006 +0000 - - skip deblocking and motion interpolation when using only I-frames - - - git-svn-id: svn://svn.videolan.org/x264/trunk@593 df754926-b1dd-0310-bc7b-ec298dee348c - -commit 197a94a8cfff879ef3691eb178b5b617bda03ffb -Author: Loren Merritt -Date: Fri Oct 13 23:50:57 2006 +0000 - - cosmetics - - - git-svn-id: svn://svn.videolan.org/x264/trunk@592 df754926-b1dd-0310-bc7b-ec298dee348c - -commit 93b54ce1fbbbb191e69d8c6e33897037ba5bbfd1 -Author: Loren Merritt -Date: Fri Oct 13 20:04:58 2006 +0000 - - allow fractional values of crf - - - git-svn-id: svn://svn.videolan.org/x264/trunk@591 df754926-b1dd-0310-bc7b-ec298dee348c - -commit 8d1ebe2eeb30a204b588502d69d361ee85187821 -Author: Loren Merritt -Date: Tue Oct 10 21:26:31 2006 +0000 - - prefetch pixels for motion compensation and deblocking. - - - git-svn-id: svn://svn.videolan.org/x264/trunk@590 df754926-b1dd-0310-bc7b-ec298dee348c - -commit 9fadbd7b82a1bd785e206dfef81af066c7470a2f -Author: Loren Merritt -Date: Tue Oct 10 19:16:39 2006 +0000 - - fix a crash on interlace + >8 reference frames - - - git-svn-id: svn://svn.videolan.org/x264/trunk@589 df754926-b1dd-0310-bc7b-ec298dee348c - -commit 4107178cc2998dd37cca106898e16e833a99b50e -Author: Loren Merritt -Date: Tue Oct 10 05:05:55 2006 +0000 - - no more decoder. it never worked anyway, and the presence of defunct code was confusing people. 
- - - git-svn-id: svn://svn.videolan.org/x264/trunk@588 df754926-b1dd-0310-bc7b-ec298dee348c - -commit 9243c844dfc47d6dea825a0a92bd8a990d6c3cac -Author: Loren Merritt -Date: Mon Oct 9 23:31:45 2006 +0000 - - compute pskip_mv only once per macroblock, and store it - - - git-svn-id: svn://svn.videolan.org/x264/trunk@587 df754926-b1dd-0310-bc7b-ec298dee348c - -commit 0a453377cdac1aa50a4f21be0ddfba2a93719603 -Author: Loren Merritt -Date: Mon Oct 9 20:55:54 2006 +0000 - - slightly faster chroma_mc_mmx - - - git-svn-id: svn://svn.videolan.org/x264/trunk@586 df754926-b1dd-0310-bc7b-ec298dee348c - -commit 42bb1b494e19732fb510fb42f30bf5ae0a66dd71 -Author: Loren Merritt -Date: Mon Oct 9 17:44:47 2006 +0000 - - missing emms in plane_copy_mmx - - - git-svn-id: svn://svn.videolan.org/x264/trunk@585 df754926-b1dd-0310-bc7b-ec298dee348c - -commit 43e4162f3026f964d0bcf4502afe13cdaaa53f4d -Author: Loren Merritt -Date: Fri Oct 6 23:25:41 2006 +0000 - - merge center_filter_mmx with horizontal_filter_mmx - - - git-svn-id: svn://svn.videolan.org/x264/trunk@584 df754926-b1dd-0310-bc7b-ec298dee348c - -commit c485e7e75bf24036c9438467ba854bd17122e277 -Author: Loren Merritt -Date: Fri Oct 6 05:43:53 2006 +0000 - - 1.5x faster center_filter_mmx (amd64) - - - git-svn-id: svn://svn.videolan.org/x264/trunk@583 df754926-b1dd-0310-bc7b-ec298dee348c - -commit 299d3ed4ee3c8e875a428bf872bfe791a7ccd687 -Author: Loren Merritt -Date: Fri Oct 6 00:02:59 2006 +0000 - - mmx/prefetch implementation of plane_copy - - - git-svn-id: svn://svn.videolan.org/x264/trunk@582 df754926-b1dd-0310-bc7b-ec298dee348c - -commit 9f05a7fdfc08e0279a825dc851b61c61063564b3 -Author: Loren Merritt -Date: Thu Oct 5 08:15:55 2006 +0000 - - no more vfw - - - git-svn-id: svn://svn.videolan.org/x264/trunk@581 df754926-b1dd-0310-bc7b-ec298dee348c - -commit aabb91c80b714e9a8e2bbdca8beaf5655e9e9540 -Author: Loren Merritt -Date: Thu Oct 5 07:44:22 2006 +0000 - - gtk fixes: - in Makefile - - fix datadir for mingw users - - remove the 
shared lib during the clean rule - - use $(ENCODE_BIN) instead of x264_gtk_encode - - add some $(DESTDIR) and create some directories when necessary - - remove -lintl - statfile_length -> statsfile_length - fix the "sensitivity" of the widget of update_statfile - the logo is now handled correctly on windows - added: beginning of multipass support - patch by Vincent Torri. - - - git-svn-id: svn://svn.videolan.org/x264/trunk@580 df754926-b1dd-0310-bc7b-ec298dee348c - -commit 90a9219545cf073847160933d1c7d3e9b761e909 -Author: Loren Merritt -Date: Thu Oct 5 01:57:00 2006 +0000 - - accept mencoder's option names as synonyms (api only, not in x264cli) - - - git-svn-id: svn://svn.videolan.org/x264/trunk@579 df754926-b1dd-0310-bc7b-ec298dee348c - -commit cb8b5dad7dc99c13e591aaf74b84b406ea80b69e -Author: Loren Merritt -Date: Tue Oct 3 01:39:38 2006 +0000 - - simplify satd_sse2 - - - git-svn-id: svn://svn.videolan.org/x264/trunk@578 df754926-b1dd-0310-bc7b-ec298dee348c - -commit 04834f596bb9bec1e8afd7ab7b1eee74ad13df0a -Author: Loren Merritt -Date: Mon Oct 2 08:31:48 2006 +0000 - - better error checking in x264_param_parse. - add synonyms for a few options. - - - git-svn-id: svn://svn.videolan.org/x264/trunk@577 df754926-b1dd-0310-bc7b-ec298dee348c - -commit 0cbf0fc27bd391509d788ebb2e06b930bf840925 -Author: Loren Merritt -Date: Mon Oct 2 02:46:23 2006 +0000 - - fix some strides that weren't a multiple of 16. - - - git-svn-id: svn://svn.videolan.org/x264/trunk@576 df754926-b1dd-0310-bc7b-ec298dee348c - -commit 58e12b0e792596ae4eac95df4ae358ca664a6c20 -Author: Loren Merritt -Date: Sun Oct 1 13:08:42 2006 +0000 - - tweak motion compensation amd64 asm. 0.3% overall speedup. 
- - - git-svn-id: svn://svn.videolan.org/x264/trunk@575 df754926-b1dd-0310-bc7b-ec298dee348c - -commit 6de50f51151c29331040f8c24bf3697c063fe0dd -Author: Loren Merritt -Date: Sun Oct 1 08:06:22 2006 +0000 - - strip local symbols from asm .o files, since they confuse oprofile - - - git-svn-id: svn://svn.videolan.org/x264/trunk@574 df754926-b1dd-0310-bc7b-ec298dee348c - -commit f9cc941183c0b190d09e030a30c537259c3e4088 -Author: Loren Merritt -Date: Sun Oct 1 07:25:01 2006 +0000 - - add an option to control direct_8x8_inference_flag, default to enabled. - slightly faster encoding and decoding of p4x4 + B-frames, - and is needed for strict Levels compliance. - - - git-svn-id: svn://svn.videolan.org/x264/trunk@573 df754926-b1dd-0310-bc7b-ec298dee348c - -commit 8f0864a9f5755a02158cbb96c215afee95846639 -Author: Loren Merritt -Date: Sun Oct 1 03:05:15 2006 +0000 - - allow custom deadzones for non-trellis quantization. - patch by Alex Wright. - - - git-svn-id: svn://svn.videolan.org/x264/trunk@572 df754926-b1dd-0310-bc7b-ec298dee348c - -commit a7cd9cf8ba99e4703805d34eef494d47850f9b99 -Author: Loren Merritt -Date: Sun Oct 1 02:44:36 2006 +0000 - - move zigzag scan functions to dsp function pointers. - mmx implementation of interlaced zigzag. - - - git-svn-id: svn://svn.videolan.org/x264/trunk@571 df754926-b1dd-0310-bc7b-ec298dee348c - -commit faec300a71cdb64e1bd27d393de51d2e3d1f5992 -Author: Loren Merritt -Date: Sun Oct 1 02:41:22 2006 +0000 - - support interlace. uses MBAFF syntax, but is not adaptive yet. 
- - - git-svn-id: svn://svn.videolan.org/x264/trunk@570 df754926-b1dd-0310-bc7b-ec298dee348c - -commit 3b7857057005ebecc9852ef56c9f725d26b94bc4 -Author: Loren Merritt -Date: Wed Sep 27 06:37:19 2006 +0000 - - allow --zones in cqp encodes - - - git-svn-id: svn://svn.videolan.org/x264/trunk@569 df754926-b1dd-0310-bc7b-ec298dee348c - -commit 7960eaf2451a445078a3c55b1ade5efc1948e02f -Author: Loren Merritt -Date: Tue Sep 26 19:27:07 2006 +0000 - - cli: fix some typos in vui parameters from r542. - patch by Foxy Shadis. - - - git-svn-id: svn://svn.videolan.org/x264/trunk@568 df754926-b1dd-0310-bc7b-ec298dee348c - -commit 1b93c2a5cccb6ad7aedfd2746c671dcfffe60795 -Author: Sam Hocevar -Date: Mon Sep 25 10:25:55 2006 +0000 - - * Add an "all" rule to the Makefile. Ideally "default" should be renamed, - but I don't want to break existing scripts. - - - git-svn-id: svn://svn.videolan.org/x264/trunk@567 df754926-b1dd-0310-bc7b-ec298dee348c - -commit 52b8a0530721abb276c286d95a7827da3529baa4 -Author: Loren Merritt -Date: Sun Sep 24 21:35:56 2006 +0000 - - workaround: on some systems, alloca() isn't aligned - - - git-svn-id: svn://svn.videolan.org/x264/trunk@566 df754926-b1dd-0310-bc7b-ec298dee348c - -commit 7bfc7360d713aa651b71691a9fb85f34104573e4 -Author: Loren Merritt -Date: Fri Sep 22 16:39:05 2006 +0000 - - missing picpop - - - git-svn-id: svn://svn.videolan.org/x264/trunk@565 df754926-b1dd-0310-bc7b-ec298dee348c - -commit 460699ffdfaf263faaebd720c82475d1fb949279 -Author: Loren Merritt -Date: Wed Sep 13 19:24:13 2006 +0000 - - fix a buffer overread from r540 - - - git-svn-id: svn://svn.videolan.org/x264/trunk@564 df754926-b1dd-0310-bc7b-ec298dee348c - -commit 55c208edb7e3a3ad54c1c1412577003b1eb13a69 -Author: Loren Merritt -Date: Tue Sep 12 23:32:21 2006 +0000 - - cosmetics (spelling) - - - git-svn-id: svn://svn.videolan.org/x264/trunk@563 df754926-b1dd-0310-bc7b-ec298dee348c - -commit 8850b6faaf55b83ed3aa86ff9fcb5e35c439b236 -Author: Loren Merritt -Date: Tue Sep 12 22:21:23 
2006 +0000 - - faster ESA - - - git-svn-id: svn://svn.videolan.org/x264/trunk@562 df754926-b1dd-0310-bc7b-ec298dee348c - -commit f8652aab3dda281aa446ead0674d7e1f1c6d6e74 -Author: Loren Merritt -Date: Tue Sep 12 22:18:29 2006 +0000 - - faster ESA - - - git-svn-id: svn://svn.videolan.org/x264/trunk@561 df754926-b1dd-0310-bc7b-ec298dee348c - -commit a020d6ec4933d9f431be5bad429289b87efabe17 -Author: Sam Hocevar -Date: Sun Sep 10 17:37:13 2006 +0000 - - * Use the autotool's config.guess script instead of uname to check the - system and CPU types, to avoid issues when using for instance a 32-bit - userland on top of a 64-bit kernel. - - - git-svn-id: svn://svn.videolan.org/x264/trunk@560 df754926-b1dd-0310-bc7b-ec298dee348c - -commit 477c5bfb641d0c662a98a3474c6a2a441476b5a1 -Author: Sam Hocevar -Date: Sun Sep 10 17:16:21 2006 +0000 - - * Add the autotool's config.guess script so that we can use it instead - of uname in the configure script. - - - git-svn-id: svn://svn.videolan.org/x264/trunk@559 df754926-b1dd-0310-bc7b-ec298dee348c - -commit 4a5a03bbba8a8100d84bf5c30709eec133dda282 -Author: Loren Merritt -Date: Tue Aug 22 07:43:14 2006 +0000 - - 10l in r553 - - - git-svn-id: svn://svn.videolan.org/x264/trunk@558 df754926-b1dd-0310-bc7b-ec298dee348c - -commit 73657d88d1e1371d684eea805fb88c008e44e96b -Author: Loren Merritt -Date: Mon Aug 21 00:46:20 2006 +0000 - - ssim broke on amd64 w/ pic. 
- - - git-svn-id: svn://svn.videolan.org/x264/trunk@557 df754926-b1dd-0310-bc7b-ec298dee348c - -commit 1808700e26023fccfabec9e65ee7e4fb18ae57f2 -Author: Steve Lhomme -Date: Fri Aug 18 20:50:10 2006 +0000 - - MSVC compatibility fix from Haali - - git-svn-id: svn://svn.videolan.org/x264/trunk@556 df754926-b1dd-0310-bc7b-ec298dee348c - -commit f78c224c21e991b55deb637d35b30b06a78d78da -Author: Loren Merritt -Date: Thu Aug 17 22:49:45 2006 +0000 - - support changing some more parameters in x264_encoder_reconfig() - - - git-svn-id: svn://svn.videolan.org/x264/trunk@555 df754926-b1dd-0310-bc7b-ec298dee348c - -commit 7760f1b2e78360542e31eb55db81e84dcb4f95ac -Author: Loren Merritt -Date: Thu Aug 17 21:57:59 2006 +0000 - - SSIM computation. (default on, disable by --no-ssim) - - - git-svn-id: svn://svn.videolan.org/x264/trunk@554 df754926-b1dd-0310-bc7b-ec298dee348c - -commit 127e2fbf0a338549b00f6a3022ce1d2bab1d2acb -Author: Loren Merritt -Date: Wed Aug 16 20:13:06 2006 +0000 - - configure: --enable-debug reduces optimization to -O1 - - - git-svn-id: svn://svn.videolan.org/x264/trunk@553 df754926-b1dd-0310-bc7b-ec298dee348c - -commit dc5d530e13e0aa38218feb7c6c1fc4b75c0b7261 -Author: Loren Merritt -Date: Wed Aug 16 19:57:08 2006 +0000 - - cosmetics - - - git-svn-id: svn://svn.videolan.org/x264/trunk@552 df754926-b1dd-0310-bc7b-ec298dee348c - -commit 12bd065367f7aa0a4efc550dd595142d783a9525 -Author: Loren Merritt -Date: Fri Aug 4 03:12:43 2006 +0000 - - gcc -fprofile-generate isn't threadsafe - - - git-svn-id: svn://svn.videolan.org/x264/trunk@551 df754926-b1dd-0310-bc7b-ec298dee348c - -commit b3f15918ac03a0fc3b6fe1a8311fabedf5fa6b53 -Author: Loren Merritt -Date: Thu Aug 3 19:49:17 2006 +0000 - - cli: move some options from --help to --longhelp - - - git-svn-id: svn://svn.videolan.org/x264/trunk@550 df754926-b1dd-0310-bc7b-ec298dee348c - -commit 64a8b781013148bd351c2a45fdcb6a9aaf26ff4a -Author: Loren Merritt -Date: Thu Aug 3 18:22:08 2006 +0000 - - cli: don't try to get 
resolution from filename unless input is rawyuv - - - git-svn-id: svn://svn.videolan.org/x264/trunk@549 df754926-b1dd-0310-bc7b-ec298dee348c - -commit de41ef2605bf1d5ded851888438a55a6bf66c42a -Author: Loren Merritt -Date: Thu Aug 3 18:13:56 2006 +0000 - - r542 broke --visualize - - - git-svn-id: svn://svn.videolan.org/x264/trunk@548 df754926-b1dd-0310-bc7b-ec298dee348c - -commit 1ff80a23db8912967238d1d9d4df2aebeeafbd1a -Author: Eric Petit -Date: Wed Aug 2 18:11:21 2006 +0000 - - Nicer OS X x264_cpu_num_processors (thanks David) - - - git-svn-id: svn://svn.videolan.org/x264/trunk@547 df754926-b1dd-0310-bc7b-ec298dee348c - -commit 3493a54ddfd504fdd993c98a321916f98b96b09d -Author: Eric Petit -Date: Tue Aug 1 15:20:35 2006 +0000 - - Support OS X and BeOS in x264_cpu_num_processors - - - git-svn-id: svn://svn.videolan.org/x264/trunk@546 df754926-b1dd-0310-bc7b-ec298dee348c - -commit 86ec16c126168adbb6f615159e26065b4b0000a7 -Author: Eric Petit -Date: Tue Aug 1 15:18:31 2006 +0000 - - Fixes contexts allocation with threads=auto - - - git-svn-id: svn://svn.videolan.org/x264/trunk@545 df754926-b1dd-0310-bc7b-ec298dee348c - -commit eeebca20ad854271fab898f306f1657887ca6588 -Author: Loren Merritt -Date: Tue Aug 1 02:22:36 2006 +0000 - - select initial qp for abr and cbr baased on satd and bitrate, rather than cq24. 
- - - git-svn-id: svn://svn.videolan.org/x264/trunk@544 df754926-b1dd-0310-bc7b-ec298dee348c - -commit 9e9a869aa7dc3c39a883a5a5886c0bbd82e8b95c -Author: Loren Merritt -Date: Tue Aug 1 00:17:18 2006 +0000 - - --threads=auto to detect number of cpus - - - git-svn-id: svn://svn.videolan.org/x264/trunk@543 df754926-b1dd-0310-bc7b-ec298dee348c - -commit 0b07708cdab6c2f54673930c10d2908c53e19120 -Author: Loren Merritt -Date: Mon Jul 31 21:59:04 2006 +0000 - - api addition: x264_param_parse() to set options by name - - - git-svn-id: svn://svn.videolan.org/x264/trunk@542 df754926-b1dd-0310-bc7b-ec298dee348c - -commit 99b8471e5032bfab1f81cb04fb350ee6fa878561 -Author: Loren Merritt -Date: Mon Jul 31 06:34:53 2006 +0000 - - fix a rare NaN in ratecontrol - - - git-svn-id: svn://svn.videolan.org/x264/trunk@541 df754926-b1dd-0310-bc7b-ec298dee348c - -commit adc4b4f85e3682bd4868df21babcea725c919bef -Author: Loren Merritt -Date: Sun Jul 30 02:39:05 2006 +0000 - - move quant_mf[] from x264_t to the heap, and merge duplicate entries - - - git-svn-id: svn://svn.videolan.org/x264/trunk@540 df754926-b1dd-0310-bc7b-ec298dee348c - -commit 75d6edb847722b1058914a2effcc6d47c5b7971a -Author: Loren Merritt -Date: Fri Jul 28 21:39:07 2006 +0000 - - GTK update. patch by Vincent Torri. - fixed: - cleaning of Makefile - time elapsed seems broken ('total time' label replaced by 'time remaining') - text entries of the status window are now not editable - added: - compilation from x264/ (add --enable-gtk option to configure) - shared lib creation if --enable-shared is passed to configure - x264gtk.pc - --b-rdo, --no-dct-decimate - - - git-svn-id: svn://svn.videolan.org/x264/trunk@539 df754926-b1dd-0310-bc7b-ec298dee348c - -commit 4b2556f688c482e2cbda025a5468bb4853810f89 -Author: Loren Merritt -Date: Sun Jul 23 19:19:40 2006 +0000 - - new option: --qpfile forces frames types and QPs. 
- (intended for ratecontrol experiments, not for real encodes) - - - git-svn-id: svn://svn.videolan.org/x264/trunk@538 df754926-b1dd-0310-bc7b-ec298dee348c - -commit b8692db2efa98c719332979f6dc8fc39af8f1eff -Author: Loren Merritt -Date: Tue Jul 18 01:10:54 2006 +0000 - - api change: select ratecontrol method with an enum (param.rc.i_rc_method) instead of a bunch of booleans. - - - git-svn-id: svn://svn.videolan.org/x264/trunk@537 df754926-b1dd-0310-bc7b-ec298dee348c - -commit 4e7bd4a1f038aeb538bfd07fc2d5ac67c041a0e9 -Author: Loren Merritt -Date: Sun Jul 16 18:28:39 2006 +0000 - - slightly faster mmx dct - - - - git-svn-id: svn://svn.videolan.org/x264/trunk@536 df754926-b1dd-0310-bc7b-ec298dee348c - -commit 637470c0dbbadf4b0d8b01c6c2179a4305f2b203 -Author: Loren Merritt -Date: Sun Jul 16 18:25:38 2006 +0000 - - OpenBSD build fixes. - patch by Vizeli Pascal (pvizeli at yahoo dot de) - - - git-svn-id: svn://svn.videolan.org/x264/trunk@535 df754926-b1dd-0310-bc7b-ec298dee348c - -commit c6213d2fbb3b9a1a23c7cbe023080e98ee8e35a6 -Author: Loren Merritt -Date: Sat Jul 8 17:56:22 2006 +0000 - - mc_chroma width2 mmx - - - - git-svn-id: svn://svn.videolan.org/x264/trunk@534 df754926-b1dd-0310-bc7b-ec298dee348c - -commit eff6a5e204a9d9789b44d1ebf3de510f0f6d4334 -Author: Måns Rullgård -Date: Wed Jun 28 21:58:58 2006 +0000 - - make libx264.so symlink relative - - - git-svn-id: svn://svn.videolan.org/x264/trunk@533 df754926-b1dd-0310-bc7b-ec298dee348c - -commit 360853117fccdd082a6654c34c2c7dc2577f10cd -Author: Loren Merritt -Date: Mon Jun 12 08:22:09 2006 +0000 - - GTK update. patch by Vincent Torri. 
- added: - direct=auto - no-fast-pskip - vbv - cqm - tooltips (without descriptions yet) - translations - `make clean` for .exe - when file exists, ask for override - fixes: - debug level bug - bitrate slider bug - mixed-refs can be set only if ref>1 - i8x8 can be set only if 8x8 transform is enabled - # of threads capped at 4 - fourcc can't be removed - cosmetics - - - - git-svn-id: svn://svn.videolan.org/x264/trunk@532 df754926-b1dd-0310-bc7b-ec298dee348c - -commit 918c7ef4540f8741ecc23d74f227abd594986fba -Author: Loren Merritt -Date: Wed May 31 23:55:35 2006 +0000 - - vfw installer: tweak nsis compression. - patch by Francesco Corriga. - - - - git-svn-id: svn://svn.videolan.org/x264/trunk@531 df754926-b1dd-0310-bc7b-ec298dee348c - -commit b5d08311537cf047b1720243f866264abc5150d3 -Author: Eric Petit -Date: Tue May 30 10:05:56 2006 +0000 - - Fixed typo that caused x264_encoder_open to always fail - - - git-svn-id: svn://svn.videolan.org/x264/trunk@530 df754926-b1dd-0310-bc7b-ec298dee348c - -commit 91bbfb98c003715a0a44c31053c6e361b5262995 -Author: Loren Merritt -Date: Tue May 30 07:07:55 2006 +0000 - - check some mallocs' return value - - - - git-svn-id: svn://svn.videolan.org/x264/trunk@529 df754926-b1dd-0310-bc7b-ec298dee348c - -commit 0ac281dc97ac8c0b85165062d5f25227c9e0142f -Author: Loren Merritt -Date: Sun May 28 21:14:24 2006 +0000 - - make -> $(MAKE) - - - - git-svn-id: svn://svn.videolan.org/x264/trunk@528 df754926-b1dd-0310-bc7b-ec298dee348c - -commit c9155d3e9fbe646e2504c09b5b751f7279900522 -Author: Loren Merritt -Date: Wed May 24 03:59:19 2006 +0000 - - convert non-fatal errors to message level "warning". - - - - git-svn-id: svn://svn.videolan.org/x264/trunk@527 df754926-b1dd-0310-bc7b-ec298dee348c - -commit ae4b97a2530da93b750ac6b525941d408294a216 -Author: Loren Merritt -Date: Mon May 22 23:07:58 2006 +0000 - - fix a memory alignment. 
(no effect on x86, but might be needed for other simd) - - - - git-svn-id: svn://svn.videolan.org/x264/trunk@526 df754926-b1dd-0310-bc7b-ec298dee348c - -commit c832ac1af21e82f7418077c6e61d13819420fd61 -Author: Loren Merritt -Date: Fri May 19 20:10:41 2006 +0000 - - when using DEBUG_DUMP_FRAME, write decoded pictures in display order. - patch by Loic Le Loarer. - - - - git-svn-id: svn://svn.videolan.org/x264/trunk@525 df754926-b1dd-0310-bc7b-ec298dee348c - -commit 361b283acbcc2d4b1ff8171ac2945449936e5b93 -Author: Loren Merritt -Date: Fri May 19 19:14:29 2006 +0000 - - non-referenced B-frames should have the same frame_num as the following ref frame, not the previous. - patch by Loic Le Loarer. - - - - git-svn-id: svn://svn.videolan.org/x264/trunk@524 df754926-b1dd-0310-bc7b-ec298dee348c - -commit 1ca108eb75a947ad86dc4ab88ecf6e69bd22d358 -Author: Loren Merritt -Date: Fri May 12 08:17:53 2006 +0000 - - set the SPS constraint_set[01]_flag based on the profile in use, just in case some decoder cares - - - - git-svn-id: svn://svn.videolan.org/x264/trunk@523 df754926-b1dd-0310-bc7b-ec298dee348c - -commit de1af4c2e5528fae5918ddf00d6ae09a68ea2222 -Author: Loren Merritt -Date: Wed May 10 16:47:05 2006 +0000 - - msvc doesn't like C99 named array initializers - - - - git-svn-id: svn://svn.videolan.org/x264/trunk@522 df754926-b1dd-0310-bc7b-ec298dee348c - -commit 7022fe85d84db4a5a95a7b4ac699423277638881 -Author: Loren Merritt -Date: Wed May 10 16:42:07 2006 +0000 - - allow sar=1/1. - patch by Loic Le Loarer. - - - - git-svn-id: svn://svn.videolan.org/x264/trunk@521 df754926-b1dd-0310-bc7b-ec298dee348c - -commit 0d88274d5d833030ee3f54dfd09ef039f341da01 -Author: Loren Merritt -Date: Wed May 10 06:09:48 2006 +0000 - - faster intra search: filter i8x8 edges only once, and reuse for multiple predictions. 
- - - - git-svn-id: svn://svn.videolan.org/x264/trunk@520 df754926-b1dd-0310-bc7b-ec298dee348c - -commit 3de28cd5878a8be64e1db80ff2453e79acb0040d -Author: Loren Merritt -Date: Tue May 9 06:11:42 2006 +0000 - - faster intra search: some prediction modes don't have to compute a full hadamard transform. - x86 and amd64 asm. - - - - git-svn-id: svn://svn.videolan.org/x264/trunk@519 df754926-b1dd-0310-bc7b-ec298dee348c - -commit e63f25b44ed1bfd2ccd0ff1e7f1f453c6ba08179 -Author: Loren Merritt -Date: Sat May 6 17:12:23 2006 +0000 - - --sps-id, to allow concatenating streams with different settings. - - - - git-svn-id: svn://svn.videolan.org/x264/trunk@518 df754926-b1dd-0310-bc7b-ec298dee348c - -commit 609deaf54ad74816ba061c7022172986694049c9 -Author: Loren Merritt -Date: Wed May 3 17:59:23 2006 +0000 - - typo in expand_border_mod16 - - - - git-svn-id: svn://svn.videolan.org/x264/trunk@517 df754926-b1dd-0310-bc7b-ec298dee348c - -commit f5bdc82806070eb101f3c6ab9a5370c4788d7597 -Author: Loren Merritt -Date: Sun Apr 30 01:21:49 2006 +0000 - - typo impaired 2pass bitrate prediction. 
- - - - git-svn-id: svn://svn.videolan.org/x264/trunk@516 df754926-b1dd-0310-bc7b-ec298dee348c - -commit f51297f19065e4ab34179e6e1d785b28fb3ad6be -Author: Eric Petit -Date: Sat Apr 29 11:13:04 2006 +0000 - - Let the user choose the compiler with "CC=xxx ./configure" - - - git-svn-id: svn://svn.videolan.org/x264/trunk@515 df754926-b1dd-0310-bc7b-ec298dee348c - -commit e7141289a2f8d07168d19fc751bb302a9c32a79e -Author: Eric Petit -Date: Sat Apr 29 11:12:16 2006 +0000 - - More vector types fixes for gcc 3.3 - - - git-svn-id: svn://svn.videolan.org/x264/trunk@514 df754926-b1dd-0310-bc7b-ec298dee348c - -commit f3323f8478176852ff8e974217cb59227bbb693e -Author: Eric Petit -Date: Fri Apr 28 17:13:37 2006 +0000 - - More vector casts to try and make compilers happier - - - git-svn-id: svn://svn.videolan.org/x264/trunk@513 df754926-b1dd-0310-bc7b-ec298dee348c - -commit c1f64a50b7563b737c8938ed796f46d3bad354a4 -Author: Loren Merritt -Date: Tue Apr 25 04:08:21 2006 +0000 - - Use sa8d instead of satd for i8x8 search. - +.01 dB, -.5% speed - - - - git-svn-id: svn://svn.videolan.org/x264/trunk@512 df754926-b1dd-0310-bc7b-ec298dee348c - -commit 8aa294381e5e7725a9ae01ce84d1a8f4ac86a8eb -Author: Loren Merritt -Date: Mon Apr 24 19:34:06 2006 +0000 - - Before evaluating the RD score of any mode, check satd and abort if it's much worse than some other mode. - Also apply more early termination to intra search. - speed at -m1:+1%, -m4:+3%, -m6:+8%, -m7:+20% - - - - git-svn-id: svn://svn.videolan.org/x264/trunk@511 df754926-b1dd-0310-bc7b-ec298dee348c - -commit eb3d83c0a32636674f59bbd7f8fedd430f1e4c2c -Author: Sam Hocevar -Date: Mon Apr 24 19:01:10 2006 +0000 - - * common/ppc/pixel.c: fixed illegal implicit casts of vector types. - - - git-svn-id: svn://svn.videolan.org/x264/trunk@510 df754926-b1dd-0310-bc7b-ec298dee348c - -commit 17b90bf3c3f6a2c5711b68be39df784204b425b8 -Author: Sam Hocevar -Date: Mon Apr 24 18:49:50 2006 +0000 - - * Added %$#@#$! support for #@%$!#@ armv4l CPU. 
- - - git-svn-id: svn://svn.videolan.org/x264/trunk@509 df754926-b1dd-0310-bc7b-ec298dee348c - -commit 35a5a4f121667717bfd783d3dcbb6e79fc7c8668 -Author: Loren Merritt -Date: Mon Apr 24 08:27:26 2006 +0000 - - When evaluating predictors to start fullpel motion search, use subpel positions instead of rounding to fullpel. - about +.02 dB, -1.6% speed at subme>=3 - patch by Alex Wright. - - - - git-svn-id: svn://svn.videolan.org/x264/trunk@508 df754926-b1dd-0310-bc7b-ec298dee348c - -commit 6b577361fbab9d785787eba3e16a63a23d84be28 -Author: Loren Merritt -Date: Mon Apr 24 03:52:55 2006 +0000 - - mmx implementation of x264_pixel_sa8d - - - - git-svn-id: svn://svn.videolan.org/x264/trunk@507 df754926-b1dd-0310-bc7b-ec298dee348c - -commit af751ac37f6397567696ba7eb2479f72ea2c2004 -Author: Loren Merritt -Date: Thu Apr 20 23:48:46 2006 +0000 - - 10l in r463 (q0 i16x16 dc was permuted) - - - - git-svn-id: svn://svn.videolan.org/x264/trunk@506 df754926-b1dd-0310-bc7b-ec298dee348c - -commit 2735ae7056413fb6d4461618269b971e8ae45915 -Author: Loren Merritt -Date: Thu Apr 20 20:33:25 2006 +0000 - - typo in r504 - - - - git-svn-id: svn://svn.videolan.org/x264/trunk@505 df754926-b1dd-0310-bc7b-ec298dee348c - -commit 656b0698c90b7c3b3365416391b6d3706d1afb0d -Author: Loren Merritt -Date: Thu Apr 20 04:38:45 2006 +0000 - - update msvc project files. - patch by anonymous. - - - - git-svn-id: svn://svn.videolan.org/x264/trunk@504 df754926-b1dd-0310-bc7b-ec298dee348c - -commit 2f95856be50ec7c744ad4d65408846d3dce75491 -Author: Loren Merritt -Date: Wed Apr 19 09:02:19 2006 +0000 - - Before, we eliminated dct blocks containing only a small single coefficient. Now that behavior is optional, by --no-dct-decimate. - based on a patch by Alex Wright. 
- - - - git-svn-id: svn://svn.videolan.org/x264/trunk@503 df754926-b1dd-0310-bc7b-ec298dee348c - -commit 9a6815d3cb06e48888bdd1d804ae9eb72adf71bb -Author: Eric Petit -Date: Mon Apr 17 11:08:58 2006 +0000 - - Enables more agressive optimizations (-fastf -mcpu=G4) on OS X. - Adds AltiVec interleaved SAD and SSD16x16. - Overall speedup up to 20%. - - Patch by anonymous - - - git-svn-id: svn://svn.videolan.org/x264/trunk@502 df754926-b1dd-0310-bc7b-ec298dee348c - -commit 97ab2190599297ab0edaa62b8a7027117ca74ed5 -Author: Loren Merritt -Date: Mon Apr 17 01:19:47 2006 +0000 - - faster cabac_encode_bypass - - - - git-svn-id: svn://svn.videolan.org/x264/trunk@501 df754926-b1dd-0310-bc7b-ec298dee348c - -commit 1927b27504ebee226b5eb1a51461b35e72a6d542 -Author: Eric Petit -Date: Sun Apr 16 18:24:38 2006 +0000 - - restored AltiVec dct - - - git-svn-id: svn://svn.videolan.org/x264/trunk@500 df754926-b1dd-0310-bc7b-ec298dee348c - -commit d0a556549b9ae59c3a30b6c1b0280e6857350da3 -Author: Eric Petit -Date: Sun Apr 16 16:38:16 2006 +0000 - - more AltiVec mc, ~4.5% overall speedup - - - git-svn-id: svn://svn.videolan.org/x264/trunk@499 df754926-b1dd-0310-bc7b-ec298dee348c - -commit 71f11146131d1804311d86535a6aa7d0ff777501 -Author: Loren Merritt -Date: Wed Apr 12 21:21:59 2006 +0000 - - slightly faster loopfilter - - - - git-svn-id: svn://svn.videolan.org/x264/trunk@498 df754926-b1dd-0310-bc7b-ec298dee348c - -commit d2ab724f262f831a320ba75b81092bc182bca695 -Author: Loren Merritt -Date: Wed Apr 12 06:28:52 2006 +0000 - - 3% faster satd_mmx - - - - git-svn-id: svn://svn.videolan.org/x264/trunk@497 df754926-b1dd-0310-bc7b-ec298dee348c - -commit a23a3678b474450876ac297c979bb2ad27afe6f4 -Author: Loren Merritt -Date: Wed Apr 12 00:45:07 2006 +0000 - - cosmetics in sad/ssd/satd mmx - - - - git-svn-id: svn://svn.videolan.org/x264/trunk@496 df754926-b1dd-0310-bc7b-ec298dee348c - -commit b3ad52d4860127a9b7348923671b595b98cb4d09 -Author: Loren Merritt -Date: Tue Apr 11 21:16:44 2006 +0000 - - 
store quoted configure options. needed e.g. for multiple args under --extra-cflags. - - - - git-svn-id: svn://svn.videolan.org/x264/trunk@495 df754926-b1dd-0310-bc7b-ec298dee348c - -commit c293b80b3b57789bbac663f0eac0ee26c1bd8eec -Author: Loren Merritt -Date: Tue Apr 11 10:45:00 2006 +0000 - - fix a yasm-incompatible syntax in x86 asm - - - - git-svn-id: svn://svn.videolan.org/x264/trunk@494 df754926-b1dd-0310-bc7b-ec298dee348c - -commit ae82d2423aa1d54eb367ca292262bc6bd3dec134 -Author: Loren Merritt -Date: Tue Apr 11 02:41:43 2006 +0000 - - yasm noexec stack - - - - git-svn-id: svn://svn.videolan.org/x264/trunk@493 df754926-b1dd-0310-bc7b-ec298dee348c - -commit 283d57ed95a22f104e4f22a6119e2aaeeca39833 -Author: Loren Merritt -Date: Mon Apr 10 18:46:54 2006 +0000 - - more interleaved SAD. - 25% faster halfpel. - - - - git-svn-id: svn://svn.videolan.org/x264/trunk@492 df754926-b1dd-0310-bc7b-ec298dee348c - -commit 0f4c0eb836912fcbd2376c920a9dd7bf438f4e43 -Author: Loren Merritt -Date: Mon Apr 10 17:56:02 2006 +0000 - - more interleaved SAD. - 1% faster umh, 6% faster esa. - - - - git-svn-id: svn://svn.videolan.org/x264/trunk@491 df754926-b1dd-0310-bc7b-ec298dee348c - -commit 8947b51f35151f821c3718b01c1e93d517d814b5 -Author: Loren Merritt -Date: Mon Apr 10 03:03:13 2006 +0000 - - interleave multiple calls to SAD. - 15% faster fullpel motion estimation. - - - - git-svn-id: svn://svn.videolan.org/x264/trunk@490 df754926-b1dd-0310-bc7b-ec298dee348c - -commit bddf5f03ff621a8fdbcc9925453573600984b27d -Author: Sam Hocevar -Date: Sun Apr 9 13:20:17 2006 +0000 - - * Added support for ppc64. I'm really fucking tired of having to do this. 
- - - git-svn-id: svn://svn.videolan.org/x264/trunk@489 df754926-b1dd-0310-bc7b-ec298dee348c - -commit ac4249d20d47f75ca9aebb04c4b329c2d497100c -Author: Måns Rullgård -Date: Sat Apr 8 01:33:13 2006 +0000 - - use LDFLAGS when linking shared lib - - - git-svn-id: svn://svn.videolan.org/x264/trunk@488 df754926-b1dd-0310-bc7b-ec298dee348c - -commit 85ab23ceca033df09e17d650647cba3d4995f9e8 -Author: Felix Paul Kühne -Date: Wed Mar 29 06:37:55 2006 +0000 - - * compilation fix for mingw, darwin (off_t was undefined) - - git-svn-id: svn://svn.videolan.org/x264/trunk@487 df754926-b1dd-0310-bc7b-ec298dee348c - -commit 540ed9aafdf7577cf51914676dfc010952c76052 -Author: Loren Merritt -Date: Mon Mar 27 08:11:37 2006 +0000 - - GTK: support yuv4mpeg input. - patch by Vincent Torri. - - - - git-svn-id: svn://svn.videolan.org/x264/trunk@486 df754926-b1dd-0310-bc7b-ec298dee348c - -commit 5fe3e77f3d9f20286feb1432a63d2d6652dc8777 -Author: Loren Merritt -Date: Sun Mar 26 20:54:33 2006 +0000 - - GTK: fix avs input - patch by Vincent Torri. - - - - git-svn-id: svn://svn.videolan.org/x264/trunk@485 df754926-b1dd-0310-bc7b-ec298dee348c - -commit a84899e0b4fd16d49cb7085a275cd7bb1ce9f67c -Author: Loren Merritt -Date: Sun Mar 26 20:40:20 2006 +0000 - - cli: support yuv4mpeg input. - patch by anonymous. - - - - git-svn-id: svn://svn.videolan.org/x264/trunk@484 df754926-b1dd-0310-bc7b-ec298dee348c - -commit e510c09889d83c5d7e85eca83036af36f1284b87 -Author: Loren Merritt -Date: Sun Mar 26 01:09:09 2006 +0000 - - GTK: compilation fixes - - - - git-svn-id: svn://svn.videolan.org/x264/trunk@483 df754926-b1dd-0310-bc7b-ec298dee348c - -commit 845feff6507dc682944a8c733f8ee1c8a4da6f09 -Author: Loren Merritt -Date: Sat Mar 25 23:26:07 2006 +0000 - - GTK: compilation fixes on mingw, - add avs input for the app (if avalaible), - add filters for the filechooser, - add icon for the main window. - patch by Vincent Torri. 
- - - - git-svn-id: svn://svn.videolan.org/x264/trunk@482 df754926-b1dd-0310-bc7b-ec298dee348c - -commit fc34d38657b89418cb68576889c647a4aa5e8108 -Author: Loren Merritt -Date: Sat Mar 25 10:13:12 2006 +0000 - - GTK-based graphical frontend. - patch by Vincent Torri. - - - - git-svn-id: svn://svn.videolan.org/x264/trunk@481 df754926-b1dd-0310-bc7b-ec298dee348c - -commit 50aadebc2de139f2003314bfbeea7d7ce1680901 -Author: Loren Merritt -Date: Sat Mar 25 10:06:15 2006 +0000 - - silence some gcc warnings - - - - git-svn-id: svn://svn.videolan.org/x264/trunk@480 df754926-b1dd-0310-bc7b-ec298dee348c - -commit 5b6c5effb4c1864b692213cafef9c85e3623573c -Author: Loren Merritt -Date: Fri Mar 24 21:45:39 2006 +0000 - - use FDEC_STRIDE instead of a parameter in mmx dct - .5% speedup - - - - git-svn-id: svn://svn.videolan.org/x264/trunk@479 df754926-b1dd-0310-bc7b-ec298dee348c - -commit da9158b3ec035e8261e6fe2c5fd77e073425ed08 -Author: Sam Hocevar -Date: Wed Mar 22 14:21:53 2006 +0000 - - * configure: support for 64 bits MIPS. 
- - - git-svn-id: svn://svn.videolan.org/x264/trunk@478 df754926-b1dd-0310-bc7b-ec298dee348c - -commit 7c013538206535c3abd70eb56a00bed0dccb43c5 -Author: Loren Merritt -Date: Mon Mar 20 23:32:43 2006 +0000 - - 10l in r473 and stdin - - - - git-svn-id: svn://svn.videolan.org/x264/trunk@477 df754926-b1dd-0310-bc7b-ec298dee348c - -commit 3b66f690bd8a7d1417cedf98aec0df2702338bb2 -Author: Loren Merritt -Date: Mon Mar 20 23:00:52 2006 +0000 - - RD subpel motion estimation (--subme 7) - - - - git-svn-id: svn://svn.videolan.org/x264/trunk@476 df754926-b1dd-0310-bc7b-ec298dee348c - -commit 48633d2afd50d9f15b83e6639024c382dd958c76 -Author: Loren Merritt -Date: Mon Mar 20 22:46:38 2006 +0000 - - cosmetics in cabac_mb_cbf - - - - git-svn-id: svn://svn.videolan.org/x264/trunk@475 df754926-b1dd-0310-bc7b-ec298dee348c - -commit 50f40fd2a3b63ef89e8f94085ef2ed971a408468 -Author: Loren Merritt -Date: Sun Mar 19 11:00:02 2006 +0000 - - separate --thread-input from --threads - - - - git-svn-id: svn://svn.videolan.org/x264/trunk@474 df754926-b1dd-0310-bc7b-ec298dee348c - -commit 36c25b664e28e4e15ce49242af6ac306eb6f7cca -Author: Loren Merritt -Date: Sat Mar 18 09:20:06 2006 +0000 - - if --threads > 1, then read the input stream in its own thread. - - - - git-svn-id: svn://svn.videolan.org/x264/trunk@473 df754926-b1dd-0310-bc7b-ec298dee348c - -commit c20906a56b2f17a89b7ba4afc87e78202447e7fb -Author: Måns Rullgård -Date: Fri Mar 17 22:27:31 2006 +0000 - - FreeBSD uses ELF - - - git-svn-id: svn://svn.videolan.org/x264/trunk@472 df754926-b1dd-0310-bc7b-ec298dee348c - -commit 9c61b0cbd68304b8f2860dc2d7df401ad6839b81 -Author: Loren Merritt -Date: Fri Mar 17 22:10:34 2006 +0000 - - 10l in r470 on x86_64 - - - git-svn-id: svn://svn.videolan.org/x264/trunk@471 df754926-b1dd-0310-bc7b-ec298dee348c - -commit fdb64099b4da93ffa70af98aad85cc7c6fc564d0 -Author: Loren Merritt -Date: Fri Mar 17 21:36:27 2006 +0000 - - some mmxext functions really only required mmx. 
- - - - git-svn-id: svn://svn.videolan.org/x264/trunk@470 df754926-b1dd-0310-bc7b-ec298dee348c - -commit abffd18fe30bcc0daa344a7dcedab30ddc3e97f6 -Author: Loren Merritt -Date: Fri Mar 17 07:36:06 2006 +0000 - - simplify get_ref and mc_luma - - - - git-svn-id: svn://svn.videolan.org/x264/trunk@469 df754926-b1dd-0310-bc7b-ec298dee348c - -commit 9eaa83d4c97cfceaeb6491ae8e7a74c0bd6f397b -Author: Loren Merritt -Date: Wed Mar 15 04:38:57 2006 +0000 - - b16x16 wpred analysis used wrong weight - - - - git-svn-id: svn://svn.videolan.org/x264/trunk@468 df754926-b1dd-0310-bc7b-ec298dee348c - -commit 926212b3f0ff67ffc8ea2e3a7b299c016a00404c -Author: Loren Merritt -Date: Sat Mar 11 03:32:37 2006 +0000 - - configure: --enable-shared for libx264.so - - - - git-svn-id: svn://svn.videolan.org/x264/trunk@467 df754926-b1dd-0310-bc7b-ec298dee348c - -commit 8f79dcc217245ebdc4aba8be505526c7277c6d3c -Author: Loren Merritt -Date: Fri Mar 10 18:58:29 2006 +0000 - - wrong modulus when delta_qp = +26 - - - - git-svn-id: svn://svn.videolan.org/x264/trunk@466 df754926-b1dd-0310-bc7b-ec298dee348c - -commit 78f414d5646e018254339a9a5db08bdf69de6551 -Author: Loren Merritt -Date: Thu Mar 9 16:56:33 2006 +0000 - - 10l in vbv + 2pass - - - - git-svn-id: svn://svn.videolan.org/x264/trunk@465 df754926-b1dd-0310-bc7b-ec298dee348c - -commit d8e790ca7c0a524ea0aa01bf0d9020530e3dba9a -Author: Loren Merritt -Date: Thu Mar 9 15:59:08 2006 +0000 - - macroblock-level ratecontrol: improved vbv strictness, and improved quality when using vbv. - - - - git-svn-id: svn://svn.videolan.org/x264/trunk@464 df754926-b1dd-0310-bc7b-ec298dee348c - -commit 79389771d6bc84a886b754bba995e7d9ac8b48d4 -Author: Loren Merritt -Date: Thu Mar 9 05:30:08 2006 +0000 - - keep transposed dct coefs. ~1% overall speedup. 
- - - - git-svn-id: svn://svn.videolan.org/x264/trunk@463 df754926-b1dd-0310-bc7b-ec298dee348c - -commit ce9b3336d66cff23019d43656baf425491702727 -Author: Loren Merritt -Date: Thu Mar 9 05:24:02 2006 +0000 - - tweak rounding of 8x8dct - - - - git-svn-id: svn://svn.videolan.org/x264/trunk@462 df754926-b1dd-0310-bc7b-ec298dee348c - -commit 9331f05948dfcd42461a8aa8b6f0994e594dc74a -Author: Loren Merritt -Date: Wed Mar 8 19:58:30 2006 +0000 - - cosmetics in makefile - - - - git-svn-id: svn://svn.videolan.org/x264/trunk@461 df754926-b1dd-0310-bc7b-ec298dee348c - -commit 058c3be5405df19bb0f029956c54251d626ca0f0 -Author: Loren Merritt -Date: Wed Mar 8 16:03:30 2006 +0000 - - cosmetics: muxers -> muxers.c - - - git-svn-id: svn://svn.videolan.org/x264/trunk@460 df754926-b1dd-0310-bc7b-ec298dee348c - -commit a34eec24d5caed8b43f2d1ecf7a0f36b9fe60189 -Author: Loren Merritt -Date: Mon Mar 6 18:38:44 2006 +0000 - - no --nr in intra blocks. intra prediction doesn't work well enough for the residual to be indicative of noise. - - - - git-svn-id: svn://svn.videolan.org/x264/trunk@459 df754926-b1dd-0310-bc7b-ec298dee348c - -commit afbbaf9b0229751fe545e1ac8b8f1ca68228d56a -Author: Loren Merritt -Date: Mon Mar 6 03:21:38 2006 +0000 - - 10l in direct auto + multiref + 1pass - - - - git-svn-id: svn://svn.videolan.org/x264/trunk@458 df754926-b1dd-0310-bc7b-ec298dee348c - -commit 9773268370492490235dee06d46e091f563626d7 -Author: Loren Merritt -Date: Sun Mar 5 07:01:58 2006 +0000 - - --direct auto - selects direct mode per frame. works best in 2pass (enable in both passes). 
- - - - git-svn-id: svn://svn.videolan.org/x264/trunk@457 df754926-b1dd-0310-bc7b-ec298dee348c - -commit 126ccb3360e4c9b92ced4c995e618e4129be97a2 -Author: Loren Merritt -Date: Sun Mar 5 06:59:06 2006 +0000 - - change default direct mode to spatial - - - - git-svn-id: svn://svn.videolan.org/x264/trunk@456 df754926-b1dd-0310-bc7b-ec298dee348c - -commit 39c6d0824e23a3e6e769812968082026a9df61f8 -Author: Loren Merritt -Date: Sun Mar 5 06:28:40 2006 +0000 - - remove TODO. most of it is done, and the rest is out of date. - - - - git-svn-id: svn://svn.videolan.org/x264/trunk@455 df754926-b1dd-0310-bc7b-ec298dee348c - -commit 918a791bb4ed544368cd7389147ab9e18fb6f8d4 -Author: Loren Merritt -Date: Sun Mar 5 02:26:32 2006 +0000 - - more amd64 mmx intra prediction - - - - git-svn-id: svn://svn.videolan.org/x264/trunk@454 df754926-b1dd-0310-bc7b-ec298dee348c - -commit 469b4e5032b8183dd04c8cb6f22e9724146bb2f5 -Author: Loren Merritt -Date: Sun Mar 5 02:16:19 2006 +0000 - - for i8x8 neighbors, don't assume a new slice starts at the edge of the frame - - - - git-svn-id: svn://svn.videolan.org/x264/trunk@453 df754926-b1dd-0310-bc7b-ec298dee348c - -commit 4fe3aff6fd991a72b8a80b2157092678b13db433 -Author: Sam Hocevar -Date: Sat Mar 4 02:49:44 2006 +0000 - - * common/i386/i386inc.asm: got PIC to work for real on OS X x86. - - - git-svn-id: svn://svn.videolan.org/x264/trunk@452 df754926-b1dd-0310-bc7b-ec298dee348c - -commit 4139febfe8acad10fb759b9d5a8992ed8cad6234 -Author: Sam Hocevar -Date: Thu Mar 2 20:48:08 2006 +0000 - - * common/i386/*.asm: don't use the "GLOBAL" reserved word, some versions - NASM complain about it. Replaced it with "GOT_ebx". - - - git-svn-id: svn://svn.videolan.org/x264/trunk@451 df754926-b1dd-0310-bc7b-ec298dee348c - -commit 059410eda5d125c166b5ba050e3ca8152e84a2c7 -Author: Sam Hocevar -Date: Thu Mar 2 20:46:54 2006 +0000 - - * configure: activate minor nasm optimisations, such as assembling - "add eax, 8" as "add eax, byte 8". 
- - - git-svn-id: svn://svn.videolan.org/x264/trunk@450 df754926-b1dd-0310-bc7b-ec298dee348c - -commit b5fda5741bf36e3f03cf0ebac90ddd9dce4f6420 -Author: Sam Hocevar -Date: Wed Mar 1 22:21:47 2006 +0000 - - * common/i386: factored the .rodata section declaration into i386inc.asm. - - - git-svn-id: svn://svn.videolan.org/x264/trunk@449 df754926-b1dd-0310-bc7b-ec298dee348c - -commit 17683d75b1dae07e8dfc901883231b41e73fe8cd -Author: Sam Hocevar -Date: Wed Mar 1 22:16:16 2006 +0000 - - * configure common/i386/i386inc.asm: got rid of -DFORMAT_* nasm flags - and use built-in preprocessor tests instead. - - - git-svn-id: svn://svn.videolan.org/x264/trunk@448 df754926-b1dd-0310-bc7b-ec298dee348c - -commit 3723deea1601ba0dbda44ce09f77d6e1019226ac -Author: Sam Hocevar -Date: Wed Mar 1 22:12:22 2006 +0000 - - * common/i386/i386inc.asm: tell the ELF linker about our stack properties - so that it does not assume the stack has to be executable. - - - git-svn-id: svn://svn.videolan.org/x264/trunk@447 df754926-b1dd-0310-bc7b-ec298dee348c - -commit 70c140345c62ecb13ffa1af412f1e8f9f10567d2 -Author: Loren Merritt -Date: Tue Feb 28 19:49:00 2006 +0000 - - 10l in r443 (p4x4 chroma) - - - - git-svn-id: svn://svn.videolan.org/x264/trunk@446 df754926-b1dd-0310-bc7b-ec298dee348c - -commit 3bd4ade21dcbf30287f3a350c25bda26ea667d22 -Author: Loren Merritt -Date: Mon Feb 27 07:31:36 2006 +0000 - - copy current macroblock to a smaller buffer, to improve cache coherency and reduce stride computations. - part 3: asm - - - - git-svn-id: svn://svn.videolan.org/x264/trunk@445 df754926-b1dd-0310-bc7b-ec298dee348c - -commit 4ecb5f8ed11073fd4e6a4673a1275c430478aefc -Author: Loren Merritt -Date: Mon Feb 27 07:29:24 2006 +0000 - - copy current macroblock to a smaller buffer, to improve cache coherency and reduce stride computations. 
- part 2: intra prediction - - - - git-svn-id: svn://svn.videolan.org/x264/trunk@444 df754926-b1dd-0310-bc7b-ec298dee348c - -commit 8f05dffc4574c40557a5a161b18c4e6037aeec48 -Author: Loren Merritt -Date: Mon Feb 27 07:28:20 2006 +0000 - - copy current macroblock to a smaller buffer, to improve cache coherency and reduce stride computations. - part 1: memory arrangement. - - - - git-svn-id: svn://svn.videolan.org/x264/trunk@443 df754926-b1dd-0310-bc7b-ec298dee348c - -commit 388658234c05e9b282569be052a5977d6cc9e812 -Author: Loren Merritt -Date: Mon Feb 27 07:18:23 2006 +0000 - - h->mc.copy() - - - - git-svn-id: svn://svn.videolan.org/x264/trunk@442 df754926-b1dd-0310-bc7b-ec298dee348c - -commit 34cbb9170c3b9daeae91ef4aa2a48c2ec9bdfbc8 -Author: Loren Merritt -Date: Mon Feb 27 06:32:43 2006 +0000 - - lowres intra used wrong neighboring pixels - - - - git-svn-id: svn://svn.videolan.org/x264/trunk@441 df754926-b1dd-0310-bc7b-ec298dee348c - -commit bca81cae09973cd349382d8612ad6aaf412444b4 -Author: Loren Merritt -Date: Thu Feb 23 22:46:21 2006 +0000 - - trellis=2 slightly affected intra analysis even without subme=6 - - - - git-svn-id: svn://svn.videolan.org/x264/trunk@440 df754926-b1dd-0310-bc7b-ec298dee348c - -commit e5ed306c33a9b03d083660bb521758c76bdf36bd -Author: Sam Hocevar -Date: Thu Feb 16 22:00:46 2006 +0000 - - * encoder/ratecontrol.c: OS X support for exp2f and sqrtf. - - - git-svn-id: svn://svn.videolan.org/x264/trunk@439 df754926-b1dd-0310-bc7b-ec298dee348c - -commit 14b26394bd35f6ab03c6d4b7424ddea893a5bfa1 -Author: Loren Merritt -Date: Thu Feb 16 01:32:56 2006 +0000 - - allow delta_qp > 26 - - - - git-svn-id: svn://svn.videolan.org/x264/trunk@438 df754926-b1dd-0310-bc7b-ec298dee348c - -commit 5fbca87deb6e3a79b9d3b6b31ea85fc79e49534f -Author: Loren Merritt -Date: Tue Feb 14 01:21:43 2006 +0000 - - ratecontrol didn't always account for header bits, causing an undersize in multipass with --ratetol inf. 
- - - - git-svn-id: svn://svn.videolan.org/x264/trunk@437 df754926-b1dd-0310-bc7b-ec298dee348c - -commit 12b778b8f4e8501cf06ba3513fc0e824d4a87ac1 -Author: Loren Merritt -Date: Mon Feb 13 17:36:22 2006 +0000 - - -q0 --b-rdo wasn't lossless - - - - git-svn-id: svn://svn.videolan.org/x264/trunk@436 df754926-b1dd-0310-bc7b-ec298dee348c - -commit 14f3cc06834e207b37222d93b7c6aea47b17524d -Author: Loren Merritt -Date: Mon Feb 13 04:34:15 2006 +0000 - - cosmetics - - - - git-svn-id: svn://svn.videolan.org/x264/trunk@435 df754926-b1dd-0310-bc7b-ec298dee348c - -commit 476a0f93c9aa42a39c3518f891e024ef41b1056e -Author: Loren Merritt -Date: Sun Feb 12 06:50:19 2006 +0000 - - allow ',' separator for --filter - - - - git-svn-id: svn://svn.videolan.org/x264/trunk@434 df754926-b1dd-0310-bc7b-ec298dee348c - -commit 8dfd87aeef4c1523d60684ca6c1368007a24aad4 -Author: Loren Merritt -Date: Sun Feb 12 06:28:22 2006 +0000 - - VfW: 10l in bime and refs - - - - git-svn-id: svn://svn.videolan.org/x264/trunk@433 df754926-b1dd-0310-bc7b-ec298dee348c - -commit d53108a30cd1b1284c59eb9e8bdfac157a3ddb37 -Author: Loren Merritt -Date: Sun Feb 12 01:36:21 2006 +0000 - - more lowres mv clipping fixes - - - - git-svn-id: svn://svn.videolan.org/x264/trunk@432 df754926-b1dd-0310-bc7b-ec298dee348c - -commit eb32d28463ab8433fba16851d4796d041b8de39f -Author: Loren Merritt -Date: Sat Feb 11 22:04:57 2006 +0000 - - VfW: cosmetics - - - - git-svn-id: svn://svn.videolan.org/x264/trunk@431 df754926-b1dd-0310-bc7b-ec298dee348c - -commit 60e848749dbcb8a44709675ea391f5c28a8a8c1f -Author: Loren Merritt -Date: Sat Feb 11 20:11:05 2006 +0000 - - VfW: support trellis, brdo, nr, bime. - patch by Dan Nelson (dnelson at allantgroup dot com). 
- - - - git-svn-id: svn://svn.videolan.org/x264/trunk@430 df754926-b1dd-0310-bc7b-ec298dee348c - -commit 681b394485671f977a1a19d2279ace4c22eb0177 -Author: Loren Merritt -Date: Fri Feb 10 21:58:43 2006 +0000 - - amd64 mmx for some intra pred functions - - - - git-svn-id: svn://svn.videolan.org/x264/trunk@429 df754926-b1dd-0310-bc7b-ec298dee348c - -commit e1d852d2947dfaac201dfb7149070ed341caa64f -Author: Loren Merritt -Date: Fri Feb 10 20:52:48 2006 +0000 - - dequant_mmx made incorrect assumptions about extreme inputs. now uses 32bit in more cases. - patch by Christian Heine. - - - - git-svn-id: svn://svn.videolan.org/x264/trunk@428 df754926-b1dd-0310-bc7b-ec298dee348c - -commit fed2847ca9c4b9f8240be78145681d12ea85e1e9 -Author: Loren Merritt -Date: Fri Feb 10 01:16:40 2006 +0000 - - lowres can reuse the normal mv cost table - - - - git-svn-id: svn://svn.videolan.org/x264/trunk@427 df754926-b1dd-0310-bc7b-ec298dee348c - -commit f959a749aec65753e77a0b5566adb18d6a9af87f -Author: Loren Merritt -Date: Thu Feb 9 04:51:55 2006 +0000 - - r422 broke x264_center_filter_mmxext - - - - git-svn-id: svn://svn.videolan.org/x264/trunk@426 df754926-b1dd-0310-bc7b-ec298dee348c - -commit 7125f9174c8d32f66bf9264c2b986dd1c03f4a27 -Author: Sam Hocevar -Date: Wed Feb 8 12:45:21 2006 +0000 - - * configure: define FORMAT_ELF under Linux and FORMAT_AOUTB under *BSD. - - - git-svn-id: svn://svn.videolan.org/x264/trunk@425 df754926-b1dd-0310-bc7b-ec298dee348c - -commit 5d5c5cc213fb25a1fd151af80ab0a4fb614dd32c -Author: Sam Hocevar -Date: Wed Feb 8 11:07:06 2006 +0000 - - * common/i386/i386inc.asm: support for ELF, a.out and Mach-O objects. - - - git-svn-id: svn://svn.videolan.org/x264/trunk@424 df754926-b1dd-0310-bc7b-ec298dee348c - -commit 78d31c22dcc1c42d6b2009b52ba758958dd1bff4 -Author: Sam Hocevar -Date: Wed Feb 8 09:32:03 2006 +0000 - - * configure: added a --enable-pic flag. 
- - - git-svn-id: svn://svn.videolan.org/x264/trunk@423 df754926-b1dd-0310-bc7b-ec298dee348c - -commit dc454eab263d463d2eeecf627aae31a10a5d080c -Author: Sam Hocevar -Date: Wed Feb 8 09:26:56 2006 +0000 - - * Additional fixes to the PIC versions of assembly routines. They now pass - all checkasm tests and output streams are bit-by-bit identical, which - sounds good. - - - git-svn-id: svn://svn.videolan.org/x264/trunk@422 df754926-b1dd-0310-bc7b-ec298dee348c - -commit ac9da5dbb4447c64bf9b82e849f4ae233c4413d3 -Author: Sam Hocevar -Date: Wed Feb 8 09:03:28 2006 +0000 - - * tools/checkasm.c: print the random seed used for the test, to allow for - replays. It looks like dequant_4x4 fails 1 time out of 600, with the - following seeds for instance: 1423 1957 2149 2455 3385 3403 3724 4095. - - - git-svn-id: svn://svn.videolan.org/x264/trunk@421 df754926-b1dd-0310-bc7b-ec298dee348c - -commit bb21f3a920ffefe84a77933c060775b2089a9c6c -Author: Loren Merritt -Date: Wed Feb 8 00:53:35 2006 +0000 - - cosmetics in mc_chroma - - - - git-svn-id: svn://svn.videolan.org/x264/trunk@420 df754926-b1dd-0310-bc7b-ec298dee348c - -commit 80b669bbc73e92944954cadc612650ba08b80358 -Author: Sam Hocevar -Date: Tue Feb 7 19:05:47 2006 +0000 - - * Oh, so what I thought was unused code was in fact used. This fixes my - breakage but makes the code rather slow in PIC mode. I will fix it later. - - - git-svn-id: svn://svn.videolan.org/x264/trunk@419 df754926-b1dd-0310-bc7b-ec298dee348c - -commit eea893893af7ae49cd9cab333279f0323302db81 -Author: Sam Hocevar -Date: Tue Feb 7 17:40:56 2006 +0000 - - * Support for x86 position-independent code (PIC), needed for dynamic libs - on Mac OS X Intel. I tried to make this as little intrusive as possible. 
- - - git-svn-id: svn://svn.videolan.org/x264/trunk@418 df754926-b1dd-0310-bc7b-ec298dee348c - -commit 97f05071bb3aed659785ec10ccd6824020dfaef8 -Author: Loren Merritt -Date: Mon Feb 6 21:34:52 2006 +0000 - - msvc: #define isfinite() - - - - git-svn-id: svn://svn.videolan.org/x264/trunk@417 df754926-b1dd-0310-bc7b-ec298dee348c - -commit 19d07afabccda339852d7d49d3d5b11d538181f3 -Author: Loren Merritt -Date: Mon Feb 6 06:14:04 2006 +0000 - - x86 mmx for some intra pred functions - - - - git-svn-id: svn://svn.videolan.org/x264/trunk@416 df754926-b1dd-0310-bc7b-ec298dee348c - -commit d2dada763c6114b6245c6b468dfc2287123d12c5 -Author: Loren Merritt -Date: Mon Feb 6 05:53:44 2006 +0000 - - cosmetics: reorganize intra prediction dsp - - - - git-svn-id: svn://svn.videolan.org/x264/trunk@415 df754926-b1dd-0310-bc7b-ec298dee348c - -commit 791495e3d82b982ffa086593956bb96da45973e2 -Author: Loren Merritt -Date: Mon Feb 6 03:49:38 2006 +0000 - - too many systems don't have off_t; use uint64_t instead. - - - - git-svn-id: svn://svn.videolan.org/x264/trunk@414 df754926-b1dd-0310-bc7b-ec298dee348c - -commit ce237ab663525259ce64423341ce6893309cee88 -Author: Loren Merritt -Date: Sat Feb 4 05:39:02 2006 +0000 - - fix order of frame evaluation in pre-me - - - - git-svn-id: svn://svn.videolan.org/x264/trunk@413 df754926-b1dd-0310-bc7b-ec298dee348c - -commit f116707e12e1ba980c8cf6a091f3290ba4d75af4 -Author: Loren Merritt -Date: Fri Feb 3 18:23:26 2006 +0000 - - update AUTHORS - - - - git-svn-id: svn://svn.videolan.org/x264/trunk@412 df754926-b1dd-0310-bc7b-ec298dee348c - -commit 8b498e443950c642fd5f6c1406636f4ab5def27e -Author: Loren Merritt -Date: Thu Feb 2 04:37:05 2006 +0000 - - fix a check for NaN in ratecontrol - - - - git-svn-id: svn://svn.videolan.org/x264/trunk@411 df754926-b1dd-0310-bc7b-ec298dee348c - -commit e3b1f110b9eb2286e481febf29a015256a48c576 -Author: Loren Merritt -Date: Sun Jan 29 08:46:20 2006 +0000 - - fix mv predictors in pre-me for b-adapt. 
- - - - git-svn-id: svn://svn.videolan.org/x264/trunk@410 df754926-b1dd-0310-bc7b-ec298dee348c - -commit 6da9fc956cdafd6dde4c334568c18bd7bef292c1 -Author: Loren Merritt -Date: Mon Jan 23 02:02:50 2006 +0000 - - print --nr in sei params. tweak ratecontrol param checking. - - - - git-svn-id: svn://svn.videolan.org/x264/trunk@409 df754926-b1dd-0310-bc7b-ec298dee348c - -commit be6cce52d13fc6424e6244bfce03f67894c15d1e -Author: Måns Rullgård -Date: Thu Jan 19 00:05:42 2006 +0000 - - I've moved - - - git-svn-id: svn://svn.videolan.org/x264/trunk@408 df754926-b1dd-0310-bc7b-ec298dee348c - -commit 273dc626b8b4f7cfe166be728038d9d3e9fd1fb7 -Author: Måns Rullgård -Date: Thu Jan 19 00:05:05 2006 +0000 - - write correct VUI timing info - - - git-svn-id: svn://svn.videolan.org/x264/trunk@407 df754926-b1dd-0310-bc7b-ec298dee348c - -commit d125e4373da6f5e50d2e4ab73aab2d97732212c5 -Author: Loren Merritt -Date: Wed Jan 18 07:42:29 2006 +0000 - - early termination in UMH search - - - - git-svn-id: svn://svn.videolan.org/x264/trunk@406 df754926-b1dd-0310-bc7b-ec298dee348c - -commit a373f2aef22a54c8d597156429ff7f0ad41f1c9e -Author: Loren Merritt -Date: Wed Jan 18 07:08:01 2006 +0000 - - split mv_range enforcement from edge-of-frame clipping. fixes an occasional artifact with long mvs. 
- - - - git-svn-id: svn://svn.videolan.org/x264/trunk@405 df754926-b1dd-0310-bc7b-ec298dee348c - -commit 61b57afb7b614ab09a0508c4c53ca411f4f675f8 -Author: Loren Merritt -Date: Wed Jan 18 04:29:18 2006 +0000 - - cosmetics: suppress warning on unused variables - - - - git-svn-id: svn://svn.videolan.org/x264/trunk@404 df754926-b1dd-0310-bc7b-ec298dee348c - -commit 271c1947a599ccdc3a509260da2d5cd6699148d7 -Author: Loren Merritt -Date: Tue Jan 17 21:16:28 2006 +0000 - - cosmetics: simplify #includes - - - - git-svn-id: svn://svn.videolan.org/x264/trunk@403 df754926-b1dd-0310-bc7b-ec298dee348c - -commit 7fb7e61b6247d9571fe7244f03231adbcc1d7e75 -Author: Sam Hocevar -Date: Mon Jan 16 12:23:35 2006 +0000 - - * configure: NSLU2 platform support (why oh why) - - - git-svn-id: svn://svn.videolan.org/x264/trunk@402 df754926-b1dd-0310-bc7b-ec298dee348c - -commit 1d82beb4c26d82d068e84265344e078854907c1e -Author: Eric Petit -Date: Sun Jan 15 22:29:15 2006 +0000 - - Re-enabled x86 optims on MacIntel, assume Nasm CVS is installed and - -f macho -DPREFIX just seems to do the job - - - git-svn-id: svn://svn.videolan.org/x264/trunk@401 df754926-b1dd-0310-bc7b-ec298dee348c - -commit 096c4eb70a5b3cc5aebda44e62b4d3dd83edbc9c -Author: Eric Petit -Date: Sat Jan 14 16:11:48 2006 +0000 - - Quick compile fix for OS X / Intel - Optimizations are disabled at the moment. In order to get them to - work, we'd need either nasm to be able to output Mach-O object files, - or we should convert the assembly code to something OS X can handle, - like gas. 
- - - git-svn-id: svn://svn.videolan.org/x264/trunk@400 df754926-b1dd-0310-bc7b-ec298dee348c - -commit 743ad5971608e491bd1d70a31e9bfdc496301eef -Author: Loren Merritt -Date: Fri Jan 13 06:54:10 2006 +0000 - - cli: large file support - - - - git-svn-id: svn://svn.videolan.org/x264/trunk@399 df754926-b1dd-0310-bc7b-ec298dee348c - -commit 0abf15d29e904339fb9f606e83e01c9265e54b15 -Author: Loren Merritt -Date: Tue Jan 10 07:31:29 2006 +0000 - - dct-domain noise reduction (ported from lavc) - - - - git-svn-id: svn://svn.videolan.org/x264/trunk@398 df754926-b1dd-0310-bc7b-ec298dee348c - -commit 6bf39eaa780ef0877b7d6fe8497df9a38d4baa3d -Author: Loren Merritt -Date: Mon Jan 9 06:18:39 2006 +0000 - - early termination within large SADs. ~1% faster UMH, ~4% faster ESA. - - - - git-svn-id: svn://svn.videolan.org/x264/trunk@397 df754926-b1dd-0310-bc7b-ec298dee348c - -commit 73a45ef20dec4dc709e029f175eb20ae8eb099b9 -Author: Loren Merritt -Date: Thu Jan 5 19:11:38 2006 +0000 - - mkv: increase nalu size size to 4 bytes. - patch by Haali. 
- - - - git-svn-id: svn://svn.videolan.org/x264/trunk@396 df754926-b1dd-0310-bc7b-ec298dee348c - -commit d3c2f10353e8409932f05be20c11f4eae09974c1 -Author: Loren Merritt -Date: Wed Jan 4 03:43:15 2006 +0000 - - less 64bit math: 12% faster trellis - - - - git-svn-id: svn://svn.videolan.org/x264/trunk@395 df754926-b1dd-0310-bc7b-ec298dee348c - -commit 28c0f2419db96278a14d126de3859a67d31d0a84 -Author: Loren Merritt -Date: Sun Jan 1 10:14:17 2006 +0000 - - more error checking of input parameters - - - - git-svn-id: svn://svn.videolan.org/x264/trunk@394 df754926-b1dd-0310-bc7b-ec298dee348c - -commit bf1e4d1faba2eff0f54029ccf4d98ce9ef09a757 -Author: Loren Merritt -Date: Sun Jan 1 09:39:29 2006 +0000 - - always write sps.vui - - - - git-svn-id: svn://svn.videolan.org/x264/trunk@393 df754926-b1dd-0310-bc7b-ec298dee348c - -commit 7091b47e853fb45ae2d9432ea7ffe085efa31936 -Author: Loren Merritt -Date: Sat Dec 31 14:16:02 2005 +0000 - - use some extra packing modes for CQM headers. - fix typo in --cqm4p[yc]. - - - - git-svn-id: svn://svn.videolan.org/x264/trunk@392 df754926-b1dd-0310-bc7b-ec298dee348c - -commit a977f764240cb9139c2152448bb85dd89260639f -Author: Loren Merritt -Date: Fri Dec 30 08:26:42 2005 +0000 - - MSVC compatibility fixes - - - - git-svn-id: svn://svn.videolan.org/x264/trunk@391 df754926-b1dd-0310-bc7b-ec298dee348c - -commit 2fa8f84b6e108222735c2895b6419ed8c29ef031 -Author: Loren Merritt -Date: Fri Dec 30 04:56:49 2005 +0000 - - joint bidirectional motion refinement (--bime) - - - - git-svn-id: svn://svn.videolan.org/x264/trunk@390 df754926-b1dd-0310-bc7b-ec298dee348c - -commit 684d2d58a5e60bec5bd45834e1c87b4b150c4244 -Author: Loren Merritt -Date: Sat Dec 24 20:59:13 2005 +0000 - - fix some overflows in mp4 timestamps. - patch by Francesco Corriga. 
- - - - git-svn-id: svn://svn.videolan.org/x264/trunk@389 df754926-b1dd-0310-bc7b-ec298dee348c - -commit 25b40141a3d6569bfdc58a94d3004a89211029d6 -Author: Loren Merritt -Date: Tue Dec 20 02:57:52 2005 +0000 - - Successive elimination motion search: same as exhaustive search, but 2-3x faster. - - - - git-svn-id: svn://svn.videolan.org/x264/trunk@388 df754926-b1dd-0310-bc7b-ec298dee348c - -commit a9607af8a776bb00aa463fa926fb4e4661eff1e4 -Author: Eric Petit -Date: Tue Dec 13 16:32:39 2005 +0000 - - Fixed cc_check on OS X (gcc -o /dev/null always fails) - - - git-svn-id: svn://svn.videolan.org/x264/trunk@387 df754926-b1dd-0310-bc7b-ec298dee348c - -commit b914f8081539c243a7a3f5a15a11145e06466da9 -Author: Loren Merritt -Date: Tue Dec 13 11:24:02 2005 +0000 - - postpone pskip decision until after p16x16ref0 motion search. - reduces the number of erroneous pskips in low-detail regions. - - - - git-svn-id: svn://svn.videolan.org/x264/trunk@386 df754926-b1dd-0310-bc7b-ec298dee348c - -commit cc3308878925bf33c0e2707c9177dd345ed238a5 -Author: Loren Merritt -Date: Wed Dec 7 17:29:20 2005 +0000 - - configure: autodetect gpac, avis, pthread, vfw - - - - git-svn-id: svn://svn.videolan.org/x264/trunk@385 df754926-b1dd-0310-bc7b-ec298dee348c - -commit 38fcbfbeb53d402f9431f18709aee37987dcf318 -Author: Loren Merritt -Date: Mon Dec 5 12:46:46 2005 +0000 - - --no-fast-pskip - patch by Alex Wright. - - - - git-svn-id: svn://svn.videolan.org/x264/trunk@384 df754926-b1dd-0310-bc7b-ec298dee348c - -commit 5ce628fa0413b7d87e87619a65a9e1cabe5cd5be -Author: Loren Merritt -Date: Mon Dec 5 12:38:46 2005 +0000 - - cosmetics: config.h is now modified only by configure. make now calls configure if you haven't. - - - - git-svn-id: svn://svn.videolan.org/x264/trunk@383 df754926-b1dd-0310-bc7b-ec298dee348c - -commit f03dbfd42d110e982323f78aac131024e0687590 -Author: Loren Merritt -Date: Sun Dec 4 21:19:17 2005 +0000 - - MP4: set "track enabled" flag. - patch by Robert Swain. 
- - - - git-svn-id: svn://svn.videolan.org/x264/trunk@382 df754926-b1dd-0310-bc7b-ec298dee348c - -commit 8443f260777556ebd6132f6448d406c769194e23 -Author: Loren Merritt -Date: Sat Dec 3 01:50:52 2005 +0000 - - faster subpel motion search. - patch by Alex Wright. - - - - git-svn-id: svn://svn.videolan.org/x264/trunk@381 df754926-b1dd-0310-bc7b-ec298dee348c - -commit 8f0d66cc0973cfb8360fad55b22248fe620def34 -Author: Loren Merritt -Date: Mon Nov 28 07:43:22 2005 +0000 - - don't use gnu extensions to grep and sed. - - - - git-svn-id: svn://svn.videolan.org/x264/trunk@380 df754926-b1dd-0310-bc7b-ec298dee348c - -commit 6826cf2faf2692b0bb37780148a89b0e58826f6b -Author: Loren Merritt -Date: Mon Nov 28 02:03:12 2005 +0000 - - pkg-config: major.minor.patch version - - - git-svn-id: svn://svn.videolan.org/x264/trunk@379 df754926-b1dd-0310-bc7b-ec298dee348c - -commit 8e44d938b225c0a4dabad257b471335fdd0fe18d -Author: Loren Merritt -Date: Mon Nov 28 00:29:10 2005 +0000 - - `make fprofiled` to automate gcc -fprofile-generate/use - - - - git-svn-id: svn://svn.videolan.org/x264/trunk@378 df754926-b1dd-0310-bc7b-ec298dee348c - -commit 71b75efe735e76d8d6ec4b51cd09b477dc0908cc -Author: Loren Merritt -Date: Sun Nov 27 23:24:43 2005 +0000 - - 10l - - - - git-svn-id: svn://svn.videolan.org/x264/trunk@377 df754926-b1dd-0310-bc7b-ec298dee348c - -commit bdddcf97476ae25a8bd80339090c399b59b8c2f3 -Author: Loren Merritt -Date: Sun Nov 27 23:23:49 2005 +0000 - - param.b_repeat_headers (not yet used) - - - - git-svn-id: svn://svn.videolan.org/x264/trunk@376 df754926-b1dd-0310-bc7b-ec298dee348c - -commit 8c7611c14281f5d597262aa66771f0b9b50366a8 -Author: Loren Merritt -Date: Tue Nov 22 19:27:54 2005 +0000 - - support pkg-config. - patch by Caro. 
- - - - git-svn-id: svn://svn.videolan.org/x264/trunk@375 df754926-b1dd-0310-bc7b-ec298dee348c - -commit 78d2f605d0293484d50d58b74489700b65cc0472 -Author: Loren Merritt -Date: Tue Nov 22 06:59:48 2005 +0000 - - write encoding options to the userdata SEI and to the 2pass statsfile. - check for incompatible options in the 2nd pass. - - - - git-svn-id: svn://svn.videolan.org/x264/trunk@374 df754926-b1dd-0310-bc7b-ec298dee348c - -commit c010ba1dde8fb861417e30c0d4316c6cb33064dd -Author: Loren Merritt -Date: Tue Nov 22 05:02:33 2005 +0000 - - change default level to "5.1" - - - - git-svn-id: svn://svn.videolan.org/x264/trunk@373 df754926-b1dd-0310-bc7b-ec298dee348c - -commit 05e6cf0516ff5646c841fa66d96be5e0264b0daa -Author: Loren Merritt -Date: Tue Nov 22 02:53:53 2005 +0000 - - skip dequant+idct of decimated blocks. - - - - git-svn-id: svn://svn.videolan.org/x264/trunk@372 df754926-b1dd-0310-bc7b-ec298dee348c - -commit bc478923aefe1e4aa5e0201b2214f1ed8ad8f719 -Author: Loren Merritt -Date: Tue Nov 22 02:48:39 2005 +0000 - - after a 1pass ABR, print the value of --crf which would result in the same bitrate. - - - - git-svn-id: svn://svn.videolan.org/x264/trunk@371 df754926-b1dd-0310-bc7b-ec298dee348c - -commit 528cbd1f16cd20b3ee8bbfd5b5edf6634a7f4634 -Author: Loren Merritt -Date: Tue Nov 22 02:43:11 2005 +0000 - - subpel search: always check mvp. - - - - git-svn-id: svn://svn.videolan.org/x264/trunk@370 df754926-b1dd-0310-bc7b-ec298dee348c - -commit 429e0603017630feb239e22de3eb279ee02932c9 -Author: Loren Merritt -Date: Tue Nov 22 02:36:29 2005 +0000 - - faster b-rdo (skip RD of modes with bad SATD). - patch by Alex Wright. - - - - git-svn-id: svn://svn.videolan.org/x264/trunk@369 df754926-b1dd-0310-bc7b-ec298dee348c - -commit 6fe92323b2e007e3a31714fb5b090b732fc24e62 -Author: Loren Merritt -Date: Fri Nov 18 11:20:07 2005 +0000 - - RD mode decision for B-frames (--b-rdo) - patch by Alex Wright. 
- - - - git-svn-id: svn://svn.videolan.org/x264/trunk@368 df754926-b1dd-0310-bc7b-ec298dee348c - -commit 322c42ad8ca9ac3d4755e62fadb29c3ef7a4ecf5 -Author: Sam Hocevar -Date: Fri Nov 11 23:57:18 2005 +0000 - - * common/amd64/quant-a.asm: added missing GLOBAL flags that prevented PIC - builds, thanks to Anssi Hannula. - - - git-svn-id: svn://svn.videolan.org/x264/trunk@367 df754926-b1dd-0310-bc7b-ec298dee348c - -commit ffd008ebdd6ebbf6f83dbf08315f3765a072261a -Author: Sam Hocevar -Date: Fri Nov 11 17:46:24 2005 +0000 - - * configure: added the Alpha platform. - - - git-svn-id: svn://svn.videolan.org/x264/trunk@366 df754926-b1dd-0310-bc7b-ec298dee348c - -commit 9187a8f84ba24b2825487971ce94db404303393d -Author: Loren Merritt -Date: Mon Nov 7 07:53:42 2005 +0000 - - use array_non_zero() when we don't need a full array_non_zero_count() - - - - git-svn-id: svn://svn.videolan.org/x264/trunk@365 df754926-b1dd-0310-bc7b-ec298dee348c - -commit d18bbd3b2e28958b9e153b62033a7f66f6fea0ec -Author: Loren Merritt -Date: Sun Nov 6 07:07:30 2005 +0000 - - mmx dequant. up to 3% speedup w/ RD. - - - - git-svn-id: svn://svn.videolan.org/x264/trunk@364 df754926-b1dd-0310-bc7b-ec298dee348c - -commit d447c2d3db71e0b422ed9330ac26410ba9f90622 -Author: Loren Merritt -Date: Sun Nov 6 00:26:43 2005 +0000 - - allow --level to understand names in addition to idc - - - - git-svn-id: svn://svn.videolan.org/x264/trunk@363 df754926-b1dd-0310-bc7b-ec298dee348c - -commit 87e5994706c76bd628c7e23f0dca95f05e922a7c -Author: Loren Merritt -Date: Fri Nov 4 11:39:58 2005 +0000 - - check (most of) the levels constaints. - set default max_mv_range based on level_idc. - - - - git-svn-id: svn://svn.videolan.org/x264/trunk@362 df754926-b1dd-0310-bc7b-ec298dee348c - -commit 1e80b69b3717e019f6cbab071582f9812b85fa4d -Author: Loren Merritt -Date: Thu Nov 3 22:57:52 2005 +0000 - - if p16x16 RD decides to code a MB as p_skip, then don't check smaller partitions. 
- - - - git-svn-id: svn://svn.videolan.org/x264/trunk@361 df754926-b1dd-0310-bc7b-ec298dee348c - -commit 5c43fb3b66b5ccf4ae0c4bd63599bf3f64d4557e -Author: Loren Merritt -Date: Thu Nov 3 22:20:47 2005 +0000 - - Trellis RD quantization. - around +.2 dB - - - - git-svn-id: svn://svn.videolan.org/x264/trunk@360 df754926-b1dd-0310-bc7b-ec298dee348c - -commit 3f1ed7cee623b64fd66fc60db62275df23177966 -Author: Loren Merritt -Date: Thu Nov 3 20:16:56 2005 +0000 - - cosmetics: XCHG macro - - - - git-svn-id: svn://svn.videolan.org/x264/trunk@359 df754926-b1dd-0310-bc7b-ec298dee348c - -commit 662e56b59eee5ddc15e5fb8c53c7cd49bcc39eeb -Author: Loren Merritt -Date: Thu Nov 3 11:27:24 2005 +0000 - - skip a few duplicate candidates in qpel search. - - - - git-svn-id: svn://svn.videolan.org/x264/trunk@358 df754926-b1dd-0310-bc7b-ec298dee348c - -commit 2107a4f7204f0a764830b562e86d50f2b979a0b8 -Author: Loren Merritt -Date: Thu Nov 3 11:26:17 2005 +0000 - - skip a few duplicate candidates in fullpel hex&umh search. - - - - git-svn-id: svn://svn.videolan.org/x264/trunk@357 df754926-b1dd-0310-bc7b-ec298dee348c - -commit 01c05a79022c2349ddba2f3e101f8a1d26500906 -Author: Loren Merritt -Date: Thu Nov 3 06:53:59 2005 +0000 - - cli: arithmetic overflow in bitrate printing - - - - git-svn-id: svn://svn.videolan.org/x264/trunk@356 df754926-b1dd-0310-bc7b-ec298dee348c - -commit db67b818250aa75680df5ff15ff58418e850d321 -Author: Loren Merritt -Date: Thu Nov 3 06:47:19 2005 +0000 - - cosmetics in x264_cabac_mb_type - - - - git-svn-id: svn://svn.videolan.org/x264/trunk@355 df754926-b1dd-0310-bc7b-ec298dee348c - -commit 89d2c6a13fc2c864191af2ad86d07dee69a6c75b -Author: Loren Merritt -Date: Thu Nov 3 06:40:46 2005 +0000 - - X264_ABS => abs - - - - git-svn-id: svn://svn.videolan.org/x264/trunk@354 df754926-b1dd-0310-bc7b-ec298dee348c - -commit d13a18680572b8ae1075f9a2d53bf57b51eab6ec -Author: Loren Merritt -Date: Thu Nov 3 02:42:48 2005 +0000 - - amd64 sse2 8x8dct. 1.45x faster than mmx. 
- - - - git-svn-id: svn://svn.videolan.org/x264/trunk@353 df754926-b1dd-0310-bc7b-ec298dee348c - -commit 08e19ed8f28e5bb1fdd951eb2bab04c0248f9af1 -Author: Loren Merritt -Date: Tue Nov 1 03:34:48 2005 +0000 - - allow 1pass ratecontrol with keyint=1 - - - - git-svn-id: svn://svn.videolan.org/x264/trunk@352 df754926-b1dd-0310-bc7b-ec298dee348c - -commit 9617e25c2ea08d029decb106fa7cf51a13a03706 -Author: Loren Merritt -Date: Mon Oct 31 04:02:15 2005 +0000 - - cli: print estimated time left in --progress - - - - git-svn-id: svn://svn.videolan.org/x264/trunk@351 df754926-b1dd-0310-bc7b-ec298dee348c - -commit d484bce60eb3405b2d1bc666a61120dea6bbe294 -Author: Loren Merritt -Date: Mon Oct 31 02:52:33 2005 +0000 - - doc/ratecontrol.txt - - - - git-svn-id: svn://svn.videolan.org/x264/trunk@350 df754926-b1dd-0310-bc7b-ec298dee348c - -commit 5aced82614a4be3106eec04ba983d122e9e7f668 -Author: Loren Merritt -Date: Mon Oct 31 02:50:36 2005 +0000 - - rm doc/dct.txt - - - - git-svn-id: svn://svn.videolan.org/x264/trunk@349 df754926-b1dd-0310-bc7b-ec298dee348c - -commit b179e4740f7624ea1be4db0682a658fe6822a9e8 -Author: Loren Merritt -Date: Sun Oct 30 23:42:24 2005 +0000 - - in constant QP mode, write that QP in the PPS to save a few bits in each slice header. - - - - git-svn-id: svn://svn.videolan.org/x264/trunk@348 df754926-b1dd-0310-bc7b-ec298dee348c - -commit 108f197cb62a9f29b0b671e2eceafd8ccc4ded21 -Author: Loren Merritt -Date: Sun Oct 30 06:22:29 2005 +0000 - - faster decimation - - - - git-svn-id: svn://svn.videolan.org/x264/trunk@347 df754926-b1dd-0310-bc7b-ec298dee348c - -commit fa01979f7260543c845d0823d4a7c0774bcf5a16 -Author: Loren Merritt -Date: Sat Oct 29 04:05:57 2005 +0000 - - cosmetics: fix an erroneous warning from r340. 
- - - - git-svn-id: svn://svn.videolan.org/x264/trunk@346 df754926-b1dd-0310-bc7b-ec298dee348c - -commit 8d857c5601be89dc32d995c519c096805249f77f -Author: Loren Merritt -Date: Sat Oct 29 03:00:50 2005 +0000 - - cosmetics: change literal cabac_block_cat to an enum. - - - - git-svn-id: svn://svn.videolan.org/x264/trunk@345 df754926-b1dd-0310-bc7b-ec298dee348c - -commit c636f90355a1855b0b2576d79d34541c063daee5 -Author: Loren Merritt -Date: Sat Oct 29 02:21:39 2005 +0000 - - cabac: merge i_state with i_mps. bs_write multiple bits at once. - - - - git-svn-id: svn://svn.videolan.org/x264/trunk@344 df754926-b1dd-0310-bc7b-ec298dee348c - -commit 5167ebb2bcfdd525d47abc91329d3588feab0b5f -Author: Loren Merritt -Date: Sat Oct 29 01:43:29 2005 +0000 - - remove unused adaptive cabac_idc code - - - - git-svn-id: svn://svn.videolan.org/x264/trunk@343 df754926-b1dd-0310-bc7b-ec298dee348c - -commit 817ef1468a80b20a76b4c12af44e3b85339880d5 -Author: Eric Petit -Date: Thu Oct 27 10:27:04 2005 +0000 - - Fixed compilation on PPC (spotted by David Wolstencroft) - - - git-svn-id: svn://svn.videolan.org/x264/trunk@342 df754926-b1dd-0310-bc7b-ec298dee348c - -commit 109ae085288c0068e2f40bfffd41070bd25dfa8b -Author: Loren Merritt -Date: Wed Oct 26 08:38:11 2005 +0000 - - mmx deblocking. - 2.5x faster deblocking functions, 1-4% overall. - - - - git-svn-id: svn://svn.videolan.org/x264/trunk@341 df754926-b1dd-0310-bc7b-ec298dee348c - -commit 166601503800e00a33d88eb488da744a486ecb77 -Author: Loren Merritt -Date: Wed Oct 26 07:04:59 2005 +0000 - - If frame count is known at init time (cli & vfw), then abort if the 2nd pass - exceeds the length of the 1st pass. - If it's not known (mencoder), then report a non-fatal error when we run off the - end of the 1st pass stats, and switch to constant QP. 
- - - - git-svn-id: svn://svn.videolan.org/x264/trunk@340 df754926-b1dd-0310-bc7b-ec298dee348c - -commit 75832019417943ed6a68b99bd75f5ef7efe1d998 -Author: Loren Merritt -Date: Wed Oct 26 06:40:51 2005 +0000 - - move checkasm to tools/ - delete unused stuff in testing/ - `make clean` deletes checkasm and avc2avi - - - - git-svn-id: svn://svn.videolan.org/x264/trunk@339 df754926-b1dd-0310-bc7b-ec298dee348c - -commit 6347263823e0fce26593fe36d812ba95931ebcb0 -Author: Loren Merritt -Date: Wed Oct 26 06:31:35 2005 +0000 - - checkasm: check 8x8dct, mc average, quant, and SSE2. - - - - git-svn-id: svn://svn.videolan.org/x264/trunk@338 df754926-b1dd-0310-bc7b-ec298dee348c - -commit 57900a1b0caa43372433b7bca25b26d764fadaff -Author: Loren Merritt -Date: Wed Oct 26 06:30:19 2005 +0000 - - r336 broke amd64 x264_pixel_sad_16x16_sse2 (though it's not being used) - - - git-svn-id: svn://svn.videolan.org/x264/trunk@337 df754926-b1dd-0310-bc7b-ec298dee348c - -commit 360eb55eda428cba8d6d4e411ff87e0d5dedbf05 -Author: Loren Merritt -Date: Tue Oct 25 10:57:29 2005 +0000 - - Windows 64bit asm. - patch by squid_80. - - - - git-svn-id: svn://svn.videolan.org/x264/trunk@336 df754926-b1dd-0310-bc7b-ec298dee348c - -commit 6d969739baac6b9f7e9bcb44c3b7dbc21890dd1b -Author: Loren Merritt -Date: Mon Oct 24 16:15:11 2005 +0000 - - delete build/cygwin because it's handled in the main configure/makefile. - - - git-svn-id: svn://svn.videolan.org/x264/trunk@335 df754926-b1dd-0310-bc7b-ec298dee348c - -commit 0ddc9d5524a48882ac804948775fd7a35b3a07da -Author: Loren Merritt -Date: Sun Oct 23 09:52:34 2005 +0000 - - --crf: 1pass quality-based VBR. 
- - - - git-svn-id: svn://svn.videolan.org/x264/trunk@334 df754926-b1dd-0310-bc7b-ec298dee348c - -commit 06f1dafd17e9ebb1cd9d271fd72eb0c04e2337bc -Author: Eric Petit -Date: Sun Oct 16 09:53:05 2005 +0000 - - Added --enable-gprof (patch by Johannes Reinhardt) - - - git-svn-id: svn://svn.videolan.org/x264/trunk@333 df754926-b1dd-0310-bc7b-ec298dee348c - -commit 2ac5fe040b35546a7d7bc0b463fd4a9cb268ff3b -Author: Loren Merritt -Date: Sun Oct 16 05:44:50 2005 +0000 - - cosmetics: remove #if0'ed code - patch by Robert Swain. - - - - git-svn-id: svn://svn.videolan.org/x264/trunk@332 df754926-b1dd-0310-bc7b-ec298dee348c - -commit 1647e6d6e147b3e2072b4f36b1ed27df0715ff0d -Author: Loren Merritt -Date: Sun Oct 16 01:47:30 2005 +0000 - - faster bs_write - - - - git-svn-id: svn://svn.videolan.org/x264/trunk@331 df754926-b1dd-0310-bc7b-ec298dee348c - -commit b659ca6f53df6f7b1b423112ef0f95e7eb166ef5 -Author: Loren Merritt -Date: Sat Oct 15 04:11:06 2005 +0000 - - during RDO, skip the bitstream writing and just calculate the number of bits - that would be used. speedup: cabac +4-8%, cavlc +2-4%. - - - - git-svn-id: svn://svn.videolan.org/x264/trunk@330 df754926-b1dd-0310-bc7b-ec298dee348c - -commit 48c2e935e3638a38c988b11204ff52a85bf48fc9 -Author: Loren Merritt -Date: Sat Oct 15 00:27:17 2005 +0000 - - Use SAD instead of SATD for halfpel motion search. - Move multiref termination after halfpel search. - Total: 3-7% speedup and +/-.02 dB. - patch by Alex Wright. - - - - git-svn-id: svn://svn.videolan.org/x264/trunk@329 df754926-b1dd-0310-bc7b-ec298dee348c - -commit a8ac858b06ddca09acd98c35456b1008412cbe94 -Author: Loren Merritt -Date: Thu Oct 13 18:19:38 2005 +0000 - - VfW: mixed refs. - patch by celtic_druid. 
- - - - git-svn-id: svn://svn.videolan.org/x264/trunk@328 df754926-b1dd-0310-bc7b-ec298dee348c - -commit d69837d312aa09c020416008c26f7008783d8c7f -Author: Loren Merritt -Date: Mon Oct 10 22:51:26 2005 +0000 - - allow non-mod16 resolutions - - - - git-svn-id: svn://svn.videolan.org/x264/trunk@327 df754926-b1dd-0310-bc7b-ec298dee348c - -commit 67f2a4791ca35a019dd645818c2c95f2b88c936e -Author: Loren Merritt -Date: Mon Oct 10 01:29:17 2005 +0000 - - VfW: prevent duplicate free() in compress_end() - - - - git-svn-id: svn://svn.videolan.org/x264/trunk@326 df754926-b1dd-0310-bc7b-ec298dee348c - -commit 0bde6ae12b9eda914fa51da95bef8beae09ea8f0 -Author: Loren Merritt -Date: Mon Oct 10 00:32:45 2005 +0000 - - cosmetics: remove declarations of nonexistent asm functions - - - - git-svn-id: svn://svn.videolan.org/x264/trunk@325 df754926-b1dd-0310-bc7b-ec298dee348c - -commit 015ac5865c81ee94125493aca28d0ccbc0f639b4 -Author: Loren Merritt -Date: Sun Oct 9 21:55:53 2005 +0000 - - cosmetics (whitespace) in VfW - - - - git-svn-id: svn://svn.videolan.org/x264/trunk@324 df754926-b1dd-0310-bc7b-ec298dee348c - -commit 54d413b9ad22244599489a0c50e99fafa07b89a1 -Author: Loren Merritt -Date: Sun Oct 9 21:37:56 2005 +0000 - - VfW: some reorganization - patch by Francesco Corriga. - - - - git-svn-id: svn://svn.videolan.org/x264/trunk@323 df754926-b1dd-0310-bc7b-ec298dee348c - -commit a75462ead66beb222aae1efe1958848c26dc4be6 -Author: Loren Merritt -Date: Sun Oct 9 06:10:03 2005 +0000 - - cosmetics: merge some duplicate tables - - - - git-svn-id: svn://svn.videolan.org/x264/trunk@322 df754926-b1dd-0310-bc7b-ec298dee348c - -commit 1c6ccbf543ac7725e46f94bdb24fa6784d315962 -Author: Loren Merritt -Date: Sun Oct 9 03:32:56 2005 +0000 - - remove cabac byte-stuffing code, because it just wastes bits in lossless, and does nothing at all at sane bitrates. 
- - - - git-svn-id: svn://svn.videolan.org/x264/trunk@321 df754926-b1dd-0310-bc7b-ec298dee348c - -commit acee2d5168a39f301b7cda1d4effe943e321e1f8 -Author: Loren Merritt -Date: Sun Oct 9 00:14:30 2005 +0000 - - don't allocate lowres planes if they won't be used (i.e. in the 2nd pass). - - - - git-svn-id: svn://svn.videolan.org/x264/trunk@320 df754926-b1dd-0310-bc7b-ec298dee348c - -commit 938c52d2a7285c5872eea2f5d165a1b26699b349 -Author: Loren Merritt -Date: Sat Oct 8 21:17:44 2005 +0000 - - cosmetics: move some stuff from macroblock_encode to cache_save - - - - git-svn-id: svn://svn.videolan.org/x264/trunk@319 df754926-b1dd-0310-bc7b-ec298dee348c - -commit a0012bf38d366b1b97e571fe27c665139f3c631c -Author: Loren Merritt -Date: Sat Oct 8 06:49:29 2005 +0000 - - new option: --mixed-refs - Allows each 8x8 or 16x8 partition to independently select a reference frame, as opposed to only one ref per macroblock. - patch mostly by Alex Wright (alexw0885 at hotmail dot com). - - - - git-svn-id: svn://svn.videolan.org/x264/trunk@318 df754926-b1dd-0310-bc7b-ec298dee348c - -commit 68592115c77b8fcd091b32f2d39d8e129a95bbef -Author: Loren Merritt -Date: Sat Oct 8 04:45:51 2005 +0000 - - cosmetics in option parsing - - - - git-svn-id: svn://svn.videolan.org/x264/trunk@317 df754926-b1dd-0310-bc7b-ec298dee348c - -commit 4b925a1cfbdd6613449b70283cd6f80adbeb1f27 -Author: Loren Merritt -Date: Sat Oct 8 03:52:10 2005 +0000 - - expose the rest of the VUI flags. - patch by Christian Heine. - - - - git-svn-id: svn://svn.videolan.org/x264/trunk@316 df754926-b1dd-0310-bc7b-ec298dee348c - -commit aebad793a69d175b139da28aafff6dbfec81d7c1 -Author: Sam Hocevar -Date: Tue Oct 4 12:08:33 2005 +0000 - - * common/amd64/mc-a.asm: use RIP-relative addressing in PIC mode. 
- - - git-svn-id: svn://svn.videolan.org/x264/trunk@315 df754926-b1dd-0310-bc7b-ec298dee348c - -commit db80497dd2e8bb0cd02c45d73ca74294b0671b61 -Author: Loren Merritt -Date: Tue Oct 4 07:12:21 2005 +0000 - - temporal predictors for 16x16 motion search. - - - - git-svn-id: svn://svn.videolan.org/x264/trunk@314 df754926-b1dd-0310-bc7b-ec298dee348c - -commit 7e165477fb69bc107e3fcfdac3e2cb53541870f6 -Author: Loren Merritt -Date: Sun Oct 2 22:07:12 2005 +0000 - - slightly faster/cleaner block_residual_write_cabac - - - - git-svn-id: svn://svn.videolan.org/x264/trunk@313 df754926-b1dd-0310-bc7b-ec298dee348c - -commit 4c8ccfe6de4a44cd46bcaf1fc17ae90bfe34d958 -Author: Loren Merritt -Date: Sun Oct 2 20:12:46 2005 +0000 - - cosmetics - - - - git-svn-id: svn://svn.videolan.org/x264/trunk@312 df754926-b1dd-0310-bc7b-ec298dee348c - -commit ce3a422466b4df055f5b67116483eee20676939c -Author: Loren Merritt -Date: Sun Oct 2 05:50:35 2005 +0000 - - cli: fix a crash on piped input. - - - git-svn-id: svn://svn.videolan.org/x264/trunk@311 df754926-b1dd-0310-bc7b-ec298dee348c - -commit cb88eb7bf7756e25123cdfffdbdc49abc169ef33 -Author: Loren Merritt -Date: Sun Oct 2 05:01:39 2005 +0000 - - stats summary: separately report all 5 partition sizes, and add ref usages - - - - git-svn-id: svn://svn.videolan.org/x264/trunk@310 df754926-b1dd-0310-bc7b-ec298dee348c - -commit bab1d61dd306199747dd8f949bde2a49b20c6f70 -Author: Loren Merritt -Date: Sun Oct 2 04:03:06 2005 +0000 - - disposable frames shouldn't get their own coded_frame_num. 
- - - - git-svn-id: svn://svn.videolan.org/x264/trunk@309 df754926-b1dd-0310-bc7b-ec298dee348c - -commit 31a36aa8621c7bd0264b87421e8d0a490d7c45f5 -Author: Loren Merritt -Date: Sat Oct 1 19:33:26 2005 +0000 - - typo in ia32 x264_pixel_avg_weight_w8_mmxext - - - - git-svn-id: svn://svn.videolan.org/x264/trunk@308 df754926-b1dd-0310-bc7b-ec298dee348c - -commit 458e63cadb0c6295273fd85def3aca0098a309e3 -Author: Loren Merritt -Date: Sat Oct 1 06:48:13 2005 +0000 - - mmx avg (already existed by not used for bipred) - mmx biweighted avg (3x faster than C) - - - - git-svn-id: svn://svn.videolan.org/x264/trunk@307 df754926-b1dd-0310-bc7b-ec298dee348c - -commit 3112619429c0cf781817867f0d124c882740d66f -Author: Loren Merritt -Date: Sat Oct 1 04:43:31 2005 +0000 - - cosmetics: move avg function ptrs from pixf to mc. - - - - git-svn-id: svn://svn.videolan.org/x264/trunk@306 df754926-b1dd-0310-bc7b-ec298dee348c - -commit 82d5e6faa6aa8ca8888481019513782ef9701240 -Author: Loren Merritt -Date: Tue Sep 27 19:59:09 2005 +0000 - - with B-pyramid, forget old refs in POC order instead of coded order. - (before, b_skip was unavailable with pyramid and ref=1) - - - - git-svn-id: svn://svn.videolan.org/x264/trunk@305 df754926-b1dd-0310-bc7b-ec298dee348c - -commit 4223e3874f7268d9ea36f32a2150c3a123881f4b -Author: Loren Merritt -Date: Mon Sep 26 03:00:10 2005 +0000 - - typo in r296. - patch by lurui. - - - git-svn-id: svn://svn.videolan.org/x264/trunk@304 df754926-b1dd-0310-bc7b-ec298dee348c - -commit 2a3417b0fe4a806480e49a6dd13ab8d625b03466 -Author: Sam Hocevar -Date: Sun Sep 25 22:12:56 2005 +0000 - - * common/amd64/*.asm: use RIP-related addressing in PIC mode. 
- - - git-svn-id: svn://svn.videolan.org/x264/trunk@303 df754926-b1dd-0310-bc7b-ec298dee348c - -commit 77997bffd6fdb3727c31f787767aafa11bc62266 -Author: Sam Hocevar -Date: Sun Sep 25 19:52:57 2005 +0000 - - * common/amd64/mc-a.asm: removed useless global variables - - - git-svn-id: svn://svn.videolan.org/x264/trunk@302 df754926-b1dd-0310-bc7b-ec298dee348c - -commit b2e9af98bf5f44363f3877baf7bfa6cce4d64805 -Author: Sam Hocevar -Date: Sun Sep 25 13:52:58 2005 +0000 - - * configure: support extra $(ASFLAGS) through --extra-asflags. - - - git-svn-id: svn://svn.videolan.org/x264/trunk@301 df754926-b1dd-0310-bc7b-ec298dee348c - -commit 1b16298a215393dc741fabb0e7212c0b0ee53846 -Author: Loren Merritt -Date: Sat Sep 24 19:41:50 2005 +0000 - - reorganized VfW UI. - patch by Antony Boucher, graphic by Jarod. - - - git-svn-id: svn://svn.videolan.org/x264/trunk@300 df754926-b1dd-0310-bc7b-ec298dee348c - -commit 35f641710900a39ea208860befc9cfe35043f7cd -Author: Loren Merritt -Date: Sat Sep 24 18:54:49 2005 +0000 - - MP4 output: update to GPAC 0.4 API. - patch mostly by Robert Swain. - - - git-svn-id: svn://svn.videolan.org/x264/trunk@299 df754926-b1dd-0310-bc7b-ec298dee348c - -commit cfebeac1a475f4a2ee57e5dd3cd1ff0c560f38db -Author: Loren Merritt -Date: Sat Sep 24 18:22:02 2005 +0000 - - faster mmx quant 15bit, and add 16bit version. total speedup: ~0.3% - patch by Christian Heine. - - - git-svn-id: svn://svn.videolan.org/x264/trunk@298 df754926-b1dd-0310-bc7b-ec298dee348c - -commit 49ac5e2f921ef940701e31ca7e6246e44480783b -Author: Loren Merritt -Date: Sat Sep 24 17:04:21 2005 +0000 - - faster mmx satd. *x16: 20%, *x8: 10%, total: 2-4%. - ia32 patch by Christian Heine, amd64 port by me. - - - git-svn-id: svn://svn.videolan.org/x264/trunk@297 df754926-b1dd-0310-bc7b-ec298dee348c - -commit 76192dcb1cc7720a1e633ba6b0fbdb2fbacbe9bb -Author: Loren Merritt -Date: Sat Sep 24 16:58:36 2005 +0000 - - allow i4x4 and i8x8 down-left prediction with emulated top-right samples. 
- based on a patch by Johannes Reinhardt (Johannes dot Reinhardt at uni-konstanz dot de) - - - git-svn-id: svn://svn.videolan.org/x264/trunk@296 df754926-b1dd-0310-bc7b-ec298dee348c - -commit 690a02b1c9132bfecc88068de757e6b0e5ef7b84 -Author: Steve Lhomme -Date: Tue Sep 20 16:18:23 2005 +0000 - - fps patch by Haali - - git-svn-id: svn://svn.videolan.org/x264/trunk@295 df754926-b1dd-0310-bc7b-ec298dee348c - -commit 8a5de70e926c334bcf422d21e8995b7be6ecf496 -Author: Sam Hocevar -Date: Tue Sep 20 15:50:41 2005 +0000 - - * configure: added support for ia64, mips/mipsel, m68k, arm, s390 and hppa - platforms, as well as linux sparc. - - - git-svn-id: svn://svn.videolan.org/x264/trunk@294 df754926-b1dd-0310-bc7b-ec298dee348c - -commit c4ffed4986fe4706c0c5c2b514ce95668f0b8393 -Author: Loren Merritt -Date: Wed Sep 14 17:20:17 2005 +0000 - - MMX quantization functions, and optimization of the C versions. - about 3x faster quant_8x8, quant_4x4, quant_4x4_dc, and quant_2x2_dc. total speedup: 4-10%. - patch by Alexander Izvorski and Christian Heine. - - - - git-svn-id: svn://svn.videolan.org/x264/trunk@293 df754926-b1dd-0310-bc7b-ec298dee348c - -commit 16f423a00ee91f692d441a31fa99394543995582 -Author: Loren Merritt -Date: Sat Sep 10 11:23:09 2005 +0000 - - SSE2 pixel comparison functions - P4: SAD 16x*, SSD 16x*, SATD 16x*: 30% faster, SATD 8x8: 15% faster, total: 2-4% faster - K8: SSD 16x*: 6% faster, total: not much - patch by Alexander Izvorski. - - - git-svn-id: svn://svn.videolan.org/x264/trunk@292 df754926-b1dd-0310-bc7b-ec298dee348c - -commit 6db6362c9d558d0acea9be1975344d217f453ab9 -Author: Loren Merritt -Date: Tue Aug 30 17:11:35 2005 +0000 - - 10l in rev290: duplicate declaration of x264_pixel_sub_8x8_mmx. - - - git-svn-id: svn://svn.videolan.org/x264/trunk@291 df754926-b1dd-0310-bc7b-ec298dee348c - -commit 2d05702f88d1058b2ecd3945cd01269eb86829bb -Author: Loren Merritt -Date: Mon Aug 29 20:37:31 2005 +0000 - - mmx 8x8 dct. 
- On a K8: sub16x16_dct8 3806->1461, add16x16_idct8 4852->1297 cycles. total speedup: 1-3%. - patch by Christian Heine (sennindemokrit at gmx dot net) - - - git-svn-id: svn://svn.videolan.org/x264/trunk@290 df754926-b1dd-0310-bc7b-ec298dee348c - -commit 2e5b0b93384f8d48e32b26beb6badb8a3236c29b -Author: Eric Petit -Date: Mon Aug 29 13:20:45 2005 +0000 - - VC++ fix (thx fenrir) - - - git-svn-id: svn://svn.videolan.org/x264/trunk@289 df754926-b1dd-0310-bc7b-ec298dee348c - -commit 2a6e7a685391f4ae465c79111583c91fb26cb5a8 -Author: Eric Petit -Date: Mon Aug 29 11:20:23 2005 +0000 - - x264.h: issue an explicit warning when neither stdint.h nor inttypes.h - has be included before x264.h - - - git-svn-id: svn://svn.videolan.org/x264/trunk@288 df754926-b1dd-0310-bc7b-ec298dee348c - -commit 0403fed87a9cea867afa55d45500f6396c326659 -Author: Loren Merritt -Date: Wed Aug 17 15:18:42 2005 +0000 - - VfW: SAR wording. patch by Sharktooth. - - - git-svn-id: svn://svn.videolan.org/x264/trunk@287 df754926-b1dd-0310-bc7b-ec298dee348c - -commit 48af1d03ed42e14b51f5e9c6986bd910aaab5b7a -Author: Loren Merritt -Date: Tue Aug 16 15:09:41 2005 +0000 - - cli: workaround to allow "--ratetol inf" on win32. - - - git-svn-id: svn://svn.videolan.org/x264/trunk@286 df754926-b1dd-0310-bc7b-ec298dee348c - -commit 796da8ed7e5ab52eb64d232c89e7e38bfa77215c -Author: Loren Merritt -Date: Tue Aug 9 18:48:57 2005 +0000 - - fix spatial direct mv prediction with B-pyramid. copied from libavcodec. 
- - git-svn-id: svn://svn.videolan.org/x264/trunk@285 df754926-b1dd-0310-bc7b-ec298dee348c - -commit 1459ac0dbca3f1f31557d9d8bb8911cb980aad6b -Author: Laurent Aimar -Date: Tue Aug 9 07:20:26 2005 +0000 - - * all: Patch by Mike Matsnev : - - "The following things were fixed: - * AR calculation was broken on previous import - * Wrong conditional in write_nalu_mkv() was fixed - * Error checking was added in all places" - - - - git-svn-id: svn://svn.videolan.org/x264/trunk@284 df754926-b1dd-0310-bc7b-ec298dee348c - -commit 47673d940a290207345bb13f08c371aa435e92a2 -Author: Laurent Aimar -Date: Tue Aug 9 07:17:26 2005 +0000 - - xyuv: bug fixes + autodetect of video size. - - - git-svn-id: svn://svn.videolan.org/x264/trunk@283 df754926-b1dd-0310-bc7b-ec298dee348c - -commit d9218cb35688033a78f936e963a4ca3572cfdb29 -Author: Eric Petit -Date: Sun Aug 7 17:17:05 2005 +0000 - - Run ranlib after make install (OS X needs that) - - - git-svn-id: svn://svn.videolan.org/x264/trunk@282 df754926-b1dd-0310-bc7b-ec298dee348c - -commit 205910672b5686174b7d6f0a1960d53cd4bd9f9b -Author: Loren Merritt -Date: Tue Jul 26 16:07:17 2005 +0000 - - update i_mb_b16x8_cost_table[] for I8x8 mb type (r278 only fixed a symptom). - - - git-svn-id: svn://svn.videolan.org/x264/trunk@281 df754926-b1dd-0310-bc7b-ec298dee348c - -commit d945b153baa4a81cb40a92e4c09b0e2f16081408 -Author: Laurent Aimar -Date: Fri Jul 22 15:51:10 2005 +0000 - - * all: Added matroska writing. Patch by Mike Matsnev. - - - git-svn-id: svn://svn.videolan.org/x264/trunk@280 df754926-b1dd-0310-bc7b-ec298dee348c - -commit 5552052c55d550b48cb43d33cb3655ea53e4a273 -Author: Laurent Aimar -Date: Fri Jul 22 15:48:18 2005 +0000 - - * pixel.*: - - "I have completed additonal SAD implementations (8x16, 16x8 and 16x16) - using Sparc VIS. Overall speedup is roughly 90% from straight C. I'm - doing development and testing on a Sun Fire V220, with 2 * 1.5ghz - UltraSPARC-III CPUs. - - I've hand-unrolled each of the loops. 
Sun's assembler does not appear - to have macro functionality built-in and I didn't want to establish an - external dependancy on m4. Please let me know if you run into any - trouble with the patch." - - Patch by Phil Jensen. - - - git-svn-id: svn://svn.videolan.org/x264/trunk@279 df754926-b1dd-0310-bc7b-ec298dee348c - -commit d2715116f9ef8d96d78e81010eda7fdee83cc212 -Author: Laurent Aimar -Date: Fri Jul 22 15:43:16 2005 +0000 - - analyse: "It correct the size of array i_mb_b16x8_cost_table - from 16 to 17,otherwise,it can result a mismatch of b16x8 - mb type cost and can result memory read overflow on it." Patch by lurui. - - - - git-svn-id: svn://svn.videolan.org/x264/trunk@278 df754926-b1dd-0310-bc7b-ec298dee348c - -commit f52a280836003583a11b93883e68ac23881355ac -Author: Laurent Aimar -Date: Wed Jul 20 15:39:44 2005 +0000 - - * x264 compilation on NetBSD. Patch by Mike Matsnev. - - - git-svn-id: svn://svn.videolan.org/x264/trunk@277 df754926-b1dd-0310-bc7b-ec298dee348c - -commit 300e93ef08f5b389da3942474da8ec6fb9c62fda -Author: Laurent Aimar -Date: Wed Jul 20 15:27:18 2005 +0000 - - * all: "8x8 SAD written in Sparc Assembly using VIS." Patch by Phil Jensen. - - - - git-svn-id: svn://svn.videolan.org/x264/trunk@276 df754926-b1dd-0310-bc7b-ec298dee348c - -commit 1a0920f5c5c482d18dcbc775a542cb1529d019d0 -Author: Loren Merritt -Date: Fri Jul 15 16:21:58 2005 +0000 - - 10l: rd score for sub-8x8 partitions used wrong mvs. - - - git-svn-id: svn://svn.videolan.org/x264/trunk@275 df754926-b1dd-0310-bc7b-ec298dee348c - -commit 0f34713af75421dcf3db067511d26d08ebe36134 -Author: Loren Merritt -Date: Wed Jul 13 15:52:59 2005 +0000 - - faster SAD_INC_2x16P for amd64. - patch by Josef Zlomek. 
- - - git-svn-id: svn://svn.videolan.org/x264/trunk@274 df754926-b1dd-0310-bc7b-ec298dee348c - -commit 86a01ef552f00fcc3225776bd41d7ebfb6507d0b -Author: Eric Petit -Date: Sun Jul 10 12:51:21 2005 +0000 - - Fixed win32 handle leakage (thanks Trax) - Default enabled support of threads on BeOS - - - git-svn-id: svn://svn.videolan.org/x264/trunk@273 df754926-b1dd-0310-bc7b-ec298dee348c - -commit da60272bf0c4b65128d673daef4d4d7c09c13ae3 -Author: Laurent Aimar -Date: Thu Jul 7 07:48:36 2005 +0000 - - * Add support for UltraSparc (uname -m: sun4u) with Solaris. - Patch by Tuukka Toivonen. - - - git-svn-id: svn://svn.videolan.org/x264/trunk@272 df754926-b1dd-0310-bc7b-ec298dee348c - -commit 95c407157830f714c4914ceaeb850bebd198d14b -Author: Laurent Aimar -Date: Thu Jul 7 07:37:54 2005 +0000 - - * Faster SAD_INC_2x16P. Patch by Alexander Izvorski. - - - git-svn-id: svn://svn.videolan.org/x264/trunk@271 df754926-b1dd-0310-bc7b-ec298dee348c - -commit 90793358d78f5ad79aef3cc09ea80d5ea81bb53b -Author: Loren Merritt -Date: Tue Jun 21 14:49:27 2005 +0000 - - example quant matrix file - - - git-svn-id: svn://svn.videolan.org/x264/trunk@270 df754926-b1dd-0310-bc7b-ec298dee348c - -commit 7b1b45e8a6fc3e36447b7626617978dd7c9d5958 -Author: Loren Merritt -Date: Tue Jun 21 08:16:01 2005 +0000 - - --cqmfile reads quant matrices in a JM-compatible format. 
- - - git-svn-id: svn://svn.videolan.org/x264/trunk@269 df754926-b1dd-0310-bc7b-ec298dee348c - -commit 7a77a1e7295b99a418c4fad2a5ab91f0dc896115 -Author: Loren Merritt -Date: Tue Jun 21 04:45:49 2005 +0000 - - adjust coded buffer size based on input resolution and QP (old default wasn't enough for HD lossless) - - - git-svn-id: svn://svn.videolan.org/x264/trunk@268 df754926-b1dd-0310-bc7b-ec298dee348c - -commit ca8ead2eb1ac51d9784af6fe7a6a3df1fbf10ada -Author: Loren Merritt -Date: Mon Jun 20 00:36:05 2005 +0000 - - update avc2avi for high profile - - - git-svn-id: svn://svn.videolan.org/x264/trunk@267 df754926-b1dd-0310-bc7b-ec298dee348c - -commit 1ab01bbc01bc482e9891fe843e1ddd14b7625540 -Author: Loren Merritt -Date: Mon Jun 20 00:08:28 2005 +0000 - - custom quant matrices - - - git-svn-id: svn://svn.videolan.org/x264/trunk@266 df754926-b1dd-0310-bc7b-ec298dee348c - -commit 2c4b31daae223b688feb4a6fdef36fce3b1bc6f0 -Author: Loren Merritt -Date: Fri Jun 17 08:32:56 2005 +0000 - - VfW: workaround a windows unicode bug. - patch by Leowai. - - - git-svn-id: svn://svn.videolan.org/x264/trunk@265 df754926-b1dd-0310-bc7b-ec298dee348c - -commit 396133936510d57bc2054dd1c1d3d92fa0eb5495 -Author: Loren Merritt -Date: Fri Jun 17 08:21:48 2005 +0000 - - lossless mode enabled at qp=0 - - - git-svn-id: svn://svn.videolan.org/x264/trunk@264 df754926-b1dd-0310-bc7b-ec298dee348c - -commit 2f9a70c0a5b257eb1413601df191556547f307d5 -Author: Loren Merritt -Date: Tue Jun 14 19:49:16 2005 +0000 - - VfW: enable RDO. some option dependencies. - patch by Francesco Corriga. 
- - - git-svn-id: svn://svn.videolan.org/x264/trunk@263 df754926-b1dd-0310-bc7b-ec298dee348c - -commit 15ecd54fc67e75ccd380a7e36720f1a0c2514f94 -Author: Loren Merritt -Date: Tue Jun 14 19:19:52 2005 +0000 - - rate-distortion optimized MB types in I- and P-frames (--subme 6) - - - git-svn-id: svn://svn.videolan.org/x264/trunk@262 df754926-b1dd-0310-bc7b-ec298dee348c - -commit 41c37d9e05416a71c2499f788ea268032da0a6c4 -Author: Loren Merritt -Date: Sun Jun 12 23:17:12 2005 +0000 - - more VfW options. - patch mostly by celtic_druid. - - - git-svn-id: svn://svn.videolan.org/x264/trunk@261 df754926-b1dd-0310-bc7b-ec298dee348c - -commit a296ffcc5aa892d5281a9e6b2b4e863dd94e0b69 -Author: Loren Merritt -Date: Sat Jun 11 21:17:30 2005 +0000 - - VFW: 8x8 transform, SAR. - patch by celtic_druid. - - - git-svn-id: svn://svn.videolan.org/x264/trunk@260 df754926-b1dd-0310-bc7b-ec298dee348c - -commit 7832f017704235b31c7a33b54a06ab196c1dcc4a -Author: Loren Merritt -Date: Sat Jun 11 20:32:22 2005 +0000 - - threads option in vfw. - patch by celtic_druid. - - - git-svn-id: svn://svn.videolan.org/x264/trunk@259 df754926-b1dd-0310-bc7b-ec298dee348c - -commit 8c6e66479e66da8a9a79eacfec9fc2ff39a24464 -Author: Loren Merritt -Date: Sat Jun 11 19:27:02 2005 +0000 - - win32 threads enabled by default - - - git-svn-id: svn://svn.videolan.org/x264/trunk@258 df754926-b1dd-0310-bc7b-ec298dee348c - -commit 96813e36dc54e1e9866dad24a8c0cc7a748f0d4a -Author: Loren Merritt -Date: Sat Jun 11 19:15:35 2005 +0000 - - vfw installer nsis script. - patch by Francesco Corriga. - - - git-svn-id: svn://svn.videolan.org/x264/trunk@257 df754926-b1dd-0310-bc7b-ec298dee348c - -commit 8025723ee4a1c99e3e833ce963d05e5eb8c74606 -Author: Loren Merritt -Date: Sat Jun 11 05:52:38 2005 +0000 - - print 8x8 transform usage % in stats summary. 
- - - git-svn-id: svn://svn.videolan.org/x264/trunk@256 df754926-b1dd-0310-bc7b-ec298dee348c - -commit 26aa962acdc90204f7c915be91ead00ebcc5f30d -Author: Loren Merritt -Date: Wed Jun 8 17:16:20 2005 +0000 - - revert 216, another try at max_dec_frame_buffering. - disable adaptive cabac_idc by default; 0 is always best anyway. - - - git-svn-id: svn://svn.videolan.org/x264/trunk@255 df754926-b1dd-0310-bc7b-ec298dee348c - -commit c4f5de5230b584189c57db18f68d73f19d653d00 -Author: Loren Merritt -Date: Wed Jun 8 00:38:03 2005 +0000 - - typo in cabac tables - - - git-svn-id: svn://svn.videolan.org/x264/trunk@254 df754926-b1dd-0310-bc7b-ec298dee348c - -commit 2b5a6b2bd914a9d3ff9c304062c93f28c58ff532 -Author: Loren Merritt -Date: Sun Jun 5 20:39:58 2005 +0000 - - cosmetics - - - git-svn-id: svn://svn.videolan.org/x264/trunk@253 df754926-b1dd-0310-bc7b-ec298dee348c - -commit 916136c96d49961ff944b6ef2feeedfc7a90af98 -Author: Loren Merritt -Date: Sun Jun 5 18:39:21 2005 +0000 - - fix i8x8 decision with chroma_me - - - git-svn-id: svn://svn.videolan.org/x264/trunk@252 df754926-b1dd-0310-bc7b-ec298dee348c - -commit 8614594835ce25879c0d01ca88625ea444d577f2 -Author: Loren Merritt -Date: Sun Jun 5 11:07:28 2005 +0000 - - SATD-based decision for 8x8 transform in inter-MBs. - Enable 8x8 intra. - CLI options: --8x8dct, --analyse i8x8. 
- - - git-svn-id: svn://svn.videolan.org/x264/trunk@251 df754926-b1dd-0310-bc7b-ec298dee348c - -commit 6bf1398b824c013184548277eb8f2dbccd4d6fc5 -Author: Eric Petit -Date: Sun Jun 5 10:17:10 2005 +0000 - - Use win32 native threads (you still have to --enable-pthread to use - them, though) - - - git-svn-id: svn://svn.videolan.org/x264/trunk@250 df754926-b1dd-0310-bc7b-ec298dee348c - -commit 46a487299946e8a2130c3629bfaac1252ff068c4 -Author: Loren Merritt -Date: Sun Jun 5 01:09:38 2005 +0000 - - slightly faster 8x8 dct - - - git-svn-id: svn://svn.videolan.org/x264/trunk@249 df754926-b1dd-0310-bc7b-ec298dee348c - -commit 398a6bf064d7ce46b0cb0edc66323473009d5e06 -Author: Loren Merritt -Date: Sat Jun 4 06:23:56 2005 +0000 - - remove unused tables from SPS/PPS. reduces overhead when syncing threads. - - - git-svn-id: svn://svn.videolan.org/x264/trunk@248 df754926-b1dd-0310-bc7b-ec298dee348c - -commit 1729616639eada4977171af3611f3040113f1f01 -Author: Loren Merritt -Date: Fri Jun 3 09:58:25 2005 +0000 - - 10l (debug stuff in 246) - - - git-svn-id: svn://svn.videolan.org/x264/trunk@247 df754926-b1dd-0310-bc7b-ec298dee348c - -commit 1ab45c8f7411f7b4453ddff66919910e823ed33b -Author: Loren Merritt -Date: Fri Jun 3 05:33:15 2005 +0000 - - 8x8 transform and 8x8 intra prediction. - (backend only, not yet used by mb analysis) - - - git-svn-id: svn://svn.videolan.org/x264/trunk@246 df754926-b1dd-0310-bc7b-ec298dee348c - -commit e46db68534f54a52c9df7595d8bd8fd4c8b21b53 -Author: Loren Merritt -Date: Wed Jun 1 06:49:00 2005 +0000 - - cosmetics - - - git-svn-id: svn://svn.videolan.org/x264/trunk@245 df754926-b1dd-0310-bc7b-ec298dee348c - -commit 7f988086c20dc28cafdec793af7900fcb477a25a -Author: Loren Merritt -Date: Wed Jun 1 05:31:39 2005 +0000 - - fix a bug with cabac + B-frames + mref + slices. - call visualization per frame instead of per slice. 
- - - git-svn-id: svn://svn.videolan.org/x264/trunk@244 df754926-b1dd-0310-bc7b-ec298dee348c - -commit b1f4d5b12789e6d608288b71ebefa59acf4fba86 -Author: Måns Rullgård -Date: Mon May 30 19:47:02 2005 +0000 - - accept the standard --prefix etc. options - - - git-svn-id: svn://svn.videolan.org/x264/trunk@243 df754926-b1dd-0310-bc7b-ec298dee348c - -commit c77e709785fab74313a6c443c4f2f00fb9a86b70 -Author: Loren Merritt -Date: Mon May 30 01:52:00 2005 +0000 - - tweak cflags - - - git-svn-id: svn://svn.videolan.org/x264/trunk@242 df754926-b1dd-0310-bc7b-ec298dee348c - -commit e85db920bb31a699b38c057f51a3eb68bb1b719d -Author: Eric Petit -Date: Sun May 29 20:27:09 2005 +0000 - - Fixed multithreading on BeOS (pthread emulation required) - - - git-svn-id: svn://svn.videolan.org/x264/trunk@241 df754926-b1dd-0310-bc7b-ec298dee348c - -commit 10851d0e11e90e814c37695aa244f113b21415f2 -Author: Loren Merritt -Date: Sun May 29 18:28:49 2005 +0000 - - multithreading (via slices) - - - git-svn-id: svn://svn.videolan.org/x264/trunk@240 df754926-b1dd-0310-bc7b-ec298dee348c - -commit 36f6321d4dd1b87331bec691ba1bdd3c6ec19b22 -Author: Loren Merritt -Date: Tue May 24 05:10:38 2005 +0000 - - move zones parsing to ratecontrol.c; allows passing in zones as a string. - - - git-svn-id: svn://svn.videolan.org/x264/trunk@239 df754926-b1dd-0310-bc7b-ec298dee348c - -commit 470e1b284f31e294119c7bc457a762488b34dd60 -Author: Loren Merritt -Date: Tue May 24 04:16:54 2005 +0000 - - UMHex motion seach (but no early termination yet) - - - git-svn-id: svn://svn.videolan.org/x264/trunk@238 df754926-b1dd-0310-bc7b-ec298dee348c - -commit c8b1a477d2d145698b065d7c20cd10be2f75e94d -Author: Loren Merritt -Date: Tue May 24 01:34:57 2005 +0000 - - Zoned ratecontrol. 
- - - git-svn-id: svn://svn.videolan.org/x264/trunk@237 df754926-b1dd-0310-bc7b-ec298dee348c - -commit 0072b802fb9205be3606f45ec9cc6f5111c3ec3e -Author: Loren Merritt -Date: Mon May 23 08:57:02 2005 +0000 - - fix rounding of intra dequant when qp<=3 - - - git-svn-id: svn://svn.videolan.org/x264/trunk@236 df754926-b1dd-0310-bc7b-ec298dee348c - -commit 7c02f091422b68fa01d48645eb2f04bbf409fb79 -Author: Loren Merritt -Date: Sat May 21 20:49:06 2005 +0000 - - API: x264_encoder_reconfig(). (not yet used by any frontend) - - - git-svn-id: svn://svn.videolan.org/x264/trunk@235 df754926-b1dd-0310-bc7b-ec298dee348c - -commit 7fef6efa884a0fdad75181564a916ac94f81e3b3 -Author: Eric Petit -Date: Thu May 19 15:42:48 2005 +0000 - - Makefile: in target "install", first create the directories if they - don't already exist - - - git-svn-id: svn://svn.videolan.org/x264/trunk@234 df754926-b1dd-0310-bc7b-ec298dee348c - -commit 809c516abe16bf051beb9d053d673a26906aa43c -Author: Eric Petit -Date: Sun May 15 20:19:22 2005 +0000 - - Optimized subXxX_dct - - - git-svn-id: svn://svn.videolan.org/x264/trunk@233 df754926-b1dd-0310-bc7b-ec298dee348c - -commit f025abc9c0006c0a67d112afc6daff78c4fa7aad -Author: Eric Petit -Date: Sat May 14 15:49:36 2005 +0000 - - s/==/=/ - - - git-svn-id: svn://svn.videolan.org/x264/trunk@232 df754926-b1dd-0310-bc7b-ec298dee348c - -commit 04ded39b9ba4e8f0b983efcc056292f25d544b9f -Author: Eric Petit -Date: Sat May 14 07:08:08 2005 +0000 - - ppc/: compile fixes for Linux/PPC (courtesy of Rasmus Rohde) and - for gcc < 4 - - - git-svn-id: svn://svn.videolan.org/x264/trunk@231 df754926-b1dd-0310-bc7b-ec298dee348c - -commit 94829ef6e277315e635df05d669848b5216f00d3 -Author: Loren Merritt -Date: Fri May 13 16:54:03 2005 +0000 - - visualize reference pic numbers. misc cleanups in visualization. - patch by Tuukka Toivonen. 
- - - - git-svn-id: svn://svn.videolan.org/x264/trunk@230 df754926-b1dd-0310-bc7b-ec298dee348c - -commit 4d5c7a033fbe7e7b168381a9fa15e8c2eb1a6a2f -Author: Eric Petit -Date: Fri May 13 15:30:18 2005 +0000 - - ppc/*: more tuning on satd (+5%) - - - git-svn-id: svn://svn.videolan.org/x264/trunk@229 df754926-b1dd-0310-bc7b-ec298dee348c - -commit e0bd8066395df74d5f2edc851c048512a0fed4ba -Author: Loren Merritt -Date: Fri May 13 08:03:42 2005 +0000 - - CLI option: --seek - - - git-svn-id: svn://svn.videolan.org/x264/trunk@228 df754926-b1dd-0310-bc7b-ec298dee348c - -commit 036494a60f7850c1613c5084fe9a11c7821cb5a7 -Author: Loren Merritt -Date: Thu May 12 23:03:49 2005 +0000 - - CLI option: --visualize - Displays the encoded video along with MB types and motion vectors. - patch by Tuukka Toivonen. - - - git-svn-id: svn://svn.videolan.org/x264/trunk@227 df754926-b1dd-0310-bc7b-ec298dee348c - -commit 31c91bd71f1cc7fd0988892657a3574dc534f628 -Author: Loren Merritt -Date: Thu May 12 19:48:10 2005 +0000 - - fix an uninitialized value in slicetype_analyse - - - git-svn-id: svn://svn.videolan.org/x264/trunk@226 df754926-b1dd-0310-bc7b-ec298dee348c - -commit 92ea0c5c30a74408e931227765009ef8aaee1542 -Author: Loren Merritt -Date: Wed May 11 17:58:00 2005 +0000 - - port recent MC asm changes to amd64. - patch by Josef Zlomek. - - - git-svn-id: svn://svn.videolan.org/x264/trunk@225 df754926-b1dd-0310-bc7b-ec298dee348c - -commit d926e41d04312639d762d79af3867d61ce340591 -Author: Eric Petit -Date: Wed May 11 16:22:18 2005 +0000 - - ppc/*: - + Removed unused code - + Optimized mc chroma 4xH and satd 8x4 and 4x8 - + Won a bunch of cycles by not trusting gcc about inlining and - unrolling properly - (about 17% faster globally) - - - git-svn-id: svn://svn.videolan.org/x264/trunk@224 df754926-b1dd-0310-bc7b-ec298dee348c - -commit aecc6ab057616f32eb0643b36db2d5b04d7a07ea -Author: Loren Merritt -Date: Wed May 11 15:57:43 2005 +0000 - - New ratecontrol options: - 1pass ABR. 
VBV constraint for ABR and 2pass. - There is no longer a dedicated CBR mode: use ABR+VBV. - VfW now uses ABR instead of CQP for 1st of multipass. - - - git-svn-id: svn://svn.videolan.org/x264/trunk@223 df754926-b1dd-0310-bc7b-ec298dee348c - -commit 540fba7a1404909074eb08e76b98d7f9d36fd5e9 -Author: Loren Merritt -Date: Wed May 11 00:15:34 2005 +0000 - - use a predicted mv as starting point for subpel refinement. - - - git-svn-id: svn://svn.videolan.org/x264/trunk@222 df754926-b1dd-0310-bc7b-ec298dee348c - -commit dcb0aebebeb197c75fc5f0f49185f6afb6fd90ec -Author: Loren Merritt -Date: Tue May 10 08:21:36 2005 +0000 - - slight speedup in halfpel interpolation. - patch by Mathieu Monnier. - - - git-svn-id: svn://svn.videolan.org/x264/trunk@221 df754926-b1dd-0310-bc7b-ec298dee348c - -commit 22a567bbe57fec9cf4beacca7517cc6d9139e091 -Author: Loren Merritt -Date: Fri May 6 19:38:40 2005 +0000 - - Cleaner allocation of tmp space in halfpel interpolation; fixes some valgrind/nasm warnings. - patch by Mathieu Monnier. - - - git-svn-id: svn://svn.videolan.org/x264/trunk@220 df754926-b1dd-0310-bc7b-ec298dee348c - -commit ca4a34dfe0e6d93ce7598dd18c3c6af8c611d7e5 -Author: Loren Merritt -Date: Tue May 3 08:25:31 2005 +0000 - - "2pass failed to converge" is no longer considered fatal. - - - git-svn-id: svn://svn.videolan.org/x264/trunk@219 df754926-b1dd-0310-bc7b-ec298dee348c - -commit ab2cdf4b804f9e97a112fa4be96c1306522746e4 -Author: Loren Merritt -Date: Sat Apr 30 01:20:50 2005 +0000 - - Updated MSVC project files. - thanks to Bonzi. - - - git-svn-id: svn://svn.videolan.org/x264/trunk@218 df754926-b1dd-0310-bc7b-ec298dee348c - -commit e0a640413f484d1db034a9ecbd0fa472204f273a -Author: Loren Merritt -Date: Mon Apr 25 18:39:32 2005 +0000 - - cosmetics. - silence some gcc warnings. - amd64 doesn't need a separate copy of the c/h files, only the asm. 
- - - git-svn-id: svn://svn.videolan.org/x264/trunk@217 df754926-b1dd-0310-bc7b-ec298dee348c - -commit d2ad6a20941a4f25b69c88d136e7450d10b035be -Author: Loren Merritt -Date: Fri Apr 22 04:05:35 2005 +0000 - - 10l (214 wrote wrong DPB size in SPS -> B-pyramid broke) - - - git-svn-id: svn://svn.videolan.org/x264/trunk@216 df754926-b1dd-0310-bc7b-ec298dee348c - -commit 7229a11c2fe117e0511cd76fa264baf25be92a5f -Author: Loren Merritt -Date: Thu Apr 21 09:20:43 2005 +0000 - - CLI (mp4): return to 'capture' output mode, remove useless SetCtsPackMode() (fixed in gpac). - Note: requires gpac cvs-20050419 or later. - patch by bobo. - - - git-svn-id: svn://svn.videolan.org/x264/trunk@215 df754926-b1dd-0310-bc7b-ec298dee348c - -commit 9b44391701779bfb0d291592d1d81c70bcf6c116 -Author: Loren Merritt -Date: Tue Apr 19 23:09:29 2005 +0000 - - combined L0 & L1 reference lists are limited to a total of 16 pics. - - - git-svn-id: svn://svn.videolan.org/x264/trunk@214 df754926-b1dd-0310-bc7b-ec298dee348c - -commit 41f9b8134c332599555bb44c3d0b8e94af44ebf9 -Author: Loren Merritt -Date: Tue Apr 19 18:44:42 2005 +0000 - - amd64 asm patch, part2. - by Josef Zlomek ( josef dot zlomek at xeris dot cz ) - - - git-svn-id: svn://svn.videolan.org/x264/trunk@213 df754926-b1dd-0310-bc7b-ec298dee348c - -commit 413d8fa90917044e0ffaffb7009ccbc8059c61b0 -Author: Loren Merritt -Date: Tue Apr 19 18:35:45 2005 +0000 - - amd64 asm patch, part1. - - - git-svn-id: svn://svn.videolan.org/x264/trunk@212 df754926-b1dd-0310-bc7b-ec298dee348c - -commit 7d35ba6bf080610d8f144f4270e961c69ba14f1c -Author: Loren Merritt -Date: Tue Apr 19 08:45:36 2005 +0000 - - Allow manual selection of fullpel ME method. New method: Exhaustive search. - based on a patch by Tuukka Toivonen. - - - git-svn-id: svn://svn.videolan.org/x264/trunk@211 df754926-b1dd-0310-bc7b-ec298dee348c - -commit 0c641421898f5c3087d52abcfd35ab617d101010 -Author: Loren Merritt -Date: Tue Apr 19 01:42:12 2005 +0000 - - misc makefile changes. 
- propogate --extra-cflags to vfw. - 'make clean' removes x264.exe and vfw. - tweak dependencies. - - - - git-svn-id: svn://svn.videolan.org/x264/trunk@210 df754926-b1dd-0310-bc7b-ec298dee348c - -commit 1519835f0fa218993ed031a2247ec88eb5906dd7 -Author: Loren Merritt -Date: Mon Apr 18 02:00:58 2005 +0000 - - 10l (CLI: fflush after progress update) - - - git-svn-id: svn://svn.videolan.org/x264/trunk@209 df754926-b1dd-0310-bc7b-ec298dee348c - -commit da4c0384503bd7b2fa7752ef2045e5060e5df0cd -Author: Loren Merritt -Date: Sun Apr 17 18:43:17 2005 +0000 - - CLI: progress indicator - - - git-svn-id: svn://svn.videolan.org/x264/trunk@208 df754926-b1dd-0310-bc7b-ec298dee348c - -commit a61378bea90edd13a0e9b907917f7645e9266750 -Author: Loren Merritt -Date: Sat Apr 16 20:21:06 2005 +0000 - - VfW: build from main makefile - - - git-svn-id: svn://svn.videolan.org/x264/trunk@207 df754926-b1dd-0310-bc7b-ec298dee348c - -commit c6f3d17ffa67ad27f126bf579a08a443023ad0d3 -Author: Eric Petit -Date: Fri Apr 15 17:26:09 2005 +0000 - - [mp4] ftyp & moov boxes at the begining of the file, (thanks to jeanlf - for comments) - - patch by bobololo - - - git-svn-id: svn://svn.videolan.org/x264/trunk@206 df754926-b1dd-0310-bc7b-ec298dee348c - -commit 74eecd32358de0799a1b9bad041ebb6550002769 -Author: Loren Merritt -Date: Thu Apr 14 23:04:48 2005 +0000 - - CLI: --fps had side-effects. fixed. - - - git-svn-id: svn://svn.videolan.org/x264/trunk@205 df754926-b1dd-0310-bc7b-ec298dee348c - -commit 78ca42c56ec53e153fef1b2a1a612191c840d797 -Author: Loren Merritt -Date: Thu Apr 14 21:59:00 2005 +0000 - - CLI: cosmetics - - - git-svn-id: svn://svn.videolan.org/x264/trunk@204 df754926-b1dd-0310-bc7b-ec298dee348c - -commit e06dfd4ac1cd0c80525f2dfbacbce28c543770fc -Author: Loren Merritt -Date: Thu Apr 14 19:45:08 2005 +0000 - - Makefile: strip x264cli. - tweak stats summary. 
- - - git-svn-id: svn://svn.videolan.org/x264/trunk@203 df754926-b1dd-0310-bc7b-ec298dee348c - -commit 29facf8bf218a7c7c47ca48c8b7abb6672d6544e -Author: Laurent Aimar -Date: Wed Apr 13 14:25:32 2005 +0000 - - * x264.c: Fix ctts box creation. Patch by bobololo from Ateme. - - - - git-svn-id: svn://svn.videolan.org/x264/trunk@202 df754926-b1dd-0310-bc7b-ec298dee348c - -commit 1014aa4e4fa0097e98754afbcf68245a14480710 -Author: Eric Petit -Date: Wed Apr 13 03:43:07 2005 +0000 - - common/ppc: more cleaning, optimized a bit - - - git-svn-id: svn://svn.videolan.org/x264/trunk@201 df754926-b1dd-0310-bc7b-ec298dee348c - -commit 77404162c8588abc9b720b88e20fac34dfe31139 -Author: Loren Merritt -Date: Tue Apr 12 20:38:40 2005 +0000 - - CLI: require output file (don't default to stdout). warn if trying to use mp4 or avis when not supported. misc cleanup. - - - git-svn-id: svn://svn.videolan.org/x264/trunk@200 df754926-b1dd-0310-bc7b-ec298dee348c - -commit fe905276b25c8aa202379d0b5c0115d7b5b631c8 -Author: Eric Petit -Date: Tue Apr 12 18:45:24 2005 +0000 - - configure: use -falign-loops=16 on OS X - common/ppc/: added AltiVecized mc_chroma + cleaning - checkasm.c: really fixed MC tests - - - git-svn-id: svn://svn.videolan.org/x264/trunk@199 df754926-b1dd-0310-bc7b-ec298dee348c - -commit a1b9531707b835e6934cadfb78249149f6351d7e -Author: Loren Merritt -Date: Tue Apr 12 17:33:10 2005 +0000 - - Configure tweaks. Allow avis-input in mingw. Turn off debug by default. - - - git-svn-id: svn://svn.videolan.org/x264/trunk@198 df754926-b1dd-0310-bc7b-ec298dee348c - -commit 35d85ca65d77f4013cfc37b2dd76b9ef87db144d -Author: Eric Petit -Date: Tue Apr 12 16:34:48 2005 +0000 - - checkasm.c: fixed MC tests - - - git-svn-id: svn://svn.videolan.org/x264/trunk@197 df754926-b1dd-0310-bc7b-ec298dee348c - -commit c0abfd39627fcb3e2f6c9aed7ebbed7dfda9230e -Author: Loren Merritt -Date: Tue Apr 12 03:34:25 2005 +0000 - - CLI: MP4 muxing. - patch by bobo from Ateme. 
- - - git-svn-id: svn://svn.videolan.org/x264/trunk@196 df754926-b1dd-0310-bc7b-ec298dee348c - -commit e1b747ff05b28ee786425d48be53376c620a1cdc -Author: Eric Petit -Date: Mon Apr 11 21:21:05 2005 +0000 - - Cygwin fixes - - - git-svn-id: svn://svn.videolan.org/x264/trunk@195 df754926-b1dd-0310-bc7b-ec298dee348c - -commit b7c3b444753d5ddce3b87249c96a207c85301075 -Author: Eric Petit -Date: Mon Apr 11 20:52:31 2005 +0000 - - configure: ooops, restored -g - ratecontrol.c: OS X has exp2f in -lmx - checkasm: quick compile fix - - - git-svn-id: svn://svn.videolan.org/x264/trunk@194 df754926-b1dd-0310-bc7b-ec298dee348c - -commit ecbf942b1e46e1a4df0e8fd87db538342d968059 -Author: Måns Rullgård -Date: Mon Apr 11 20:00:49 2005 +0000 - - add x86_64 to configure - - - git-svn-id: svn://svn.videolan.org/x264/trunk@193 df754926-b1dd-0310-bc7b-ec298dee348c - -commit 7d9ac7c215bc0b77c538d100c92498e847e1cfa8 -Author: Eric Petit -Date: Mon Apr 11 19:41:28 2005 +0000 - - set svn:ignore - - - git-svn-id: svn://svn.videolan.org/x264/trunk@192 df754926-b1dd-0310-bc7b-ec298dee348c - -commit 6490f4398d9e28e65d7517849e729e14eede8c5b -Author: Eric Petit -Date: Mon Apr 11 19:28:03 2005 +0000 - - Added a configure to detect the platform/system/etc so people don't - have to edit the Makefile (will work for Linux/OS X/BeOS/FreeBSD, feel - free to modify for others), and we can now remove the Jamfile which - was broken most of the time anyway. 
- - - git-svn-id: svn://svn.videolan.org/x264/trunk@191 df754926-b1dd-0310-bc7b-ec298dee348c - -commit b12cb05a8fee91c50dc3d1d3c2569a801cc1a5e3 -Author: Loren Merritt -Date: Sun Apr 10 23:35:01 2005 +0000 - - Makefiles: better dependencies for SEI version number - - - git-svn-id: svn://svn.videolan.org/x264/trunk@190 df754926-b1dd-0310-bc7b-ec298dee348c - -commit 90a6fd3e4e8685f990c7f9fe05c8718e77c0e080 -Author: Måns Rullgård -Date: Thu Apr 7 23:26:51 2005 +0000 - - Forgot rbsp_trailing_bits in AUD NAL - - - git-svn-id: svn://svn.videolan.org/x264/trunk@189 df754926-b1dd-0310-bc7b-ec298dee348c - -commit e103917aa0cbb702ba09c2507565398d7f129c2e -Author: Måns Rullgård -Date: Thu Apr 7 23:11:06 2005 +0000 - - Optionally use access unit delimiter NAL units. - - - git-svn-id: svn://svn.videolan.org/x264/trunk@188 df754926-b1dd-0310-bc7b-ec298dee348c - -commit d4663a41a4bb0c67eb861046ed2917111257883f -Author: Loren Merritt -Date: Tue Apr 5 21:32:52 2005 +0000 - - VfW: cleaner install on win98. - patch by Riccardo Stievano. - - - git-svn-id: svn://svn.videolan.org/x264/trunk@187 df754926-b1dd-0310-bc7b-ec298dee348c - -commit 990e58b646629a2937e76794b97892d7806a932e -Author: Loren Merritt -Date: Tue Apr 5 20:50:13 2005 +0000 - - new util: countquant for 2pass statsfiles - - - git-svn-id: svn://svn.videolan.org/x264/trunk@186 df754926-b1dd-0310-bc7b-ec298dee348c - -commit b780e711dd0a1e97535c690f84e9726eefa95c2c -Author: Loren Merritt -Date: Tue Apr 5 20:39:47 2005 +0000 - - print svn version number in SEI info and in CLI/VfW. - - - git-svn-id: svn://svn.videolan.org/x264/trunk@185 df754926-b1dd-0310-bc7b-ec298dee348c - -commit ea9308c6b3bfc891a2dcebe1dc89e0c301c57066 -Author: Måns Rullgård -Date: Thu Mar 31 21:20:41 2005 +0000 - - Make reconstructed frame available to caller. 
- - - git-svn-id: svn://svn.videolan.org/x264/trunk@184 df754926-b1dd-0310-bc7b-ec298dee348c - -commit 6dcb0e4b6d827b9c79f402ff91049b2830b8a743 -Author: Loren Merritt -Date: Thu Mar 31 06:03:22 2005 +0000 - - make install - - - git-svn-id: svn://svn.videolan.org/x264/trunk@183 df754926-b1dd-0310-bc7b-ec298dee348c - -commit 11de51977d28b9ff242aa137f9c270b0f1b3f465 -Author: Loren Merritt -Date: Thu Mar 31 05:59:11 2005 +0000 - - free() -> x264_free() - - - git-svn-id: svn://svn.videolan.org/x264/trunk@182 df754926-b1dd-0310-bc7b-ec298dee348c - -commit de97a12a8b976acad6afdbeda54e4bfbdd9bf8b5 -Author: Loren Merritt -Date: Mon Mar 28 05:08:43 2005 +0000 - - CLI: flush B-frames at the end of the encode - - - git-svn-id: svn://svn.videolan.org/x264/trunk@181 df754926-b1dd-0310-bc7b-ec298dee348c - -commit 0df24cf995faf3169fe15d808e4fff00c18ad7dc -Author: Loren Merritt -Date: Sun Mar 27 20:49:59 2005 +0000 - - convert mc's inline asm to nasm (slight speedup and msvc compatibility). - patch by Mathieu Monnier. - - - git-svn-id: svn://svn.videolan.org/x264/trunk@180 df754926-b1dd-0310-bc7b-ec298dee348c - -commit 48c34d0bffd57ba7c73f20bd6c892b4b06131140 -Author: Loren Merritt -Date: Sun Mar 27 06:58:35 2005 +0000 - - buffer overruns in slicetype_decision. - patch by Mathieu Monnier. - - - git-svn-id: svn://svn.videolan.org/x264/trunk@179 df754926-b1dd-0310-bc7b-ec298dee348c - -commit a1c2c04693de8fe2d7712249c06c7a6406d0b422 -Author: Loren Merritt -Date: Thu Mar 17 17:35:11 2005 +0000 - - tweak usage message - - - git-svn-id: svn://svn.videolan.org/x264/trunk@178 df754926-b1dd-0310-bc7b-ec298dee348c - -commit ac93ce1bb01701ddc0faa79eeb1079288b6e3543 -Author: Loren Merritt -Date: Wed Mar 16 22:02:02 2005 +0000 - - Simplify inter analysis option names. (psub16x16 -> p8x8) - patch by Robert Swain. 
- - - git-svn-id: svn://svn.videolan.org/x264/trunk@177 df754926-b1dd-0310-bc7b-ec298dee348c - -commit 04557605de60718c172ce6d1fc26b30d6fd2ee8b -Author: Loren Merritt -Date: Wed Mar 16 21:52:59 2005 +0000 - - 173 broke .depend when debugging was enabled - - - git-svn-id: svn://svn.videolan.org/x264/trunk@176 df754926-b1dd-0310-bc7b-ec298dee348c - -commit bf7f679c793a2db2580e00f87eb3bed45b47a805 -Author: Loren Merritt -Date: Wed Mar 16 20:50:19 2005 +0000 - - early termination for intra4x4 analysis - - - git-svn-id: svn://svn.videolan.org/x264/trunk@175 df754926-b1dd-0310-bc7b-ec298dee348c - -commit ee5b2be9406eb8b9b11180f406febc944fd8845d -Author: Måns Rullgård -Date: Tue Mar 15 12:09:00 2005 +0000 - - Check/fix range of x264_param_t.rc.i_qp_constant. - - - git-svn-id: svn://svn.videolan.org/x264/trunk@174 df754926-b1dd-0310-bc7b-ec298dee348c - -commit 94086b8bb5885f76093e74b5a5b0f4d4db287c95 -Author: Eric Petit -Date: Tue Mar 15 07:21:18 2005 +0000 - - Cleaned up and fixed Makefile for OS X and BeOS (hopefully FreeBSD too) - It defaults for x86/linux, others: uncomment the lines for your - platform & OS at the beginning of the Makefile - - - git-svn-id: svn://svn.videolan.org/x264/trunk@173 df754926-b1dd-0310-bc7b-ec298dee348c - -commit cb6a40f00d1f5f14b9c14974309b43955a0b83ed -Author: Loren Merritt -Date: Tue Mar 15 02:30:16 2005 +0000 - - macroblock_analyse: simplify cost comparisons. (cosmetic) - CLI: enable cabac by default. - - - git-svn-id: svn://svn.videolan.org/x264/trunk@172 df754926-b1dd-0310-bc7b-ec298dee348c - -commit 79fa69451ad4552c2dd84fcd3c5e75da136af17f -Author: Loren Merritt -Date: Mon Mar 14 22:47:19 2005 +0000 - - Chroma ME (P-frames only). - - - git-svn-id: svn://svn.videolan.org/x264/trunk@171 df754926-b1dd-0310-bc7b-ec298dee348c - -commit abbd6c56da04a9e10d10a4bd158104826e8fc81a -Author: Loren Merritt -Date: Mon Mar 14 13:05:57 2005 +0000 - - SSE optimized chroma MC. - patch by Radek Czyz. 
- - - git-svn-id: svn://svn.videolan.org/x264/trunk@170 df754926-b1dd-0310-bc7b-ec298dee348c - -commit 553b8295bac6b6fd9d91e591bca1299923f0fc96 -Author: Loren Merritt -Date: Sun Mar 13 23:36:42 2005 +0000 - - 167 broke psnr calculation for non-mod-32 inputs - - - git-svn-id: svn://svn.videolan.org/x264/trunk@169 df754926-b1dd-0310-bc7b-ec298dee348c - -commit 70da43b22cd394160c4358a33330446bc104c78e -Author: Eric Petit -Date: Sun Mar 13 18:49:51 2005 +0000 - - sqrtf requires -lmx on Mac OS X - - - git-svn-id: svn://svn.videolan.org/x264/trunk@168 df754926-b1dd-0310-bc7b-ec298dee348c - -commit e72f431c685731663d2824aa768218927490e704 -Author: Loren Merritt -Date: Sun Mar 13 10:25:11 2005 +0000 - - use mmx ssd for psnr calculation. - - - git-svn-id: svn://svn.videolan.org/x264/trunk@167 df754926-b1dd-0310-bc7b-ec298dee348c - -commit be2f0e088810860ab760d8d362a9450aaf917a29 -Author: Loren Merritt -Date: Sun Mar 13 08:26:52 2005 +0000 - - revert 164. blame Spyder. - - - git-svn-id: svn://svn.videolan.org/x264/trunk@166 df754926-b1dd-0310-bc7b-ec298dee348c - -commit 73522c84014f240abe7ee70c6e98657b08f97b44 -Author: Loren Merritt -Date: Sun Mar 13 07:04:16 2005 +0000 - - SSD comparison function (not yet used). - Cosmetics in mmx SAD. - - - git-svn-id: svn://svn.videolan.org/x264/trunk@165 df754926-b1dd-0310-bc7b-ec298dee348c - -commit c68f34e555e22f4687d985ade6d81ea87cc73f29 -Author: Loren Merritt -Date: Sat Mar 12 00:23:50 2005 +0000 - - VfW: reject YUY2 and RGB input formats - - - git-svn-id: svn://svn.videolan.org/x264/trunk@164 df754926-b1dd-0310-bc7b-ec298dee348c - -commit fd527e3760074a19637c503d0f828d97b7c079fd -Author: Måns Rullgård -Date: Fri Mar 11 18:10:35 2005 +0000 - - Really fix QP override. 
- - - git-svn-id: svn://svn.videolan.org/x264/trunk@163 df754926-b1dd-0310-bc7b-ec298dee348c - -commit a2245645c8b3948de32f2c27f8cb0acb86e4d2d4 -Author: Loren Merritt -Date: Fri Mar 11 02:15:25 2005 +0000 - - write VUI bitstream restrictions - - - git-svn-id: svn://svn.videolan.org/x264/trunk@162 df754926-b1dd-0310-bc7b-ec298dee348c - -commit 29dee22af6b6174f54bb621f1038c0604a42d21e -Author: Loren Merritt -Date: Thu Mar 10 23:03:55 2005 +0000 - - AVI & Avisynth input (win32 only). - patch by bobo from Ateme. - - - git-svn-id: svn://svn.videolan.org/x264/trunk@161 df754926-b1dd-0310-bc7b-ec298dee348c - -commit 79ebb19964a115ab8de21fe1e90162ff9954b283 -Author: Loren Merritt -Date: Thu Mar 10 21:42:24 2005 +0000 - - expose option "chroma qp offset" - - - git-svn-id: svn://svn.videolan.org/x264/trunk@160 df754926-b1dd-0310-bc7b-ec298dee348c - -commit 2fc52995de23f963e67ac408dc247ee3bf68c952 -Author: Måns Rullgård -Date: Thu Mar 10 19:42:05 2005 +0000 - - Fix per-frame QP override broken in rev 137. - - - git-svn-id: svn://svn.videolan.org/x264/trunk@159 df754926-b1dd-0310-bc7b-ec298dee348c - -commit 99b10e79b55520264a29ae4b82d67cd60005faab -Author: Måns Rullgård -Date: Tue Mar 8 01:08:40 2005 +0000 - - Don't include x264.o in the library. - - - git-svn-id: svn://svn.videolan.org/x264/trunk@158 df754926-b1dd-0310-bc7b-ec298dee348c - -commit 9f97e90ef5f3df22a560c10ad49a658041c88629 -Author: Loren Merritt -Date: Sun Mar 6 21:07:10 2005 +0000 - - VfW: expose B pyramid and weighted B prediction. - patch by Riccardo Stievano. 
- - - git-svn-id: svn://svn.videolan.org/x264/trunk@157 df754926-b1dd-0310-bc7b-ec298dee348c - -commit 4fbdc5c1ee77497e6455cd72a895383fb99a77fe -Author: Loren Merritt -Date: Sun Mar 6 11:39:08 2005 +0000 - - 10l - - - git-svn-id: svn://svn.videolan.org/x264/trunk@156 df754926-b1dd-0310-bc7b-ec298dee348c - -commit 1f735a32c9626b86b608c9604170b3f4c4549159 -Author: Loren Merritt -Date: Sun Mar 6 09:50:17 2005 +0000 - - buffer overrun when bframes == X264_BFRAME_MAX - - - git-svn-id: svn://svn.videolan.org/x264/trunk@155 df754926-b1dd-0310-bc7b-ec298dee348c - -commit c90534d6c85664c7a161cbe70a7928cb65f19e18 -Author: Loren Merritt -Date: Sun Mar 6 05:12:25 2005 +0000 - - Adaptive B skipped some POC numbers (slightly reducing b_direct efficiency). - - - git-svn-id: svn://svn.videolan.org/x264/trunk@154 df754926-b1dd-0310-bc7b-ec298dee348c - -commit d0bd44f769543e81280a5a97bbe985c6dfd86cf1 -Author: Loren Merritt -Date: Sat Mar 5 09:34:53 2005 +0000 - - avc2avi: - Use POC to determine frame boundaries (frame_num couldn't distinguish consecutive B-frames). - Fix keyframe flag to mark IDR only, not all I slices. - - - git-svn-id: svn://svn.videolan.org/x264/trunk@153 df754926-b1dd-0310-bc7b-ec298dee348c - -commit f01e3d5f2bffe3a033ecbaa608be6b4f3aca9c60 -Author: Loren Merritt -Date: Sat Mar 5 04:16:05 2005 +0000 - - allow 16 refs (instead of 15) - - - git-svn-id: svn://svn.videolan.org/x264/trunk@152 df754926-b1dd-0310-bc7b-ec298dee348c - -commit c47bb1ffbe630609fda9bd7c9488bae7f0078a4e -Author: Loren Merritt -Date: Sat Mar 5 00:37:25 2005 +0000 - - report version number in decimal instead of hex - - - git-svn-id: svn://svn.videolan.org/x264/trunk@151 df754926-b1dd-0310-bc7b-ec298dee348c - -commit 91536acdb42ec9615a50f5b9f3af34b6c6408049 -Author: Loren Merritt -Date: Fri Mar 4 12:52:35 2005 +0000 - - New option: "B-frame pyramid" keeps the middle of 2+ consecutive B-frames as a reference, and reorders frame appropriately. 
- - - git-svn-id: svn://svn.videolan.org/x264/trunk@150 df754926-b1dd-0310-bc7b-ec298dee348c - -commit 9591b0383829c707791b7797a68a79008349e198 -Author: Loren Merritt -Date: Thu Mar 3 04:36:46 2005 +0000 - - smarter parsing of resolution from commandline - - - git-svn-id: svn://svn.videolan.org/x264/trunk@149 df754926-b1dd-0310-bc7b-ec298dee348c - -commit 4337ee8de793cb5c6f0dee3b0a851041466fec7e -Author: Eric Petit -Date: Thu Mar 3 03:02:27 2005 +0000 - - ratecontrol.c: fixed exp2f on BeOS so rate control works properly - - - git-svn-id: svn://svn.videolan.org/x264/trunk@148 df754926-b1dd-0310-bc7b-ec298dee348c - -commit 4b2ba852564a05a651f9312651cc402043089648 -Author: Loren Merritt -Date: Wed Mar 2 22:44:31 2005 +0000 - - Fix a buffer overrun with very long MVs. - - - git-svn-id: svn://svn.videolan.org/x264/trunk@147 df754926-b1dd-0310-bc7b-ec298dee348c - -commit ccf61cef868c38bb71d746fcc03f583d93fd3e4c -Author: Loren Merritt -Date: Mon Feb 28 19:01:58 2005 +0000 - - wrong stride in lowres image - - - git-svn-id: svn://svn.videolan.org/x264/trunk@146 df754926-b1dd-0310-bc7b-ec298dee348c - -commit b04a2601088a855361169a3eb5236e8b998f7e70 -Author: Loren Merritt -Date: Mon Feb 28 18:50:55 2005 +0000 - - 10l (fast1stpass was slower than non-fast) - - - git-svn-id: svn://svn.videolan.org/x264/trunk@145 df754926-b1dd-0310-bc7b-ec298dee348c - -commit d05adbc7f35e461879f1559a095b82b7253d78cd -Author: Loren Merritt -Date: Fri Feb 25 03:10:04 2005 +0000 - - Disable deblocking filter in frames of sufficiently low QP that it would have no effect. (Saves a little CPU time in the decoder.) - - - git-svn-id: svn://svn.videolan.org/x264/trunk@144 df754926-b1dd-0310-bc7b-ec298dee348c - -commit d836d8f9b7ae3962ac0f5a325f43ca9d6a87a7ff -Author: Loren Merritt -Date: Fri Feb 25 00:46:56 2005 +0000 - - Simplify x264_frame_expand_border. 
- - - git-svn-id: svn://svn.videolan.org/x264/trunk@143 df754926-b1dd-0310-bc7b-ec298dee348c - -commit 067f22c153eaf19e1ba5ec35deef96a8fb3eae4e -Author: Loren Merritt -Date: Thu Feb 24 13:09:55 2005 +0000 - - Altivec functions for MC using the cached halfpel planes. - Patch by Fredrik Pettersson . - - - git-svn-id: svn://svn.videolan.org/x264/trunk@142 df754926-b1dd-0310-bc7b-ec298dee348c - -commit 323b54ffa0bbcfe82b02cb0d204e9ba5121264fd -Author: Loren Merritt -Date: Thu Feb 24 13:01:21 2005 +0000 - - Don't use uninitialize MVs in x264_mb_predict_mv_ref16x16. - - - git-svn-id: svn://svn.videolan.org/x264/trunk@141 df754926-b1dd-0310-bc7b-ec298dee348c - -commit 92f6f36f1d58fd9809263aba16ddeb78ec2ee47d -Author: Loren Merritt -Date: Thu Feb 24 13:00:34 2005 +0000 - - Implicit weights in B16x16 analysis were swapped. - patch by Radek Czyz. - - - git-svn-id: svn://svn.videolan.org/x264/trunk@140 df754926-b1dd-0310-bc7b-ec298dee348c - -commit c2b0c8a0e11ef079e82789f79d54c30c9b2364ae -Author: Loren Merritt -Date: Thu Feb 24 08:31:12 2005 +0000 - - Cosmetics: Some renaming. Move the rest of slice type decision from encoder.c to slicetype_decision.c - - - git-svn-id: svn://svn.videolan.org/x264/trunk@139 df754926-b1dd-0310-bc7b-ec298dee348c - -commit 24a6672ecaa6bccc65c4043248c1787e3161062c -Author: Loren Merritt -Date: Thu Feb 24 08:17:31 2005 +0000 - - Take into account keyint_max in B-frame decision. - - - git-svn-id: svn://svn.videolan.org/x264/trunk@138 df754926-b1dd-0310-bc7b-ec298dee348c - -commit 68c13530b5ffc28325aee408f4cd19ab7da06715 -Author: Loren Merritt -Date: Wed Feb 23 19:58:02 2005 +0000 - - Preliminary adaptive B-frame decision (not yet tuned). - Fix flushing of delayed frames when the encode finishes. - - - git-svn-id: svn://svn.videolan.org/x264/trunk@137 df754926-b1dd-0310-bc7b-ec298dee348c - -commit e2efb4b7d5885112a32b5b710958fe9fa5458bbf -Author: Loren Merritt -Date: Tue Feb 22 22:08:07 2005 +0000 - - Write x264's version in a SEI message. 
- - - git-svn-id: svn://svn.videolan.org/x264/trunk@136 df754926-b1dd-0310-bc7b-ec298dee348c - -commit 3ada9c0514d0d785dec7de1f5d092fbda7a629cb -Author: Loren Merritt -Date: Tue Feb 22 10:46:28 2005 +0000 - - VfW: Enable weighted B prediction when max B-frames > 1. Enforce max reference frames <= 15. - patch by Riccardo Stievano. - - - git-svn-id: svn://svn.videolan.org/x264/trunk@135 df754926-b1dd-0310-bc7b-ec298dee348c - -commit 834eac288ff5e8d40a1a751d61a59d77d67c0537 -Author: Loren Merritt -Date: Tue Feb 22 05:19:02 2005 +0000 - - Add: implicit weighted prediction for B-frames. - Slightly optimize x264_mb_mc_01xywh. - Fix an error in B16x8 cost. - - - git-svn-id: svn://svn.videolan.org/x264/trunk@134 df754926-b1dd-0310-bc7b-ec298dee348c - -commit 47706e75fdf80b0c0011e2d697e5e181060a08fe -Author: Loren Merritt -Date: Sun Feb 20 01:52:12 2005 +0000 - - Oops, increment API number. - - - git-svn-id: svn://svn.videolan.org/x264/trunk@133 df754926-b1dd-0310-bc7b-ec298dee348c - -commit d7443f67331e392b580564f815a34c5762f71f03 -Author: Loren Merritt -Date: Sun Feb 20 01:26:03 2005 +0000 - - Configurable level. Levels are still not enforced; it's up to the user to select a level compatible with the rest of the encoding options. - Patch by Jeff Clagg . - - - git-svn-id: svn://svn.videolan.org/x264/trunk@132 df754926-b1dd-0310-bc7b-ec298dee348c - -commit 15450dbe916e971793989dc44762e1bda23ca153 -Author: Loren Merritt -Date: Sat Feb 19 06:18:22 2005 +0000 - - Always use the tempfile and rename method for multipass stats, so that VfW knows whether the previous pass completed. - - - git-svn-id: svn://svn.videolan.org/x264/trunk@131 df754926-b1dd-0310-bc7b-ec298dee348c - -commit b1f47ea51c7057ebc0d8938a224662cc6fe23c80 -Author: Loren Merritt -Date: Fri Feb 18 07:47:35 2005 +0000 - - More tweaks to bitrate prediction. - Change error messages when 2pass fails to converge. 
- - - git-svn-id: svn://svn.videolan.org/x264/trunk@130 df754926-b1dd-0310-bc7b-ec298dee348c - -commit 0606b3ac325a3cb3fd1fe648d9a6468ab731f7d5 -Author: Loren Merritt -Date: Thu Feb 17 19:31:15 2005 +0000 - - Improved 2pass bitrate predictor. No real change most of the time, but allows correct ratecontrol on some pathological videos that used to diverge completely. Also improves prediction when 2nd pass bitrate is very different from 1st pass. - The new qscale2bits() has no simple inverse, so I also had to change rc_eq to output qscale instead of bits. - - - git-svn-id: svn://svn.videolan.org/x264/trunk@129 df754926-b1dd-0310-bc7b-ec298dee348c - -commit 3b2116cdd0aceff59036a17c6f9aa32592de4851 -Author: Loren Merritt -Date: Wed Feb 16 04:59:21 2005 +0000 - - Some defines needed by MSVC, and convert the DSP files to DOS-style newlines. - Patch by Radek Czyz. - - - git-svn-id: svn://svn.videolan.org/x264/trunk@128 df754926-b1dd-0310-bc7b-ec298dee348c - -commit d688918e861714e23f8fa7bdaaa6bf47ffec0395 -Author: Loren Merritt -Date: Mon Feb 14 23:32:38 2005 +0000 - - Precalculate lambda*bits for all allowed mvs. 1-2% speedup. - - - git-svn-id: svn://svn.videolan.org/x264/trunk@127 df754926-b1dd-0310-bc7b-ec298dee348c - -commit ac411e297aaaec200b33b6dab082e12c55c3b7ef -Author: Loren Merritt -Date: Mon Feb 14 11:08:00 2005 +0000 - - Deblock B-frames. (Not yet used, since B-frames aren't kept as references.) - - - git-svn-id: svn://svn.videolan.org/x264/trunk@126 df754926-b1dd-0310-bc7b-ec298dee348c - -commit 50a924885b78abc24dba59bb6717095bcde15d1b -Author: Loren Merritt -Date: Mon Feb 14 05:58:50 2005 +0000 - - Simplify x264_mb_mc_01xywh() - - - git-svn-id: svn://svn.videolan.org/x264/trunk@125 df754926-b1dd-0310-bc7b-ec298dee348c - -commit b2d78b5c7a423a75f0cb555173d92011b4accc44 -Author: Loren Merritt -Date: Mon Feb 14 04:10:15 2005 +0000 - - Save some memcopies in halfpel ME. - Patch by Radek Czyz. 
- - - git-svn-id: svn://svn.videolan.org/x264/trunk@124 df754926-b1dd-0310-bc7b-ec298dee348c - -commit 46141bf206dc672c3ab2b50850df702305ecb8ff -Author: Loren Merritt -Date: Sun Feb 13 09:49:42 2005 +0000 - - Cache half-pixel interpolated reference frames, to avoid duplicate motion compensation. - 30-50% speedup at subq=5. - Patch by Radek Czyz. - - - git-svn-id: svn://svn.videolan.org/x264/trunk@123 df754926-b1dd-0310-bc7b-ec298dee348c - -commit d81fa19a0af848bd97b2250e1405b5fac54820b1 -Author: Loren Merritt -Date: Sat Feb 12 12:26:52 2005 +0000 - - In N-pass mode if stat_in and stat_out are the same file, instead save to a temp file and overwrite stat_in only when the encode finishes. - - - git-svn-id: svn://svn.videolan.org/x264/trunk@122 df754926-b1dd-0310-bc7b-ec298dee348c - -commit dc270b76915975ab1ea6e16992aa79e96e6801f7 -Author: Loren Merritt -Date: Fri Feb 11 19:04:44 2005 +0000 - - VfW: x264_log now creates a window for error messages - - - git-svn-id: svn://svn.videolan.org/x264/trunk@121 df754926-b1dd-0310-bc7b-ec298dee348c - -commit ef4d1fa4a99a23420708083c66e882d7cfd21d9f -Author: Loren Merritt -Date: Thu Feb 10 22:11:39 2005 +0000 - - cosmetics - - - git-svn-id: svn://svn.videolan.org/x264/trunk@120 df754926-b1dd-0310-bc7b-ec298dee348c - -commit 6f298b9d889536c5dc14fc08faa95447a322a1cd -Author: Loren Merritt -Date: Thu Feb 10 21:54:40 2005 +0000 - - bs_align_1() didn't actually write all ones. (so encoded streams with cabac were technically invalid, though no decoder cares.) - Patch by Tuukka Toivonen. 
- - - git-svn-id: svn://svn.videolan.org/x264/trunk@119 df754926-b1dd-0310-bc7b-ec298dee348c - -commit ca4ae5219a95e05e80e707ce6828b79276e1f795 -Author: Loren Merritt -Date: Tue Feb 8 23:30:33 2005 +0000 - - VfW: tweak option names - - - git-svn-id: svn://svn.videolan.org/x264/trunk@118 df754926-b1dd-0310-bc7b-ec298dee348c - -commit 85c92e1be46a3fa90e81121a03bfb8479e87a2da -Author: Loren Merritt -Date: Sun Feb 6 06:47:42 2005 +0000 - - VfW: use separate stats files for each pass of an N-pass encode. - - - git-svn-id: svn://svn.videolan.org/x264/trunk@117 df754926-b1dd-0310-bc7b-ec298dee348c - -commit 70ce7a4be261f638a614918ed0e822b7f60d8269 -Author: Loren Merritt -Date: Sat Feb 5 22:55:48 2005 +0000 - - VfW: Enable multipass by default, increase the configurable range of I and B quant ratios. - core: Tweak error messages. - - - git-svn-id: svn://svn.videolan.org/x264/trunk@116 df754926-b1dd-0310-bc7b-ec298dee348c - -commit 2796ba0138a59efa32357f9dc708eefe01c55882 -Author: Loren Merritt -Date: Fri Feb 4 01:20:55 2005 +0000 - - r114 didn't completely fix the problem, trying again. - - - git-svn-id: svn://svn.videolan.org/x264/trunk@115 df754926-b1dd-0310-bc7b-ec298dee348c - -commit 33140d0984c7415ed0441858022e777794934550 -Author: Loren Merritt -Date: Thu Feb 3 11:03:17 2005 +0000 - - Another MV clipping fix. - - - git-svn-id: svn://svn.videolan.org/x264/trunk@114 df754926-b1dd-0310-bc7b-ec298dee348c - -commit 917924591931e63df4458eb90d5d8bce4bff035d -Author: Loren Merritt -Date: Tue Feb 1 10:13:51 2005 +0000 - - Simplify x264_cabac_mb_type. - - - git-svn-id: svn://svn.videolan.org/x264/trunk@113 df754926-b1dd-0310-bc7b-ec298dee348c - -commit 7927c9ec8c4991040728d64893490c8ecb3d9b44 -Author: Loren Merritt -Date: Mon Jan 31 12:20:23 2005 +0000 - - More accurate clipping rectangle for motion search. 
(slight compression improvement for high-motion scenes) - - - git-svn-id: svn://svn.videolan.org/x264/trunk@112 df754926-b1dd-0310-bc7b-ec298dee348c - -commit 5b750c35db61a4d26e8801c175da422d11748aad -Author: Eric Petit -Date: Fri Jan 28 15:17:51 2005 +0000 - - encoder/encoder.c: gcc < 3 compile fix - - - git-svn-id: svn://svn.videolan.org/x264/trunk@111 df754926-b1dd-0310-bc7b-ec298dee348c - -commit 3f098e9644a31f17490f05d0ecea08e6443aa110 -Author: Loren Merritt -Date: Fri Jan 28 13:47:14 2005 +0000 - - Change default level from 2.1 to 4.0 until I get around to calculating actual levels. - - - git-svn-id: svn://svn.videolan.org/x264/trunk@110 df754926-b1dd-0310-bc7b-ec298dee348c - -commit bccb009f9fa67797da6cd3da742ad9a27266b12b -Author: Loren Merritt -Date: Fri Jan 28 02:51:21 2005 +0000 - - Clipping mvs to within picture + emulated border when running motion compensation. - - - git-svn-id: svn://svn.videolan.org/x264/trunk@109 df754926-b1dd-0310-bc7b-ec298dee348c - -commit c16119a2ccf2dff2328628dd9ded4681f5502c38 -Author: Loren Merritt -Date: Thu Jan 27 11:33:14 2005 +0000 - - Fix clipping of mvs in probe_pskip. (Previously it mixed up fullpel with qpel.) This should eliminate the black blocks that sometimes appeared in high motion, low detail scenes. - - - git-svn-id: svn://svn.videolan.org/x264/trunk@108 df754926-b1dd-0310-bc7b-ec298dee348c - -commit 6558c8322f175e3970c8a2f351dd7f8f66e130d2 -Author: Loren Merritt -Date: Tue Jan 25 22:25:05 2005 +0000 - - Fix length of strings stored in the registry. - Patch by Riccardo Stievano. 
- - - git-svn-id: svn://svn.videolan.org/x264/trunk@107 df754926-b1dd-0310-bc7b-ec298dee348c - -commit 084175d95978011c837e4363616f5cac5794bc07 -Author: Loren Merritt -Date: Mon Jan 24 22:55:48 2005 +0000 - - registry values for min/max keyint were mixed up - - - git-svn-id: svn://svn.videolan.org/x264/trunk@106 df754926-b1dd-0310-bc7b-ec298dee348c - -commit 79f73aa2f5a5a6d16dadbc10c7ae9647fae76a29 -Author: Loren Merritt -Date: Sun Jan 23 09:38:42 2005 +0000 - - VfW: expose option "Nth pass" (i.e. simultaneously read and update the multipass stats file). - Patch by Riccardo Stievano. - - - - git-svn-id: svn://svn.videolan.org/x264/trunk@105 df754926-b1dd-0310-bc7b-ec298dee348c - -commit 0292410a8da029c45625c9f8670c9bf16c828c12 -Author: Loren Merritt -Date: Fri Jan 21 08:22:47 2005 +0000 - - add "make NDEBUG=1" to strip library - - - git-svn-id: svn://svn.videolan.org/x264/trunk@104 df754926-b1dd-0310-bc7b-ec298dee348c - -commit 66ee02bdb20e5fe43f3dabbaa10e61d49a945a03 -Author: Loren Merritt -Date: Tue Jan 18 21:32:20 2005 +0000 - - finish subpixel motion refinement for B-frames (up to 6% reduced size of B-frames at subq <= 3) - - - git-svn-id: svn://svn.videolan.org/x264/trunk@103 df754926-b1dd-0310-bc7b-ec298dee348c - -commit 37c0a244e49e607855a55a06b4b911eaadbc4604 -Author: Loren Merritt -Date: Tue Jan 18 12:19:39 2005 +0000 - - VfW: expose the 2pass ratecontrol option: qcomp ("bitrate variability"). - Some rearranging of the advanced configuration dialogue. - Patch by Riccardo Stievano . - - - git-svn-id: svn://svn.videolan.org/x264/trunk@102 df754926-b1dd-0310-bc7b-ec298dee348c - -commit c80d310f2af65750dafc3decdab6c1df2cbbc5e3 -Author: Loren Merritt -Date: Mon Jan 17 04:29:24 2005 +0000 - - VfW: Support ip_factor and pb_factor, some cleanups. 
- patch by Riccardo Stievano - - - git-svn-id: svn://svn.videolan.org/x264/trunk@101 df754926-b1dd-0310-bc7b-ec298dee348c - -commit 19ed02568e95f69e2dd33f9b8d8cd1ff900f268b -Author: Loren Merritt -Date: Sat Jan 15 11:28:44 2005 +0000 - - Use floats instead of int64 in log messages, since win32 (incl. mingw) doesn't understand %lld. - Also display MB statistics in percent instead of number. - - - git-svn-id: svn://svn.videolan.org/x264/trunk@100 df754926-b1dd-0310-bc7b-ec298dee348c - -commit 4394c4d549caac00dedfeacffa14857839365f04 -Author: Loren Merritt -Date: Sat Jan 15 10:28:51 2005 +0000 - - finished printf -> x264_log conversion. - - - git-svn-id: svn://svn.videolan.org/x264/trunk@99 df754926-b1dd-0310-bc7b-ec298dee348c - -commit 04bb83346e4c7ba29c4f6b0c5e376ebde81a899b -Author: Loren Merritt -Date: Fri Jan 14 21:38:13 2005 +0000 - - Don't apply keyframe boost to I-frames that are followed by another I. - - - git-svn-id: svn://svn.videolan.org/x264/trunk@98 df754926-b1dd-0310-bc7b-ec298dee348c - -commit 7b46f42d142f790ecf053f952ce024b467eac762 -Author: Loren Merritt -Date: Fri Jan 14 01:04:28 2005 +0000 - - New VfW option: "fast 1st pass" automatically disables some partitions and reduces ME quality and number of reference frames. - Removed option direct_pred=none, since it provides no benefits. - Patch by Riccardo Stievano . - - - git-svn-id: svn://svn.videolan.org/x264/trunk@97 df754926-b1dd-0310-bc7b-ec298dee348c - -commit 177e211333d91a06ec2df3ac87c12336812d32e6 -Author: Loren Merritt -Date: Thu Jan 13 19:47:51 2005 +0000 - - vfw: tweak wording and defaults - - - git-svn-id: svn://svn.videolan.org/x264/trunk@96 df754926-b1dd-0310-bc7b-ec298dee348c - -commit b80ed7030d5979bfa2da92a2584078c7f844f28f -Author: Måns Rullgård -Date: Thu Jan 13 18:18:05 2005 +0000 - - From Riccardo Stievano : - - here's a patch that fixes the VfW frontend after the changes made in - revision 93 (GOP size management). 
Default values for i_keyint_max - and i_keyint_min have been set to 250 and 10, respectively. - - - git-svn-id: svn://svn.videolan.org/x264/trunk@95 df754926-b1dd-0310-bc7b-ec298dee348c - -commit 0a7090477d28a3f708a7b1edb89845e10b71191d -Author: Loren Merritt -Date: Thu Jan 13 06:11:22 2005 +0000 - - My last change of IDR decision broke in 2pass mode. fixed by remembering which frames are IDR. - Disable benchmarking, as it was very slow for some people, and we already know that all the time is spent in macroblock analysis. - - - git-svn-id: svn://svn.videolan.org/x264/trunk@94 df754926-b1dd-0310-bc7b-ec298dee348c - -commit 648328088b1c4bfb3afcbc92b6711cb7b7b5e068 -Author: Loren Merritt -Date: Wed Jan 12 09:50:38 2005 +0000 - - Changes the mechanics of max keyframe interval: - Now enforces min and max GOP sizes, and allows variable numbers of - non-IDR I-frames within a GOP. - - - git-svn-id: svn://svn.videolan.org/x264/trunk@93 df754926-b1dd-0310-bc7b-ec298dee348c - -commit 5b13c839df14fdd8b94724230ec2e92cba3164a1 -Author: Loren Merritt -Date: Wed Jan 12 05:23:16 2005 +0000 - - MinGW compatible resource.rc by Radek Czyz - - - git-svn-id: svn://svn.videolan.org/x264/trunk@92 df754926-b1dd-0310-bc7b-ec298dee348c - -commit 8379464a55722872e63cb6b2120e81ed5ac80781 -Author: Loren Merritt -Date: Wed Jan 12 04:45:10 2005 +0000 - - strict QP offset for B-frame vs following P-frame - strict QP offset for I-frame vs GOP average - - - git-svn-id: svn://svn.videolan.org/x264/trunk@91 df754926-b1dd-0310-bc7b-ec298dee348c - -commit 0af83ad2ddca90ba0a4066d67513a32798395ce6 -Author: Loren Merritt -Date: Tue Jan 11 06:20:37 2005 +0000 - - r72 broke B-frames without intra4x4. fixed. 
- - - git-svn-id: svn://svn.videolan.org/x264/trunk@90 df754926-b1dd-0310-bc7b-ec298dee348c - -commit 5ccb93c1a78a8dfeb1953426b89494f6f5d36fec -Author: Loren Merritt -Date: Mon Jan 10 09:29:31 2005 +0000 - - updated VfW interface by Radek Czyz - - - git-svn-id: svn://svn.videolan.org/x264/trunk@89 df754926-b1dd-0310-bc7b-ec298dee348c - -commit fc2e7ba68bfcb5b22f510839b6f0b3da333671fd -Author: Loren Merritt -Date: Sat Jan 8 02:51:24 2005 +0000 - - improved mv prediction: 1-3% better compression of B-frames - early termination for B-frame ref search: up to 20% faster with lots of refs. - - - git-svn-id: svn://svn.videolan.org/x264/trunk@88 df754926-b1dd-0310-bc7b-ec298dee348c - -commit 7fc5f99d43b5c2498b368e6f5c1620f591bd2a45 -Author: Loren Merritt -Date: Fri Jan 7 18:45:11 2005 +0000 - - allow constant qp on Nth pass (e.g. for forcing frame types) - - - git-svn-id: svn://svn.videolan.org/x264/trunk@87 df754926-b1dd-0310-bc7b-ec298dee348c - -commit 094266b31789338c7a6b91f96aa2fc8c1bd72f94 -Author: Loren Merritt -Date: Fri Jan 7 11:08:55 2005 +0000 - - disable subme=0 (the huge bitrate penalty wasn't worth the speed) - renumber direct_pred - - - git-svn-id: svn://svn.videolan.org/x264/trunk@86 df754926-b1dd-0310-bc7b-ec298dee348c - -commit 3a11b25d31f7eac4ccc4252b5525a6554c6d22ba -Author: Loren Merritt -Date: Wed Jan 5 09:15:35 2005 +0000 - - oops, last patch had some debug statements - - - git-svn-id: svn://svn.videolan.org/x264/trunk@85 df754926-b1dd-0310-bc7b-ec298dee348c - -commit 0ad06e691819f2a2a05f673b50afe8b676d48f44 -Author: Loren Merritt -Date: Wed Jan 5 07:08:40 2005 +0000 - - fix: "x264 -A all" didn't include b8x8 types. 
- add: "make NDEBUG=1" to strip library - update TODO with B-frame status - - - git-svn-id: svn://svn.videolan.org/x264/trunk@84 df754926-b1dd-0310-bc7b-ec298dee348c - -commit 53295729673187df0ab1143c08fae578b5447376 -Author: Loren Merritt -Date: Wed Jan 5 06:59:29 2005 +0000 - - Reorganize frame type selection: No longer produces consecutive I-frames when B-frames are enabled. Not thoroughly tested, but works for me. - Fix scenecut detection when B-frames are present: Can now produce IDR, but is slower since it re-encodes more frames. This might reduce compression ratio in the presence of quick fade-ins. - 2pass ratecontrol deals more gracefully with completely skipped frames. - - - git-svn-id: svn://svn.videolan.org/x264/trunk@83 df754926-b1dd-0310-bc7b-ec298dee348c - -commit b1d946cd95c7b4d3ab618b9bbb0191949a49ad4c -Author: Loren Merritt -Date: Mon Jan 3 03:47:49 2005 +0000 - - remove Makefile.cygwin because build/cygwin/Makefile is more up to date. - put correct object file names in .depend - - - git-svn-id: svn://svn.videolan.org/x264/trunk@82 df754926-b1dd-0310-bc7b-ec298dee348c - -commit 456e8fdc5d5a20ad14a41e83db21b9aa5529c476 -Author: Loren Merritt -Date: Mon Jan 3 02:32:44 2005 +0000 - - reduce default verbosity, add option -v - - - git-svn-id: svn://svn.videolan.org/x264/trunk@81 df754926-b1dd-0310-bc7b-ec298dee348c - -commit 007c6f71c2b0d2868dfc46edaf24a5c27eceab47 -Author: Loren Merritt -Date: Fri Dec 31 02:33:21 2004 +0000 - - remove relative include paths, to avoid conflicts with libtool - - - git-svn-id: svn://svn.videolan.org/x264/trunk@80 df754926-b1dd-0310-bc7b-ec298dee348c - -commit b42bd7463a00b65f1e2d5e2c10ff374531e997f6 -Author: Loren Merritt -Date: Fri Dec 31 01:56:26 2004 +0000 - - rename *.asm to avoid conflicts with libtool - - - git-svn-id: svn://svn.videolan.org/x264/trunk@79 df754926-b1dd-0310-bc7b-ec298dee348c - -commit dabd095c2a59ec95f83428555a329049e4ab165f -Author: Loren Merritt -Date: Thu Dec 30 23:58:06 2004 +0000 - - list 
default settings in --help - - - git-svn-id: svn://svn.videolan.org/x264/trunk@78 df754926-b1dd-0310-bc7b-ec298dee348c - -commit fc1380db831cab2d90a8a116848d9263bd83b871 -Author: Loren Merritt -Date: Thu Dec 30 04:01:58 2004 +0000 - - replace EPZS diamond with a hexagon search pattern. - early termination for multiple reference frame search (up to 1.5x faster). - - - git-svn-id: svn://svn.videolan.org/x264/trunk@77 df754926-b1dd-0310-bc7b-ec298dee348c - -commit ab0c769d9813d82f9d7d6f82cce289ae2e466db8 -Author: Loren Merritt -Date: Wed Dec 29 21:18:14 2004 +0000 - - sps->i_num_ref_frames was set higher than necessary - - - git-svn-id: svn://svn.videolan.org/x264/trunk@76 df754926-b1dd-0310-bc7b-ec298dee348c - -commit e33ed4c9fc447b7e4bc057e85f9ee83b07b714e1 -Author: Loren Merritt -Date: Wed Dec 29 12:08:50 2004 +0000 - - new option: --fps - - - git-svn-id: svn://svn.videolan.org/x264/trunk@75 df754926-b1dd-0310-bc7b-ec298dee348c - -commit d5322b4e055d3710ea48a1d3cb336b5264a9621e -Author: Loren Merritt -Date: Wed Dec 29 10:53:03 2004 +0000 - - various cleanups in macroblock caching. - store motion data for each reference frame (but not yet used). - - - git-svn-id: svn://svn.videolan.org/x264/trunk@74 df754926-b1dd-0310-bc7b-ec298dee348c - -commit 9df283c3b29afdcb20387997b09893e450075976 -Author: Loren Merritt -Date: Tue Dec 28 10:14:19 2004 +0000 - - more accurate cost for psub8x8 modes. 
- - - git-svn-id: svn://svn.videolan.org/x264/trunk@73 df754926-b1dd-0310-bc7b-ec298dee348c - -commit 4e5e3770a6b2366676232fe5f335f572f6cdefcb -Author: Loren Merritt -Date: Thu Dec 23 04:33:36 2004 +0000 - - implement macroblock types B_16x8, B_8x16 - tweak thresholds for comparing B mb types - - - git-svn-id: svn://svn.videolan.org/x264/trunk@72 df754926-b1dd-0310-bc7b-ec298dee348c - -commit efbf4ad58c26c6a609a43a2b636ce50e1272f101 -Author: Loren Merritt -Date: Wed Dec 22 21:09:45 2004 +0000 - - simplify x264_mb_predict_mv_direct16x16_temporal - - - git-svn-id: svn://svn.videolan.org/x264/trunk@71 df754926-b1dd-0310-bc7b-ec298dee348c - -commit 457eaa93110fa95c393794d85bde7943b2d325bd -Author: Loren Merritt -Date: Wed Dec 22 20:52:13 2004 +0000 - - option '--frames' limits number of frames to encode. - patch by Tuukka Toivonen - - - git-svn-id: svn://svn.videolan.org/x264/trunk@70 df754926-b1dd-0310-bc7b-ec298dee348c - -commit dfbbcec847ddb9b9bbc4549671c6ce7533a7c098 -Author: Loren Merritt -Date: Wed Dec 22 20:29:19 2004 +0000 - - simplify calvc mb type - - - git-svn-id: svn://svn.videolan.org/x264/trunk@69 df754926-b1dd-0310-bc7b-ec298dee348c - -commit 199ff7406b76dc1c10b756053398bf8a834bcf5c -Author: Loren Merritt -Date: Fri Dec 17 10:57:02 2004 +0000 - - implement macroblock types B_SKIP, B_DIRECT, B_8x8 - - - git-svn-id: svn://svn.videolan.org/x264/trunk@68 df754926-b1dd-0310-bc7b-ec298dee348c - -commit b6954ba2bba2f4fc002e8be4f57d7f3b43871c33 -Author: Loren Merritt -Date: Tue Dec 14 02:04:02 2004 +0000 - - rename 'core/' to 'common/', which avoids conflicts with libtool - - - git-svn-id: svn://svn.videolan.org/x264/trunk@67 df754926-b1dd-0310-bc7b-ec298dee348c - -commit da49faec0719a3e774177356c66a4b41ddd0b10c -Author: Loren Merritt -Date: Wed Dec 8 05:01:57 2004 +0000 - - cleanup stats reporting - report B macroblock types - report average QP - - - git-svn-id: svn://svn.videolan.org/x264/trunk@66 df754926-b1dd-0310-bc7b-ec298dee348c - -commit 
bc0a1e9b79d725680418be2bf7cf584a739ca47b -Author: Loren Merritt -Date: Wed Dec 8 02:28:58 2004 +0000 - - apply ip_factor and pb_factor in constant quantiser encodes. - - - git-svn-id: svn://svn.videolan.org/x264/trunk@65 df754926-b1dd-0310-bc7b-ec298dee348c - -commit 25b542c01b85c6da7bea789fc60f1f22d7281488 -Author: Loren Merritt -Date: Wed Dec 1 21:23:06 2004 +0000 - - save a little bit of memory - - - git-svn-id: svn://svn.videolan.org/x264/trunk@64 df754926-b1dd-0310-bc7b-ec298dee348c - -commit 0f65f519a8539602e08fd87d9c21b0b3f34a80d8 -Author: Loren Merritt -Date: Mon Nov 22 07:34:17 2004 +0000 - - multiple hypothesis mv prediction: - 1-3% improved compression, and .5-1% faster - - - git-svn-id: svn://svn.videolan.org/x264/trunk@63 df754926-b1dd-0310-bc7b-ec298dee348c - -commit 2489b6a66d960d46db325a15314ac82fc9f3ed1a -Author: Laurent Aimar -Date: Thu Nov 18 12:30:27 2004 +0000 - - * analyse: we can do 4x4 Horizontal Up mode when LEFT is avaible. - Thanks Stephen Henry for the report. - - git-svn-id: svn://svn.videolan.org/x264/trunk@62 df754926-b1dd-0310-bc7b-ec298dee348c - -commit 71f820d76b4a38b4ed73a12a8aeb93589b18527a -Author: Loren Merritt -Date: Wed Nov 17 18:40:26 2004 +0000 - - improved 2pass ratecontrol: - ensures that I-frames have comparable quantizer to the following P-frames, - and produces more consistent quality in areas of fluctuating complexity. 
- - - git-svn-id: svn://svn.videolan.org/x264/trunk@61 df754926-b1dd-0310-bc7b-ec298dee348c - -commit 7972ae1f284b1130abb6cc4f3fe963dcb0ee48c8 -Author: Loren Merritt -Date: Fri Nov 12 07:14:24 2004 +0000 - - more informative error message when 2pass fails to converge - - - git-svn-id: svn://svn.videolan.org/x264/trunk@60 df754926-b1dd-0310-bc7b-ec298dee348c - -commit 30c6a09063e499f47077edab206fd79992914ef7 -Author: Måns Rullgård -Date: Thu Nov 11 12:37:24 2004 +0000 - - #include - - - git-svn-id: svn://svn.videolan.org/x264/trunk@59 df754926-b1dd-0310-bc7b-ec298dee348c - -commit a0b32b7cc24d59826f311dcd2e64945a80803dc2 -Author: Loren Merritt -Date: Thu Nov 4 09:19:34 2004 +0000 - - cleanup spacing of frame stats with verbose logging. - - - git-svn-id: svn://svn.videolan.org/x264/trunk@58 df754926-b1dd-0310-bc7b-ec298dee348c - -commit bdf9b10610bf0141ede5812a70d889f7b557560d -Author: Loren Merritt -Date: Thu Oct 28 20:10:53 2004 +0000 - - typo in x264_cabac_mb_sub_b_partition - (see ITU-T H.264 clause 9.3.3.1.2) - - - git-svn-id: svn://svn.videolan.org/x264/trunk@57 df754926-b1dd-0310-bc7b-ec298dee348c - -commit e917887b2e16e5d00d67fa8da7cd828a456fd75d -Author: Eric Petit -Date: Wed Oct 27 19:14:24 2004 +0000 - - Typo - - - git-svn-id: svn://svn.videolan.org/x264/trunk@56 df754926-b1dd-0310-bc7b-ec298dee348c - -commit 851989ac7c839ee2bf42c74a6fd90b5eb78f0a69 -Author: Eric Petit -Date: Wed Oct 27 19:06:47 2004 +0000 - - + No need to emulate memalign on OS X - + Fixed Makefile for OS X - - (Original patch by Peter Handel) - - - git-svn-id: svn://svn.videolan.org/x264/trunk@55 df754926-b1dd-0310-bc7b-ec298dee348c - -commit 57554925f4075d00708be958853d5b2e9a9f6487 -Author: Måns Rullgård -Date: Wed Oct 27 15:43:15 2004 +0000 - - Conditionally inits 1pass rc, only if it's enabled. - This prevents a couple of irrelevant warnings from appearing in - constant QP mode. 
(Loren Merritt ) - - - - git-svn-id: svn://svn.videolan.org/x264/trunk@54 df754926-b1dd-0310-bc7b-ec298dee348c - -commit c9a501a467f06277ed72ba5e222ba00b018364eb -Author: Måns Rullgård -Date: Mon Oct 25 09:40:23 2004 +0000 - - Oops, changing those types messed up some vprintf's. fixed. - (Loren Merrit ) - - - git-svn-id: svn://svn.videolan.org/x264/trunk@53 df754926-b1dd-0310-bc7b-ec298dee348c - -commit 46d0385cdd96b8d0753016bb255108a3adb3ba86 -Author: Måns Rullgård -Date: Tue Oct 19 21:35:18 2004 +0000 - - filesize (bits) in a 32 bit int will overflow after 250MB, screwing up - 2pass ratecontrol. - (patch by Loren Merritt ) - - - - git-svn-id: svn://svn.videolan.org/x264/trunk@52 df754926-b1dd-0310-bc7b-ec298dee348c - -commit 3f7206dc5bfb84ff4e1fb494d9ecf09e98baa937 -Author: Måns Rullgård -Date: Mon Oct 11 10:13:05 2004 +0000 - - fix compilation on FreeBSD (from Loren Merritt (thanks to Igla)) - - - git-svn-id: svn://svn.videolan.org/x264/trunk@51 df754926-b1dd-0310-bc7b-ec298dee348c - -commit 48937c64c13904973f83495b64afdf4223f647f5 -Author: Laurent Aimar -Date: Wed Sep 29 16:05:24 2004 +0000 - - * ratecontrol: Patch by Loren Merritt : - - " This patch - * calculates average QP as a float, providing slightly improved - ratecontrol if the first pass was CBR. - * fixes the reported QP if you set both b_stat_read and b_stat_write, - allowing 3 pass encoding (or just examination of the 2nd pass's stats)." - - - - git-svn-id: svn://svn.videolan.org/x264/trunk@50 df754926-b1dd-0310-bc7b-ec298dee348c - -commit e3ae8a7d1a3953c0f261cba6e1b161cfc7f1b0d6 -Author: Laurent Aimar -Date: Wed Sep 29 16:02:18 2004 +0000 - - * all: Patch by Loren Merritt. - - " This patch makes scene-cut detection based on the relative cost of I-frame - vs P-frame, rather than just on the number of I-blocks used. - It also makes the scene-cut threshold configurable. - - This doesn't have a very large effect: Most scene cuts are obvious to - either algorithm. 
But I think this way is better in some less clear cut - cases, and sometimes finds a better spot for an I-frame than just waiting - for the max I-frame interval." - - - - git-svn-id: svn://svn.videolan.org/x264/trunk@49 df754926-b1dd-0310-bc7b-ec298dee348c - -commit 79a2bb78b970e55e92bb4a74ff5f88dc6d0a6851 -Author: Laurent Aimar -Date: Wed Sep 22 07:37:43 2004 +0000 - - * ratecontrol: added 'b' flag to fopen. - - - git-svn-id: svn://svn.videolan.org/x264/trunk@48 df754926-b1dd-0310-bc7b-ec298dee348c - -commit 48e288644ed493a77b5935e90010973a4e15faf6 -Author: Laurent Aimar -Date: Wed Sep 22 07:07:48 2004 +0000 - - * all: Patches by Loren Merritt: - "Improved patch. Now supports subpel ME on all candidate MB types, - not just on the winner. - - subpel_refine: (completely different scale from before) - 0 => halfpel only - 1 => 1 iteration of qpel on the winner (same as x264 r46) - 2 => 2 iterations of qpel (about the same as my earlier patch, but faster - 3 => halfpel on all MB types, qpel on the winner - 4 => qpel on all - 5 => more iterations - - benchmarks: - mencoder dvd://1 -ovc x264 -x264encopts - qp_constant=19:fullinter:cabac:iframe=200:psnr - - subpel_refine=1: PSNR Global:46.82 kb/s:1048.1 fps:17.335 - subpel_refine=2: PSNR Global:46.83 kb/s:1034.4 fps:16.970 - subpel_refine=3: PSNR Global:46.84 kb/s:1023.3 fps:14.770 - subpel_refine=4: PSNR Global:46.87 kb/s:1010.8 fps:11.598 - subpel_refine=5: PSNR Global:46.88 kb/s:1006.9 fps:10.824" - - And - - "The current code for calculating the cost of encoding which reference - frame a MB is predicted from, introduces a bias towards ref0 and - against P16x16. - Removing this bias produces an improvement of .4% - 2% bitrate, - depending on content and number of reference frames." 
- - - - git-svn-id: svn://svn.videolan.org/x264/trunk@47 df754926-b1dd-0310-bc7b-ec298dee348c - -commit f9bd35a32d8de87c67749c1628ea7693a2b83460 -Author: Laurent Aimar -Date: Sun Aug 29 12:02:50 2004 +0000 - - * x264: added --ipratio --pbratio in help section. - - - git-svn-id: svn://svn.videolan.org/x264/trunk@46 df754926-b1dd-0310-bc7b-ec298dee348c - -commit 8c6f6fa0db394e65fa9b4272deec859c9cb67aac -Author: Laurent Aimar -Date: Sun Aug 29 11:32:34 2004 +0000 - - * ratecontrol: path by Loren Merritt. - - "Use average qp instead of last qp in the frame for 2pass rc. - (Improves quality and rate accuracy if the first pass was cbr.)" - - - - git-svn-id: svn://svn.videolan.org/x264/trunk@45 df754926-b1dd-0310-bc7b-ec298dee348c - -commit d46df39630925245993e3ecde07adce618d3c30a -Author: Laurent Aimar -Date: Sat Aug 28 22:30:44 2004 +0000 - - * x264: added --quiet and --no-psnr. - - - git-svn-id: svn://svn.videolan.org/x264/trunk@44 df754926-b1dd-0310-bc7b-ec298dee348c - -commit cba0cd394dfd87f82daa4b621dd9b701fca5bb9f -Author: Laurent Aimar -Date: Sat Aug 28 22:19:47 2004 +0000 - - * eval.c: lalala ;) - - - git-svn-id: svn://svn.videolan.org/x264/trunk@43 df754926-b1dd-0310-bc7b-ec298dee348c - -commit 8a5aa764ab46fd04dd60acfa3a6641742c4b0daa -Author: Laurent Aimar -Date: Sat Aug 28 22:19:15 2004 +0000 - - * added Loren Merritt. - - - git-svn-id: svn://svn.videolan.org/x264/trunk@42 df754926-b1dd-0310-bc7b-ec298dee348c - -commit 58b7012e219b64752168084fa028e76792b6f42a -Author: Laurent Aimar -Date: Sat Aug 28 22:16:48 2004 +0000 - - * all: added eval.c (I hope libx264.dsp is correct, I can't test). - - - git-svn-id: svn://svn.videolan.org/x264/trunk@41 df754926-b1dd-0310-bc7b-ec298dee348c - -commit 9116d300befee74c92ff3ac9fe7625a57dafab48 -Author: Laurent Aimar -Date: Sat Aug 28 22:14:26 2004 +0000 - - * all: 2pass patch by Loren Merritt - - "Mostly borrowed from libavcodec. 
- There is not much theoretical basis behind my choice of defaults for - rc_eq, qcompress, qblur, and ip_factor." - - - - git-svn-id: svn://svn.videolan.org/x264/trunk@40 df754926-b1dd-0310-bc7b-ec298dee348c - -commit 67b673006c7b13a3435e89ccf0a83ebbdd23937c -Author: Laurent Aimar -Date: Sat Aug 28 19:24:08 2004 +0000 - - * all: first part of the 2pass patch by Loren Merritt - (only the header/textures bits computed for now). - - - git-svn-id: svn://svn.videolan.org/x264/trunk@39 df754926-b1dd-0310-bc7b-ec298dee348c - -commit 0a7d38ca3a65133af46a438d00ff25948a78019a -Author: Laurent Aimar -Date: Sun Aug 22 15:01:46 2004 +0000 - - * all: include stdarg.h (needed for x264_log) - - - git-svn-id: svn://svn.videolan.org/x264/trunk@38 df754926-b1dd-0310-bc7b-ec298dee348c - -commit 63e5a5c8865f76d9fb0af10ee238fdb1bea1178c -Author: Måns Rullgård -Date: Wed Aug 18 09:28:56 2004 +0000 - - Use x264_log() in ratecontrol.c - - - git-svn-id: svn://svn.videolan.org/x264/trunk@37 df754926-b1dd-0310-bc7b-ec298dee348c - -commit f246a87fa64a4febe10a17e983f1b656d7914625 -Author: Laurent Aimar -Date: Tue Aug 17 21:08:23 2004 +0000 - - * encoder/encoder.c: oops. (fixed compilation). - - - git-svn-id: svn://svn.videolan.org/x264/trunk@36 df754926-b1dd-0310-bc7b-ec298dee348c - -commit dab6f065ffd7ab2ecc591ef22e0d556a3516a48f -Author: Laurent Aimar -Date: Tue Aug 17 20:39:03 2004 +0000 - - * all: more fprintf -> x264_log. - - - git-svn-id: svn://svn.videolan.org/x264/trunk@35 df754926-b1dd-0310-bc7b-ec298dee348c - -commit f53a7ae05c5f348baf1322d6d233aba899287df0 -Author: Laurent Aimar -Date: Tue Aug 17 20:27:05 2004 +0000 - - * all: added a x264_param_t.analyse.b_psnr - - - git-svn-id: svn://svn.videolan.org/x264/trunk@34 df754926-b1dd-0310-bc7b-ec298dee348c - -commit 72eced43c35ecce69b422c2da228ae044430c038 -Author: Laurent Aimar -Date: Tue Aug 17 20:03:46 2004 +0000 - - * encoder/encoder.c: kb/s with k=1000 (more consistant). 
Patch by Loren - Merritt - - - git-svn-id: svn://svn.videolan.org/x264/trunk@33 df754926-b1dd-0310-bc7b-ec298dee348c - -commit a01315f4e71b38b63ca08f9453b3409bf4f044b8 -Author: Laurent Aimar -Date: Tue Aug 17 19:56:36 2004 +0000 - - * all: introduced a x264_log function. It's not yet used everywhere - but we should start using it :) - - - git-svn-id: svn://svn.videolan.org/x264/trunk@32 df754926-b1dd-0310-bc7b-ec298dee348c - -commit 5ffe5a90e7553c20f40dcb1ae372579a941280cb -Author: Eric Petit -Date: Mon Aug 16 08:52:05 2004 +0000 - - OS X is missing exp2f() - - - git-svn-id: svn://svn.videolan.org/x264/trunk@31 df754926-b1dd-0310-bc7b-ec298dee348c - -commit 348de7f684821ffbb5e8a3a52969986de00c89c8 -Author: Eric Petit -Date: Mon Aug 16 08:47:51 2004 +0000 - - Fixed warnings with PPC 64 - - git-svn-id: svn://svn.videolan.org/x264/trunk@30 df754926-b1dd-0310-bc7b-ec298dee348c - -commit 2885471d5ea17e4e84b9e2551d8c1e2049bb3d7f -Author: Måns Rullgård -Date: Fri Aug 13 13:36:14 2004 +0000 - - Add my svn user name. - - - git-svn-id: svn://svn.videolan.org/x264/trunk@29 df754926-b1dd-0310-bc7b-ec298dee348c - -commit ed61d8ee02dabbfc7c667ee4711e5c9cd95f2032 -Author: Måns Rullgård -Date: Fri Aug 13 13:34:47 2004 +0000 - - Bugfix. - - - git-svn-id: svn://svn.videolan.org/x264/trunk@28 df754926-b1dd-0310-bc7b-ec298dee348c - -commit 6b20e508a7f7110b4c951fbdc4a971cc5c14494d -Author: Måns Rullgård -Date: Thu Aug 12 20:52:24 2004 +0000 - - Include timing info in VUI. - Change frame rate from float to fraction (sorry for the inconvenience). - - - git-svn-id: svn://svn.videolan.org/x264/trunk@27 df754926-b1dd-0310-bc7b-ec298dee348c - -commit 2b3cd6c669b64046a54b32577946bc262360d8ae -Author: Måns Rullgård -Date: Thu Aug 12 13:07:41 2004 +0000 - - Add TAGS rule. 
- - - git-svn-id: svn://svn.videolan.org/x264/trunk@26 df754926-b1dd-0310-bc7b-ec298dee348c - -commit 444615252b766c95c3dcbb11327c379101929468 -Author: Måns Rullgård -Date: Wed Aug 11 20:24:20 2004 +0000 - - Fixes by Loren Merritt (lorenm at u.washington.edu). - - - git-svn-id: svn://svn.videolan.org/x264/trunk@25 df754926-b1dd-0310-bc7b-ec298dee348c - -commit 78292e08d9e9770b1c74766368670eeeb3f02e3e -Author: Måns Rullgård -Date: Wed Aug 11 01:02:05 2004 +0000 - - Get rid of integer overflows that caused the rate control to go - haywire in some situations. - - - git-svn-id: svn://svn.videolan.org/x264/trunk@24 df754926-b1dd-0310-bc7b-ec298dee348c - -commit 374baca15e0ce069cadd4c32633f915b6f8294b0 -Author: Laurent Aimar -Date: Mon Aug 9 00:05:22 2004 +0000 - - * encoder: correct range for i_idr_pic_id is 0..65535 - (Not 0..65534) - - - git-svn-id: svn://svn.videolan.org/x264/trunk@23 df754926-b1dd-0310-bc7b-ec298dee348c - -commit 07a0494bd1b2c1ec05866d09bf84d0564d6e5080 -Author: Laurent Aimar -Date: Sun Aug 8 21:36:41 2004 +0000 - - ratecontrol: patch by Loren Merritt - - "The new cbr mode fails to completely disable itself when encoding in - constant QP mode. The per-block QPs are then randomized between QP+4 and - QP-2 based on uninitialized ratecontrol parameters." - - - - git-svn-id: svn://svn.videolan.org/x264/trunk@22 df754926-b1dd-0310-bc7b-ec298dee348c - -commit 249259d03700724183b879cd504a172d9c2d35f6 -Author: Laurent Aimar -Date: Sun Aug 8 19:15:10 2004 +0000 - - * ratecontrol: patch by Måns Rullgård - "This patch fixes a small bug (divide by 0 possible) in the rate control." - - - - git-svn-id: svn://svn.videolan.org/x264/trunk@21 df754926-b1dd-0310-bc7b-ec298dee348c - -commit e96703d73226f61149e2c815a03f8443a620ffff -Author: Laurent Aimar -Date: Sun Aug 8 16:18:49 2004 +0000 - - * encoder: simpler scene cut detection (seems better but do not check - size anymore, so need more testing). 
- - - git-svn-id: svn://svn.videolan.org/x264/trunk@20 df754926-b1dd-0310-bc7b-ec298dee348c - -commit 11e1b0c27fdd2213007fdb91f40d0fc2a1c11569 -Author: Laurent Aimar -Date: Sun Aug 8 14:23:50 2004 +0000 - - * all: Change the way PSNR is computed (based on a patch by Loren - Merritt - Using SQE(DeltaSourceReconstructed) = Sum( delta^2 ) - PSNR( SQE, Size ) = -10Ln(SQE / 255^2 / Size )/Ln(10) ) - Y+U+V : Union of YUV planes. - - Now there is - - Mean PSNR : Sum( PSNR( SQE(Y/U/V), Size(Y/U/V) ) / TotalFrames - - Average PSNR: Sum( PSNR( SQE(Y+U+V), Size(Y+U+V) ) ) / TotalFrames - - Global PSNR: PSNR( Sum( SQE(Y+U+V) ), Size(Y+U+V)*TotalFrames ) - - Mean PSNR is used by the JM, and Average/Overall is used on Doom9 for - example. - - - - git-svn-id: svn://svn.videolan.org/x264/trunk@19 df754926-b1dd-0310-bc7b-ec298dee348c - -commit 9168b245fb9b7e84e4c55faba839b89a4b54a48b -Author: Laurent Aimar -Date: Sat Aug 7 16:02:20 2004 +0000 - - * x264.h: increased X264_BUILD. - - - git-svn-id: svn://svn.videolan.org/x264/trunk@18 df754926-b1dd-0310-bc7b-ec298dee348c - -commit 20c19c4b3a001c9e02775fdba040725a438db795 -Author: Laurent Aimar -Date: Fri Aug 6 18:06:09 2004 +0000 - - * all: Patch from Måns Rullgård - - "Here's a patch that adds some kind of rate control. I suppose it is - by no means perfect, but it's much better than constant quantizer. It - also has a very crude scene change detection that sometimes avoids a - buffer underflow by reencoding oversized P/B frames as I frames." 
- - - - git-svn-id: svn://svn.videolan.org/x264/trunk@17 df754926-b1dd-0310-bc7b-ec298dee348c - -commit 30a244d00365a81999c82bdd3dd8beb0d036d36a -Author: Eric Petit -Date: Mon Aug 2 07:05:05 2004 +0000 - - Linux PPC AltiVec fix - - - git-svn-id: svn://svn.videolan.org/x264/trunk@16 df754926-b1dd-0310-bc7b-ec298dee348c - -commit b0495a99ee0b65b03be7d6961ddb70ef7e38dcf0 -Author: Eric Petit -Date: Wed Jul 28 21:39:06 2004 +0000 - - BeOS fixes (no stdint.h, no libm) - - - git-svn-id: svn://svn.videolan.org/x264/trunk@15 df754926-b1dd-0310-bc7b-ec298dee348c - -commit bf06e99e9b054a3f671a6f3f0c62d9b204057b0b -Author: Eric Petit -Date: Tue Jul 27 08:34:59 2004 +0000 - - Attempt to fix build on Linux PPC - - - git-svn-id: svn://svn.videolan.org/x264/trunk@14 df754926-b1dd-0310-bc7b-ec298dee348c - -commit 86ca49033ad85660dc88f82ab721263f4d29290e -Author: Laurent Aimar -Date: Fri Jul 23 18:14:59 2004 +0000 - - * encoder.c, analyse.c, macroblock: fixed when using a qp per MB. - (Buggy for pskip and mb with null cbp luma and chroma). - * dct*: fixed order of idct. - - - git-svn-id: svn://svn.videolan.org/x264/trunk@13 df754926-b1dd-0310-bc7b-ec298dee348c - -commit 55eb54c7e47a8f98f4382a953e03ee414972c36f -Author: Laurent Aimar -Date: Fri Jul 16 18:26:19 2004 +0000 - - * cpu.asm: mmh trashing ebp,esi and edi isn't a good idea I fear ;) - - - git-svn-id: svn://svn.videolan.org/x264/trunk@12 df754926-b1dd-0310-bc7b-ec298dee348c - -commit a8703c8933f51715c25118eb83487072a548934e -Author: Laurent Aimar -Date: Tue Jun 29 22:41:42 2004 +0000 - - * all: fixed ss2 runtime selection. 
- - - git-svn-id: svn://svn.videolan.org/x264/trunk@11 df754926-b1dd-0310-bc7b-ec298dee348c - -commit 02713c2401b471fb3bfa4541e8b867dfd06628cc -Author: Min Chen -Date: Fri Jun 18 02:00:40 2004 +0000 - - update & SSE2 support - - - git-svn-id: svn://svn.videolan.org/x264/trunk@10 df754926-b1dd-0310-bc7b-ec298dee348c - -commit 77bce7d16d8a2fc12aff32997def2f966975617c -Author: Min Chen -Date: Thu Jun 17 09:01:19 2004 +0000 - - update - - - git-svn-id: svn://svn.videolan.org/x264/trunk@9 df754926-b1dd-0310-bc7b-ec298dee348c - -commit c7631faf30ef78cc254a7ad8a7552e65824507d8 -Author: Min Chen -Date: Thu Jun 17 08:58:43 2004 +0000 - - remove some unused code - - - git-svn-id: svn://svn.videolan.org/x264/trunk@8 df754926-b1dd-0310-bc7b-ec298dee348c - -commit 8d3d88be0fe3a22ea39861321eed015efb454359 -Author: Min Chen -Date: Mon Jun 14 05:47:51 2004 +0000 - - support for build checkasm.exe - - - git-svn-id: svn://svn.videolan.org/x264/trunk@7 df754926-b1dd-0310-bc7b-ec298dee348c - -commit b2e2e34b3415cb9429475c1b828dcb2c36855308 -Author: Laurent Aimar -Date: Thu Jun 10 18:13:38 2004 +0000 - - * build fix (thx xxcd). - - - git-svn-id: svn://svn.videolan.org/x264/trunk@6 df754926-b1dd-0310-bc7b-ec298dee348c - -commit 4fb5f9aa45f1a71fd718eaabd0992b8086887fa0 -Author: VideoLAN -Date: Thu Jun 10 07:32:18 2004 +0000 - - * TODO: test. - - - git-svn-id: svn://svn.videolan.org/x264/trunk@5 df754926-b1dd-0310-bc7b-ec298dee348c - -commit a511bbecc348964c9de501a954c08f1b3bd4644d -Author: Laurent Aimar -Date: Wed Jun 9 19:35:31 2004 +0000 - - * vfw/* : oops... 
- - - git-svn-id: svn://svn.videolan.org/x264/trunk@4 df754926-b1dd-0310-bc7b-ec298dee348c - -commit 166ed2dd0b3ef0a89f64371e78c70a1b8f874ddd -Author: Laurent Aimar -Date: Wed Jun 9 19:35:07 2004 +0000 - - * mc-c.c compilation fix for gcc >= 3.3 - - - git-svn-id: svn://svn.videolan.org/x264/trunk@3 df754926-b1dd-0310-bc7b-ec298dee348c - -commit 602c87d5a3fb75cf7404bae08ecc3d5bc5ab1372 -Author: Laurent Aimar -Date: Thu Jun 3 19:29:57 2004 +0000 - - * all: re-import of CVS. - - - git-svn-id: svn://svn.videolan.org/x264/trunk@2 df754926-b1dd-0310-bc7b-ec298dee348c - -commit 5dc0aae2f900064d1f58579929a2285ab289a436 -Author: Laurent Aimar -Date: Thu Jun 3 19:29:33 2004 +0000 - - * all: re-import of the CVS. - - - git-svn-id: svn://svn.videolan.org/x264/trunk@1 df754926-b1dd-0310-bc7b-ec298dee348c diff -Nru x264-0.157.2935+git545de2f/common/aarch64/asm-offsets.c x264-0.160.3011+gitcde9a93/common/aarch64/asm-offsets.c --- x264-0.157.2935+git545de2f/common/aarch64/asm-offsets.c 2018-09-25 12:35:00.000000000 +0000 +++ x264-0.160.3011+gitcde9a93/common/aarch64/asm-offsets.c 2020-07-13 10:30:21.000000000 +0000 @@ -1,7 +1,7 @@ /***************************************************************************** * asm-offsets.c: check asm offsets for aarch64 ***************************************************************************** - * Copyright (C) 2014-2018 x264 project + * Copyright (C) 2014-2020 x264 project * * Authors: Janne Grunau * diff -Nru x264-0.157.2935+git545de2f/common/aarch64/asm-offsets.h x264-0.160.3011+gitcde9a93/common/aarch64/asm-offsets.h --- x264-0.157.2935+git545de2f/common/aarch64/asm-offsets.h 2018-09-25 12:35:00.000000000 +0000 +++ x264-0.160.3011+gitcde9a93/common/aarch64/asm-offsets.h 2020-07-13 10:30:21.000000000 +0000 @@ -1,7 +1,7 @@ /***************************************************************************** * asm-offsets.h: asm offsets for aarch64 ***************************************************************************** - * Copyright (C) 
2014-2018 x264 project + * Copyright (C) 2014-2020 x264 project * * Authors: Janne Grunau * diff -Nru x264-0.157.2935+git545de2f/common/aarch64/asm.S x264-0.160.3011+gitcde9a93/common/aarch64/asm.S --- x264-0.157.2935+git545de2f/common/aarch64/asm.S 2018-09-25 12:35:00.000000000 +0000 +++ x264-0.160.3011+gitcde9a93/common/aarch64/asm.S 2020-07-13 10:30:21.000000000 +0000 @@ -1,7 +1,7 @@ /***************************************************************************** * asm.S: AArch64 utility macros ***************************************************************************** - * Copyright (C) 2008-2018 x264 project + * Copyright (C) 2008-2020 x264 project * * Authors: Mans Rullgard * David Conrad diff -Nru x264-0.157.2935+git545de2f/common/aarch64/bitstream-a.S x264-0.160.3011+gitcde9a93/common/aarch64/bitstream-a.S --- x264-0.157.2935+git545de2f/common/aarch64/bitstream-a.S 2018-09-25 12:35:00.000000000 +0000 +++ x264-0.160.3011+gitcde9a93/common/aarch64/bitstream-a.S 2020-07-13 10:30:21.000000000 +0000 @@ -1,7 +1,7 @@ /***************************************************************************** * bitstream-a.S: aarch64 bitstream functions ***************************************************************************** - * Copyright (C) 2014-2018 x264 project + * Copyright (C) 2014-2020 x264 project * * Authors: Janne Grunau * diff -Nru x264-0.157.2935+git545de2f/common/aarch64/bitstream.h x264-0.160.3011+gitcde9a93/common/aarch64/bitstream.h --- x264-0.157.2935+git545de2f/common/aarch64/bitstream.h 2018-09-25 12:35:00.000000000 +0000 +++ x264-0.160.3011+gitcde9a93/common/aarch64/bitstream.h 2020-07-13 10:30:21.000000000 +0000 @@ -1,7 +1,7 @@ /***************************************************************************** * bitstream.h: aarch64 bitstream functions ***************************************************************************** - * Copyright (C) 2017-2018 x264 project + * Copyright (C) 2017-2020 x264 project * * Authors: Anton Mitrofanov * diff -Nru 
x264-0.157.2935+git545de2f/common/aarch64/cabac-a.S x264-0.160.3011+gitcde9a93/common/aarch64/cabac-a.S --- x264-0.157.2935+git545de2f/common/aarch64/cabac-a.S 2018-09-25 12:35:00.000000000 +0000 +++ x264-0.160.3011+gitcde9a93/common/aarch64/cabac-a.S 2020-07-13 10:30:21.000000000 +0000 @@ -1,7 +1,7 @@ /***************************************************************************** * cabac-a.S: aarch64 cabac ***************************************************************************** - * Copyright (C) 2014-2018 x264 project + * Copyright (C) 2014-2020 x264 project * * Authors: Janne Grunau * diff -Nru x264-0.157.2935+git545de2f/common/aarch64/dct-a.S x264-0.160.3011+gitcde9a93/common/aarch64/dct-a.S --- x264-0.157.2935+git545de2f/common/aarch64/dct-a.S 2018-09-25 12:35:00.000000000 +0000 +++ x264-0.160.3011+gitcde9a93/common/aarch64/dct-a.S 2020-07-13 10:30:21.000000000 +0000 @@ -1,7 +1,7 @@ /**************************************************************************** * dct-a.S: aarch64 transform and zigzag ***************************************************************************** - * Copyright (C) 2009-2018 x264 project + * Copyright (C) 2009-2020 x264 project * * Authors: David Conrad * Janne Grunau diff -Nru x264-0.157.2935+git545de2f/common/aarch64/dct.h x264-0.160.3011+gitcde9a93/common/aarch64/dct.h --- x264-0.157.2935+git545de2f/common/aarch64/dct.h 2018-09-25 12:35:00.000000000 +0000 +++ x264-0.160.3011+gitcde9a93/common/aarch64/dct.h 2020-07-13 10:30:21.000000000 +0000 @@ -1,7 +1,7 @@ /***************************************************************************** * dct.h: aarch64 transform and zigzag ***************************************************************************** - * Copyright (C) 2009-2018 x264 project + * Copyright (C) 2009-2020 x264 project * * Authors: David Conrad * Janne Grunau diff -Nru x264-0.157.2935+git545de2f/common/aarch64/deblock-a.S x264-0.160.3011+gitcde9a93/common/aarch64/deblock-a.S --- 
x264-0.157.2935+git545de2f/common/aarch64/deblock-a.S 2018-09-25 12:35:00.000000000 +0000 +++ x264-0.160.3011+gitcde9a93/common/aarch64/deblock-a.S 2020-07-13 10:30:21.000000000 +0000 @@ -1,7 +1,7 @@ /***************************************************************************** * deblock.S: aarch64 deblocking ***************************************************************************** - * Copyright (C) 2009-2018 x264 project + * Copyright (C) 2009-2020 x264 project * * Authors: Mans Rullgard * Janne Grunau diff -Nru x264-0.157.2935+git545de2f/common/aarch64/deblock.h x264-0.160.3011+gitcde9a93/common/aarch64/deblock.h --- x264-0.157.2935+git545de2f/common/aarch64/deblock.h 2018-09-25 12:35:00.000000000 +0000 +++ x264-0.160.3011+gitcde9a93/common/aarch64/deblock.h 2020-07-13 10:30:21.000000000 +0000 @@ -1,7 +1,7 @@ /***************************************************************************** * deblock.h: aarch64 deblocking ***************************************************************************** - * Copyright (C) 2017-2018 x264 project + * Copyright (C) 2017-2020 x264 project * * Authors: Anton Mitrofanov * diff -Nru x264-0.157.2935+git545de2f/common/aarch64/mc-a.S x264-0.160.3011+gitcde9a93/common/aarch64/mc-a.S --- x264-0.157.2935+git545de2f/common/aarch64/mc-a.S 2018-09-25 12:35:00.000000000 +0000 +++ x264-0.160.3011+gitcde9a93/common/aarch64/mc-a.S 2020-07-13 10:30:21.000000000 +0000 @@ -1,7 +1,7 @@ /***************************************************************************** * mc.S: aarch64 motion compensation ***************************************************************************** - * Copyright (C) 2009-2018 x264 project + * Copyright (C) 2009-2020 x264 project * * Authors: David Conrad * Janne Grunau diff -Nru x264-0.157.2935+git545de2f/common/aarch64/mc-c.c x264-0.160.3011+gitcde9a93/common/aarch64/mc-c.c --- x264-0.157.2935+git545de2f/common/aarch64/mc-c.c 2018-09-25 12:35:00.000000000 +0000 +++ x264-0.160.3011+gitcde9a93/common/aarch64/mc-c.c 
2020-07-13 10:30:21.000000000 +0000 @@ -1,7 +1,7 @@ /***************************************************************************** * mc-c.c: aarch64 motion compensation ***************************************************************************** - * Copyright (C) 2009-2018 x264 project + * Copyright (C) 2009-2020 x264 project * * Authors: David Conrad * Janne Grunau @@ -272,7 +272,7 @@ PROPAGATE_LIST(neon) #endif // !HIGH_BIT_DEPTH -void x264_mc_init_aarch64( int cpu, x264_mc_functions_t *pf ) +void x264_mc_init_aarch64( uint32_t cpu, x264_mc_functions_t *pf ) { #if !HIGH_BIT_DEPTH if( cpu&X264_CPU_ARMV8 ) diff -Nru x264-0.157.2935+git545de2f/common/aarch64/mc.h x264-0.160.3011+gitcde9a93/common/aarch64/mc.h --- x264-0.157.2935+git545de2f/common/aarch64/mc.h 2018-09-25 12:35:00.000000000 +0000 +++ x264-0.160.3011+gitcde9a93/common/aarch64/mc.h 2020-07-13 10:30:21.000000000 +0000 @@ -1,7 +1,7 @@ /***************************************************************************** * mc.h: aarch64 motion compensation ***************************************************************************** - * Copyright (C) 2014-2018 x264 project + * Copyright (C) 2014-2020 x264 project * * Authors: Janne Grunau * @@ -27,6 +27,6 @@ #define X264_AARCH64_MC_H #define x264_mc_init_aarch64 x264_template(mc_init_aarch64) -void x264_mc_init_aarch64( int cpu, x264_mc_functions_t *pf ); +void x264_mc_init_aarch64( uint32_t cpu, x264_mc_functions_t *pf ); #endif diff -Nru x264-0.157.2935+git545de2f/common/aarch64/pixel-a.S x264-0.160.3011+gitcde9a93/common/aarch64/pixel-a.S --- x264-0.157.2935+git545de2f/common/aarch64/pixel-a.S 2018-09-25 12:35:00.000000000 +0000 +++ x264-0.160.3011+gitcde9a93/common/aarch64/pixel-a.S 2020-07-13 10:30:21.000000000 +0000 @@ -1,7 +1,7 @@ /***************************************************************************** * pixel.S: aarch64 pixel metrics ***************************************************************************** - * Copyright (C) 2009-2018 x264 
project + * Copyright (C) 2009-2020 x264 project * * Authors: David Conrad * Janne Grunau diff -Nru x264-0.157.2935+git545de2f/common/aarch64/pixel.h x264-0.160.3011+gitcde9a93/common/aarch64/pixel.h --- x264-0.157.2935+git545de2f/common/aarch64/pixel.h 2018-09-25 12:35:00.000000000 +0000 +++ x264-0.160.3011+gitcde9a93/common/aarch64/pixel.h 2020-07-13 10:30:21.000000000 +0000 @@ -1,7 +1,7 @@ /***************************************************************************** * pixel.h: aarch64 pixel metrics ***************************************************************************** - * Copyright (C) 2009-2018 x264 project + * Copyright (C) 2009-2020 x264 project * * Authors: David Conrad * Janne Grunau diff -Nru x264-0.157.2935+git545de2f/common/aarch64/predict-a.S x264-0.160.3011+gitcde9a93/common/aarch64/predict-a.S --- x264-0.157.2935+git545de2f/common/aarch64/predict-a.S 2018-09-25 12:35:00.000000000 +0000 +++ x264-0.160.3011+gitcde9a93/common/aarch64/predict-a.S 2020-07-13 10:30:21.000000000 +0000 @@ -1,7 +1,7 @@ /***************************************************************************** * predict.S: aarch64 intra prediction ***************************************************************************** - * Copyright (C) 2009-2018 x264 project + * Copyright (C) 2009-2020 x264 project * * Authors: David Conrad * Mans Rullgard diff -Nru x264-0.157.2935+git545de2f/common/aarch64/predict-c.c x264-0.160.3011+gitcde9a93/common/aarch64/predict-c.c --- x264-0.157.2935+git545de2f/common/aarch64/predict-c.c 2018-09-25 12:35:00.000000000 +0000 +++ x264-0.160.3011+gitcde9a93/common/aarch64/predict-c.c 2020-07-13 10:30:21.000000000 +0000 @@ -1,7 +1,7 @@ /***************************************************************************** * predict.c: aarch64 intra prediction ***************************************************************************** - * Copyright (C) 2009-2018 x264 project + * Copyright (C) 2009-2020 x264 project * * Authors: David Conrad * Janne Grunau @@ -28,7 
+28,7 @@ #include "predict.h" #include "pixel.h" -void x264_predict_4x4_init_aarch64( int cpu, x264_predict_t pf[12] ) +void x264_predict_4x4_init_aarch64( uint32_t cpu, x264_predict_t pf[12] ) { #if !HIGH_BIT_DEPTH if( cpu&X264_CPU_ARMV8 ) @@ -47,7 +47,7 @@ #endif // !HIGH_BIT_DEPTH } -void x264_predict_8x8c_init_aarch64( int cpu, x264_predict_t pf[7] ) +void x264_predict_8x8c_init_aarch64( uint32_t cpu, x264_predict_t pf[7] ) { #if !HIGH_BIT_DEPTH if( cpu&X264_CPU_ARMV8 ) @@ -67,7 +67,7 @@ } -void x264_predict_8x16c_init_aarch64( int cpu, x264_predict_t pf[7] ) +void x264_predict_8x16c_init_aarch64( uint32_t cpu, x264_predict_t pf[7] ) { if( !(cpu&X264_CPU_NEON) ) return; @@ -82,7 +82,7 @@ #endif // !HIGH_BIT_DEPTH } -void x264_predict_8x8_init_aarch64( int cpu, x264_predict8x8_t pf[12], x264_predict_8x8_filter_t *predict_filter ) +void x264_predict_8x8_init_aarch64( uint32_t cpu, x264_predict8x8_t pf[12], x264_predict_8x8_filter_t *predict_filter ) { if( !(cpu&X264_CPU_NEON) ) return; @@ -100,7 +100,7 @@ #endif // !HIGH_BIT_DEPTH } -void x264_predict_16x16_init_aarch64( int cpu, x264_predict_t pf[7] ) +void x264_predict_16x16_init_aarch64( uint32_t cpu, x264_predict_t pf[7] ) { if( !(cpu&X264_CPU_NEON) ) return; diff -Nru x264-0.157.2935+git545de2f/common/aarch64/predict.h x264-0.160.3011+gitcde9a93/common/aarch64/predict.h --- x264-0.157.2935+git545de2f/common/aarch64/predict.h 2018-09-25 12:35:00.000000000 +0000 +++ x264-0.160.3011+gitcde9a93/common/aarch64/predict.h 2020-07-13 10:30:21.000000000 +0000 @@ -1,7 +1,7 @@ /***************************************************************************** * predict.h: aarch64 intra prediction ***************************************************************************** - * Copyright (C) 2009-2018 x264 project + * Copyright (C) 2009-2020 x264 project * * Authors: David Conrad * Janne Grunau @@ -106,14 +106,14 @@ void x264_predict_16x16_dc_neon( uint8_t *src ); #define x264_predict_4x4_init_aarch64 
x264_template(predict_4x4_init_aarch64) -void x264_predict_4x4_init_aarch64( int cpu, x264_predict_t pf[12] ); +void x264_predict_4x4_init_aarch64( uint32_t cpu, x264_predict_t pf[12] ); #define x264_predict_8x8_init_aarch64 x264_template(predict_8x8_init_aarch64) -void x264_predict_8x8_init_aarch64( int cpu, x264_predict8x8_t pf[12], x264_predict_8x8_filter_t *predict_filter ); +void x264_predict_8x8_init_aarch64( uint32_t cpu, x264_predict8x8_t pf[12], x264_predict_8x8_filter_t *predict_filter ); #define x264_predict_8x8c_init_aarch64 x264_template(predict_8x8c_init_aarch64) -void x264_predict_8x8c_init_aarch64( int cpu, x264_predict_t pf[7] ); +void x264_predict_8x8c_init_aarch64( uint32_t cpu, x264_predict_t pf[7] ); #define x264_predict_8x16c_init_aarch64 x264_template(predict_8x16c_init_aarch64) -void x264_predict_8x16c_init_aarch64( int cpu, x264_predict_t pf[7] ); +void x264_predict_8x16c_init_aarch64( uint32_t cpu, x264_predict_t pf[7] ); #define x264_predict_16x16_init_aarch64 x264_template(predict_16x16_init_aarch64) -void x264_predict_16x16_init_aarch64( int cpu, x264_predict_t pf[7] ); +void x264_predict_16x16_init_aarch64( uint32_t cpu, x264_predict_t pf[7] ); #endif /* X264_AARCH64_PREDICT_H */ diff -Nru x264-0.157.2935+git545de2f/common/aarch64/quant-a.S x264-0.160.3011+gitcde9a93/common/aarch64/quant-a.S --- x264-0.157.2935+git545de2f/common/aarch64/quant-a.S 2018-09-25 12:35:00.000000000 +0000 +++ x264-0.160.3011+gitcde9a93/common/aarch64/quant-a.S 2020-07-13 10:30:21.000000000 +0000 @@ -1,7 +1,7 @@ /**************************************************************************** * quant.S: arm quantization and level-run ***************************************************************************** - * Copyright (C) 2009-2018 x264 project + * Copyright (C) 2009-2020 x264 project * * Authors: David Conrad * Janne Grunau diff -Nru x264-0.157.2935+git545de2f/common/aarch64/quant.h x264-0.160.3011+gitcde9a93/common/aarch64/quant.h --- 
x264-0.157.2935+git545de2f/common/aarch64/quant.h 2018-09-25 12:35:00.000000000 +0000 +++ x264-0.160.3011+gitcde9a93/common/aarch64/quant.h 2020-07-13 10:30:21.000000000 +0000 @@ -1,7 +1,7 @@ /***************************************************************************** * quant.h: arm quantization and level-run ***************************************************************************** - * Copyright (C) 2005-2018 x264 project + * Copyright (C) 2005-2020 x264 project * * Authors: David Conrad * Janne Grunau diff -Nru x264-0.157.2935+git545de2f/common/arm/asm.S x264-0.160.3011+gitcde9a93/common/arm/asm.S --- x264-0.157.2935+git545de2f/common/arm/asm.S 2018-09-25 12:35:00.000000000 +0000 +++ x264-0.160.3011+gitcde9a93/common/arm/asm.S 2020-07-13 10:30:21.000000000 +0000 @@ -1,7 +1,7 @@ /***************************************************************************** * asm.S: arm utility macros ***************************************************************************** - * Copyright (C) 2008-2018 x264 project + * Copyright (C) 2008-2020 x264 project * * Authors: Mans Rullgard * David Conrad diff -Nru x264-0.157.2935+git545de2f/common/arm/bitstream-a.S x264-0.160.3011+gitcde9a93/common/arm/bitstream-a.S --- x264-0.157.2935+git545de2f/common/arm/bitstream-a.S 2018-09-25 12:35:00.000000000 +0000 +++ x264-0.160.3011+gitcde9a93/common/arm/bitstream-a.S 2020-07-13 10:30:21.000000000 +0000 @@ -1,7 +1,7 @@ /***************************************************************************** * bitstream-a.S: arm bitstream functions ***************************************************************************** - * Copyright (C) 2014-2018 x264 project + * Copyright (C) 2014-2020 x264 project * * Authors: Janne Grunau * diff -Nru x264-0.157.2935+git545de2f/common/arm/bitstream.h x264-0.160.3011+gitcde9a93/common/arm/bitstream.h --- x264-0.157.2935+git545de2f/common/arm/bitstream.h 2018-09-25 12:35:00.000000000 +0000 +++ x264-0.160.3011+gitcde9a93/common/arm/bitstream.h 2020-07-13 
10:30:21.000000000 +0000 @@ -1,7 +1,7 @@ /***************************************************************************** * bitstream.h: arm bitstream functions ***************************************************************************** - * Copyright (C) 2017-2018 x264 project + * Copyright (C) 2017-2020 x264 project * * Authors: Anton Mitrofanov * diff -Nru x264-0.157.2935+git545de2f/common/arm/cpu-a.S x264-0.160.3011+gitcde9a93/common/arm/cpu-a.S --- x264-0.157.2935+git545de2f/common/arm/cpu-a.S 2018-09-25 12:35:00.000000000 +0000 +++ x264-0.160.3011+gitcde9a93/common/arm/cpu-a.S 2020-07-13 10:30:21.000000000 +0000 @@ -1,7 +1,7 @@ /***************************************************************************** * cpu-a.S: arm cpu detection ***************************************************************************** - * Copyright (C) 2009-2018 x264 project + * Copyright (C) 2009-2020 x264 project * * Authors: David Conrad * diff -Nru x264-0.157.2935+git545de2f/common/arm/dct-a.S x264-0.160.3011+gitcde9a93/common/arm/dct-a.S --- x264-0.157.2935+git545de2f/common/arm/dct-a.S 2018-09-25 12:35:00.000000000 +0000 +++ x264-0.160.3011+gitcde9a93/common/arm/dct-a.S 2020-07-13 10:30:21.000000000 +0000 @@ -1,7 +1,7 @@ /**************************************************************************** * dct-a.S: arm transform and zigzag ***************************************************************************** - * Copyright (C) 2009-2018 x264 project + * Copyright (C) 2009-2020 x264 project * * Authors: David Conrad * Martin Storsjo @@ -725,15 +725,15 @@ vadd.s16 q3, q3, q15 vsub.s16 d17, d0, d1 @ b4 vadd.s16 d18, d2, d3 @ b1 - vsub.s16 d19, d2, d3 @ b5 + vsub.s16 d19, d2, d3 @ b5 vadd.s16 d20, d4, d5 @ b2 - vsub.s16 d21, d4, d5 @ b6 + vsub.s16 d21, d4, d5 @ b6 vadd.s16 d22, d6, d7 @ b3 - vsub.s16 d23, d6, d7 @ b7 + vsub.s16 d23, d6, d7 @ b7 vadd.s16 q0, q8, q9 @ b0 + b1, b4 + b5; a0, a2 - vsub.s16 q1, q8, q9 @ b0 - b1, b4 - b5; a4, a6 + vsub.s16 q1, q8, q9 @ b0 - b1, b4 - b5; 
a4, a6 vadd.s16 q2, q10, q11 @ b2 + b3, b6 + b7; a1, a3 - vsub.s16 q3, q10, q11 @ b2 - b3, b6 - b7; a5, a7 + vsub.s16 q3, q10, q11 @ b2 - b3, b6 - b7; a5, a7 vadd.s16 q8, q0, q2 @ a0 + a1, a2 + a3 vsub.s16 q9, q0, q2 @ a0 - a1, a2 - a3 diff -Nru x264-0.157.2935+git545de2f/common/arm/dct.h x264-0.160.3011+gitcde9a93/common/arm/dct.h --- x264-0.157.2935+git545de2f/common/arm/dct.h 2018-09-25 12:35:00.000000000 +0000 +++ x264-0.160.3011+gitcde9a93/common/arm/dct.h 2020-07-13 10:30:21.000000000 +0000 @@ -1,7 +1,7 @@ /***************************************************************************** * dct.h: arm transform and zigzag ***************************************************************************** - * Copyright (C) 2009-2018 x264 project + * Copyright (C) 2009-2020 x264 project * * Authors: David Conrad * diff -Nru x264-0.157.2935+git545de2f/common/arm/deblock-a.S x264-0.160.3011+gitcde9a93/common/arm/deblock-a.S --- x264-0.157.2935+git545de2f/common/arm/deblock-a.S 2018-09-25 12:35:00.000000000 +0000 +++ x264-0.160.3011+gitcde9a93/common/arm/deblock-a.S 2020-07-13 10:30:21.000000000 +0000 @@ -1,7 +1,7 @@ /***************************************************************************** * deblock.S: arm deblocking ***************************************************************************** - * Copyright (C) 2009-2018 x264 project + * Copyright (C) 2009-2020 x264 project * * Authors: Mans Rullgard * Martin Storsjo diff -Nru x264-0.157.2935+git545de2f/common/arm/deblock.h x264-0.160.3011+gitcde9a93/common/arm/deblock.h --- x264-0.157.2935+git545de2f/common/arm/deblock.h 2018-09-25 12:35:00.000000000 +0000 +++ x264-0.160.3011+gitcde9a93/common/arm/deblock.h 2020-07-13 10:30:21.000000000 +0000 @@ -1,7 +1,7 @@ /***************************************************************************** * deblock.h: arm deblocking ***************************************************************************** - * Copyright (C) 2017-2018 x264 project + * Copyright (C) 2017-2020 x264 
project * * Authors: Anton Mitrofanov * diff -Nru x264-0.157.2935+git545de2f/common/arm/mc-a.S x264-0.160.3011+gitcde9a93/common/arm/mc-a.S --- x264-0.157.2935+git545de2f/common/arm/mc-a.S 2018-09-25 12:35:00.000000000 +0000 +++ x264-0.160.3011+gitcde9a93/common/arm/mc-a.S 2020-07-13 10:30:21.000000000 +0000 @@ -1,7 +1,7 @@ /***************************************************************************** * mc.S: arm motion compensation ***************************************************************************** - * Copyright (C) 2009-2018 x264 project + * Copyright (C) 2009-2020 x264 project * * Authors: David Conrad * Mans Rullgard diff -Nru x264-0.157.2935+git545de2f/common/arm/mc-c.c x264-0.160.3011+gitcde9a93/common/arm/mc-c.c --- x264-0.157.2935+git545de2f/common/arm/mc-c.c 2018-09-25 12:35:00.000000000 +0000 +++ x264-0.160.3011+gitcde9a93/common/arm/mc-c.c 2020-07-13 10:30:21.000000000 +0000 @@ -1,7 +1,7 @@ /***************************************************************************** * mc-c.c: arm motion compensation ***************************************************************************** - * Copyright (C) 2009-2018 x264 project + * Copyright (C) 2009-2020 x264 project * * Authors: David Conrad * Janne Grunau @@ -296,7 +296,7 @@ PROPAGATE_LIST(neon) #endif // !HIGH_BIT_DEPTH -void x264_mc_init_arm( int cpu, x264_mc_functions_t *pf ) +void x264_mc_init_arm( uint32_t cpu, x264_mc_functions_t *pf ) { if( !(cpu&X264_CPU_ARMV6) ) return; diff -Nru x264-0.157.2935+git545de2f/common/arm/mc.h x264-0.160.3011+gitcde9a93/common/arm/mc.h --- x264-0.157.2935+git545de2f/common/arm/mc.h 2018-09-25 12:35:00.000000000 +0000 +++ x264-0.160.3011+gitcde9a93/common/arm/mc.h 2020-07-13 10:30:21.000000000 +0000 @@ -1,7 +1,7 @@ /***************************************************************************** * mc.h: arm motion compensation ***************************************************************************** - * Copyright (C) 2009-2018 x264 project + * Copyright (C) 
2009-2020 x264 project * * Authors: David Conrad * @@ -27,6 +27,6 @@ #define X264_ARM_MC_H #define x264_mc_init_arm x264_template(mc_init_arm) -void x264_mc_init_arm( int cpu, x264_mc_functions_t *pf ); +void x264_mc_init_arm( uint32_t cpu, x264_mc_functions_t *pf ); #endif diff -Nru x264-0.157.2935+git545de2f/common/arm/pixel-a.S x264-0.160.3011+gitcde9a93/common/arm/pixel-a.S --- x264-0.157.2935+git545de2f/common/arm/pixel-a.S 2018-09-25 12:35:00.000000000 +0000 +++ x264-0.160.3011+gitcde9a93/common/arm/pixel-a.S 2020-07-13 10:30:21.000000000 +0000 @@ -1,7 +1,7 @@ /***************************************************************************** * pixel.S: arm pixel metrics ***************************************************************************** - * Copyright (C) 2009-2018 x264 project + * Copyright (C) 2009-2020 x264 project * * Authors: David Conrad * Janne Grunau diff -Nru x264-0.157.2935+git545de2f/common/arm/pixel.h x264-0.160.3011+gitcde9a93/common/arm/pixel.h --- x264-0.157.2935+git545de2f/common/arm/pixel.h 2018-09-25 12:35:00.000000000 +0000 +++ x264-0.160.3011+gitcde9a93/common/arm/pixel.h 2020-07-13 10:30:21.000000000 +0000 @@ -1,7 +1,7 @@ /***************************************************************************** * pixel.h: arm pixel metrics ***************************************************************************** - * Copyright (C) 2009-2018 x264 project + * Copyright (C) 2009-2020 x264 project * * Authors: David Conrad * diff -Nru x264-0.157.2935+git545de2f/common/arm/predict-a.S x264-0.160.3011+gitcde9a93/common/arm/predict-a.S --- x264-0.157.2935+git545de2f/common/arm/predict-a.S 2018-09-25 12:35:00.000000000 +0000 +++ x264-0.160.3011+gitcde9a93/common/arm/predict-a.S 2020-07-13 10:30:21.000000000 +0000 @@ -1,7 +1,7 @@ /***************************************************************************** * predict.S: arm intra prediction ***************************************************************************** - * Copyright (C) 2009-2018 x264 
project + * Copyright (C) 2009-2020 x264 project * * Authors: David Conrad * Mans Rullgard diff -Nru x264-0.157.2935+git545de2f/common/arm/predict-c.c x264-0.160.3011+gitcde9a93/common/arm/predict-c.c --- x264-0.157.2935+git545de2f/common/arm/predict-c.c 2018-09-25 12:35:00.000000000 +0000 +++ x264-0.160.3011+gitcde9a93/common/arm/predict-c.c 2020-07-13 10:30:21.000000000 +0000 @@ -1,7 +1,7 @@ /***************************************************************************** * predict.c: arm intra prediction ***************************************************************************** - * Copyright (C) 2009-2018 x264 project + * Copyright (C) 2009-2020 x264 project * * Authors: David Conrad * @@ -27,7 +27,7 @@ #include "predict.h" #include "pixel.h" -void x264_predict_4x4_init_arm( int cpu, x264_predict_t pf[12] ) +void x264_predict_4x4_init_arm( uint32_t cpu, x264_predict_t pf[12] ) { if( !(cpu&X264_CPU_ARMV6) ) return; @@ -46,7 +46,7 @@ #endif // !HIGH_BIT_DEPTH } -void x264_predict_8x8c_init_arm( int cpu, x264_predict_t pf[7] ) +void x264_predict_8x8c_init_arm( uint32_t cpu, x264_predict_t pf[7] ) { if( !(cpu&X264_CPU_NEON) ) return; @@ -61,7 +61,7 @@ #endif // !HIGH_BIT_DEPTH } -void x264_predict_8x16c_init_arm( int cpu, x264_predict_t pf[7] ) +void x264_predict_8x16c_init_arm( uint32_t cpu, x264_predict_t pf[7] ) { if( !(cpu&X264_CPU_NEON) ) return; @@ -74,7 +74,7 @@ #endif // !HIGH_BIT_DEPTH } -void x264_predict_8x8_init_arm( int cpu, x264_predict8x8_t pf[12], x264_predict_8x8_filter_t *predict_filter ) +void x264_predict_8x8_init_arm( uint32_t cpu, x264_predict8x8_t pf[12], x264_predict_8x8_filter_t *predict_filter ) { if( !(cpu&X264_CPU_NEON) ) return; @@ -92,7 +92,7 @@ #endif // !HIGH_BIT_DEPTH } -void x264_predict_16x16_init_arm( int cpu, x264_predict_t pf[7] ) +void x264_predict_16x16_init_arm( uint32_t cpu, x264_predict_t pf[7] ) { if( !(cpu&X264_CPU_NEON) ) return; diff -Nru x264-0.157.2935+git545de2f/common/arm/predict.h 
x264-0.160.3011+gitcde9a93/common/arm/predict.h --- x264-0.157.2935+git545de2f/common/arm/predict.h 2018-09-25 12:35:00.000000000 +0000 +++ x264-0.160.3011+gitcde9a93/common/arm/predict.h 2020-07-13 10:30:21.000000000 +0000 @@ -1,7 +1,7 @@ /***************************************************************************** * predict.h: arm intra prediction ***************************************************************************** - * Copyright (C) 2009-2018 x264 project + * Copyright (C) 2009-2020 x264 project * * Authors: David Conrad * @@ -92,14 +92,14 @@ void x264_predict_16x16_p_neon( uint8_t *src ); #define x264_predict_4x4_init_arm x264_template(predict_4x4_init_arm) -void x264_predict_4x4_init_arm( int cpu, x264_predict_t pf[12] ); +void x264_predict_4x4_init_arm( uint32_t cpu, x264_predict_t pf[12] ); #define x264_predict_8x8_init_arm x264_template(predict_8x8_init_arm) -void x264_predict_8x8_init_arm( int cpu, x264_predict8x8_t pf[12], x264_predict_8x8_filter_t *predict_filter ); +void x264_predict_8x8_init_arm( uint32_t cpu, x264_predict8x8_t pf[12], x264_predict_8x8_filter_t *predict_filter ); #define x264_predict_8x8c_init_arm x264_template(predict_8x8c_init_arm) -void x264_predict_8x8c_init_arm( int cpu, x264_predict_t pf[7] ); +void x264_predict_8x8c_init_arm( uint32_t cpu, x264_predict_t pf[7] ); #define x264_predict_8x16c_init_arm x264_template(predict_8x16c_init_arm) -void x264_predict_8x16c_init_arm( int cpu, x264_predict_t pf[7] ); +void x264_predict_8x16c_init_arm( uint32_t cpu, x264_predict_t pf[7] ); #define x264_predict_16x16_init_arm x264_template(predict_16x16_init_arm) -void x264_predict_16x16_init_arm( int cpu, x264_predict_t pf[7] ); +void x264_predict_16x16_init_arm( uint32_t cpu, x264_predict_t pf[7] ); #endif diff -Nru x264-0.157.2935+git545de2f/common/arm/quant-a.S x264-0.160.3011+gitcde9a93/common/arm/quant-a.S --- x264-0.157.2935+git545de2f/common/arm/quant-a.S 2018-09-25 12:35:00.000000000 +0000 +++ 
x264-0.160.3011+gitcde9a93/common/arm/quant-a.S 2020-07-13 10:30:21.000000000 +0000 @@ -1,7 +1,7 @@ /**************************************************************************** * quant.S: arm quantization and level-run ***************************************************************************** - * Copyright (C) 2009-2018 x264 project + * Copyright (C) 2009-2020 x264 project * * Authors: David Conrad * Janne Grunau diff -Nru x264-0.157.2935+git545de2f/common/arm/quant.h x264-0.160.3011+gitcde9a93/common/arm/quant.h --- x264-0.157.2935+git545de2f/common/arm/quant.h 2018-09-25 12:35:00.000000000 +0000 +++ x264-0.160.3011+gitcde9a93/common/arm/quant.h 2020-07-13 10:30:21.000000000 +0000 @@ -1,7 +1,7 @@ /***************************************************************************** * quant.h: arm quantization and level-run ***************************************************************************** - * Copyright (C) 2005-2018 x264 project + * Copyright (C) 2005-2020 x264 project * * Authors: David Conrad * diff -Nru x264-0.157.2935+git545de2f/common/base.c x264-0.160.3011+gitcde9a93/common/base.c --- x264-0.157.2935+git545de2f/common/base.c 2018-09-25 12:35:00.000000000 +0000 +++ x264-0.160.3011+gitcde9a93/common/base.c 2020-07-13 10:30:21.000000000 +0000 @@ -1,7 +1,7 @@ /***************************************************************************** * base.c: misc common functions (bit depth independent) ***************************************************************************** - * Copyright (C) 2003-2018 x264 project + * Copyright (C) 2003-2020 x264 project * * Authors: Loren Merritt * Laurent Aimar @@ -99,13 +99,18 @@ /**************************************************************************** * x264_malloc: ****************************************************************************/ -void *x264_malloc( int i_size ) +void *x264_malloc( int64_t i_size ) { +#define HUGE_PAGE_SIZE 2*1024*1024 +#define HUGE_PAGE_THRESHOLD HUGE_PAGE_SIZE*7/8 /* FIXME: Is this 
optimal? */ + if( i_size < 0 || (uint64_t)i_size > (SIZE_MAX - HUGE_PAGE_SIZE) /*|| (uint64_t)i_size > (SIZE_MAX - NATIVE_ALIGN - sizeof(void **))*/ ) + { + x264_log_internal( X264_LOG_ERROR, "invalid size of malloc: %"PRId64"\n", i_size ); + return NULL; + } uint8_t *align_buf = NULL; #if HAVE_MALLOC_H #if HAVE_THP -#define HUGE_PAGE_SIZE 2*1024*1024 -#define HUGE_PAGE_THRESHOLD HUGE_PAGE_SIZE*7/8 /* FIXME: Is this optimal? */ /* Attempt to allocate huge pages to reduce TLB misses. */ if( i_size >= HUGE_PAGE_THRESHOLD ) { @@ -118,8 +123,6 @@ } } else -#undef HUGE_PAGE_SIZE -#undef HUGE_PAGE_THRESHOLD #endif align_buf = memalign( NATIVE_ALIGN, i_size ); #else @@ -132,8 +135,10 @@ } #endif if( !align_buf ) - x264_log_internal( X264_LOG_ERROR, "malloc of size %d failed\n", i_size ); + x264_log_internal( X264_LOG_ERROR, "malloc of size %"PRId64" failed\n", i_size ); return align_buf; +#undef HUGE_PAGE_SIZE +#undef HUGE_PAGE_THRESHOLD } /**************************************************************************** @@ -175,7 +180,7 @@ if( !buf ) goto error; - b_error |= fread( buf, 1, i_size, fh ) != i_size; + b_error |= fread( buf, 1, i_size, fh ) != (uint64_t)i_size; fclose( fh ); if( b_error ) { @@ -196,7 +201,7 @@ /**************************************************************************** * x264_picture_init: ****************************************************************************/ -static void picture_init( x264_picture_t *pic ) +REALIGN_STACK void x264_picture_init( x264_picture_t *pic ) { memset( pic, 0, sizeof( x264_picture_t ) ); pic->i_type = X264_TYPE_AUTO; @@ -204,15 +209,10 @@ pic->i_pic_struct = PIC_STRUCT_AUTO; } -void x264_picture_init( x264_picture_t *pic ) -{ - x264_stack_align( picture_init, pic ); -} - /**************************************************************************** * x264_picture_alloc: ****************************************************************************/ -static int picture_alloc( x264_picture_t *pic, int i_csp, int 
i_width, int i_height ) +REALIGN_STACK int x264_picture_alloc( x264_picture_t *pic, int i_csp, int i_width, int i_height ) { typedef struct { @@ -223,6 +223,7 @@ static const x264_csp_tab_t csp_tab[] = { + [X264_CSP_I400] = { 1, { 256*1 }, { 256*1 } }, [X264_CSP_I420] = { 3, { 256*1, 256/2, 256/2 }, { 256*1, 256/2, 256/2 } }, [X264_CSP_YV12] = { 3, { 256*1, 256/2, 256/2 }, { 256*1, 256/2, 256/2 } }, [X264_CSP_NV12] = { 2, { 256*1, 256*1 }, { 256*1, 256/2 }, }, @@ -242,16 +243,16 @@ int csp = i_csp & X264_CSP_MASK; if( csp <= X264_CSP_NONE || csp >= X264_CSP_MAX || csp == X264_CSP_V210 ) return -1; - picture_init( pic ); + x264_picture_init( pic ); pic->img.i_csp = i_csp; pic->img.i_plane = csp_tab[csp].planes; int depth_factor = i_csp & X264_CSP_HIGH_DEPTH ? 2 : 1; - int plane_offset[3] = {0}; - int frame_size = 0; + int64_t plane_offset[3] = {0}; + int64_t frame_size = 0; for( int i = 0; i < pic->img.i_plane; i++ ) { int stride = (((int64_t)i_width * csp_tab[csp].width_fix8[i]) >> 8) * depth_factor; - int plane_size = (((int64_t)i_height * csp_tab[csp].height_fix8[i]) >> 8) * stride; + int64_t plane_size = (((int64_t)i_height * csp_tab[csp].height_fix8[i]) >> 8) * stride; pic->img.i_stride[i] = stride; plane_offset[i] = frame_size; frame_size += plane_size; @@ -264,15 +265,10 @@ return 0; } -int x264_picture_alloc( x264_picture_t *pic, int i_csp, int i_width, int i_height ) -{ - return x264_stack_align( picture_alloc, pic, i_csp, i_width, i_height ); -} - /**************************************************************************** * x264_picture_clean: ****************************************************************************/ -static void picture_clean( x264_picture_t *pic ) +REALIGN_STACK void x264_picture_clean( x264_picture_t *pic ) { x264_free( pic->img.plane[0] ); @@ -280,15 +276,10 @@ memset( pic, 0, sizeof( x264_picture_t ) ); } -void x264_picture_clean( x264_picture_t *pic ) -{ - x264_stack_align( picture_clean, pic ); -} - 
/**************************************************************************** * x264_param_default: ****************************************************************************/ -static void param_default( x264_param_t *param ) +REALIGN_STACK void x264_param_default( x264_param_t *param ) { /* */ memset( param, 0, sizeof( x264_param_t ) ); @@ -433,16 +424,11 @@ param->i_avcintra_flavor = X264_AVCINTRA_FLAVOR_PANASONIC; } -void x264_param_default( x264_param_t *param ) -{ - x264_stack_align( param_default, param ); -} - static int param_apply_preset( x264_param_t *param, const char *preset ) { char *end; int i = strtol( preset, &end, 10 ); - if( *end == 0 && i >= 0 && i < sizeof(x264_preset_names)/sizeof(*x264_preset_names)-1 ) + if( *end == 0 && i >= 0 && i < ARRAY_ELEMS(x264_preset_names)-1 ) preset = x264_preset_names[i]; if( !strcasecmp( preset, "ultrafast" ) ) @@ -459,6 +445,7 @@ param->analyse.i_subpel_refine = 0; param->rc.i_aq_mode = 0; param->analyse.b_mixed_references = 0; + param->analyse.i_trellis = 0; param->i_bframe_adaptive = X264_B_ADAPT_NONE; param->rc.b_mb_tree = 0; param->analyse.i_weighted_pred = X264_WEIGHTP_NONE; @@ -561,22 +548,17 @@ static int param_apply_tune( x264_param_t *param, const char *tune ) { - char *tmp = x264_malloc( strlen( tune ) + 1 ); - if( !tmp ) - return -1; - tmp = strcpy( tmp, tune ); - char *s = strtok( tmp, ",./-+" ); int psy_tuning_used = 0; - while( s ) + for( int len; tune += strspn( tune, ",./-+" ), (len = strcspn( tune, ",./-+" )); tune += len ) { - if( !strncasecmp( s, "film", 4 ) ) + if( len == 4 && !strncasecmp( tune, "film", 4 ) ) { if( psy_tuning_used++ ) goto psy_failure; param->i_deblocking_filter_alphac0 = -1; param->i_deblocking_filter_beta = -1; param->analyse.f_psy_trellis = 0.15; } - else if( !strncasecmp( s, "animation", 9 ) ) + else if( len == 9 && !strncasecmp( tune, "animation", 9 ) ) { if( psy_tuning_used++ ) goto psy_failure; param->i_frame_reference = param->i_frame_reference > 1 ? 
param->i_frame_reference*2 : 1; @@ -586,7 +568,7 @@ param->rc.f_aq_strength = 0.6; param->i_bframe += 2; } - else if( !strncasecmp( s, "grain", 5 ) ) + else if( len == 5 && !strncasecmp( tune, "grain", 5 ) ) { if( psy_tuning_used++ ) goto psy_failure; param->i_deblocking_filter_alphac0 = -2; @@ -600,7 +582,7 @@ param->analyse.i_luma_deadzone[1] = 6; param->rc.f_qcompress = 0.8; } - else if( !strncasecmp( s, "stillimage", 10 ) ) + else if( len == 10 && !strncasecmp( tune, "stillimage", 10 ) ) { if( psy_tuning_used++ ) goto psy_failure; param->i_deblocking_filter_alphac0 = -3; @@ -609,26 +591,26 @@ param->analyse.f_psy_trellis = 0.7; param->rc.f_aq_strength = 1.2; } - else if( !strncasecmp( s, "psnr", 4 ) ) + else if( len == 4 && !strncasecmp( tune, "psnr", 4 ) ) { if( psy_tuning_used++ ) goto psy_failure; param->rc.i_aq_mode = X264_AQ_NONE; param->analyse.b_psy = 0; } - else if( !strncasecmp( s, "ssim", 4 ) ) + else if( len == 4 && !strncasecmp( tune, "ssim", 4 ) ) { if( psy_tuning_used++ ) goto psy_failure; param->rc.i_aq_mode = X264_AQ_AUTOVARIANCE; param->analyse.b_psy = 0; } - else if( !strncasecmp( s, "fastdecode", 10 ) ) + else if( len == 10 && !strncasecmp( tune, "fastdecode", 10 ) ) { param->b_deblocking_filter = 0; param->b_cabac = 0; param->analyse.b_weighted_bipred = 0; param->analyse.i_weighted_pred = X264_WEIGHTP_NONE; } - else if( !strncasecmp( s, "zerolatency", 11 ) ) + else if( len == 11 && !strncasecmp( tune, "zerolatency", 11 ) ) { param->rc.i_lookahead = 0; param->i_sync_lookahead = 0; @@ -637,7 +619,7 @@ param->b_vfr_input = 0; param->rc.b_mb_tree = 0; } - else if( !strncasecmp( s, "touhou", 6 ) ) + else if( len == 6 && !strncasecmp( tune, "touhou", 6 ) ) { if( psy_tuning_used++ ) goto psy_failure; param->i_frame_reference = param->i_frame_reference > 1 ? 
param->i_frame_reference*2 : 1; @@ -650,24 +632,18 @@ } else { - x264_log_internal( X264_LOG_ERROR, "invalid tune '%s'\n", s ); - x264_free( tmp ); + x264_log_internal( X264_LOG_ERROR, "invalid tune '%.*s'\n", len, tune ); return -1; - } - if( 0 ) - { psy_failure: - x264_log_internal( X264_LOG_WARNING, "only 1 psy tuning can be used: ignoring tune %s\n", s ); + x264_log_internal( X264_LOG_WARNING, "only 1 psy tuning can be used: ignoring tune %.*s\n", len, tune ); } - s = strtok( NULL, ",./-+" ); } - x264_free( tmp ); return 0; } -static int param_default_preset( x264_param_t *param, const char *preset, const char *tune ) +REALIGN_STACK int x264_param_default_preset( x264_param_t *param, const char *preset, const char *tune ) { - param_default( param ); + x264_param_default( param ); if( preset && param_apply_preset( param, preset ) < 0 ) return -1; @@ -676,12 +652,7 @@ return 0; } -int x264_param_default_preset( x264_param_t *param, const char *preset, const char *tune ) -{ - return x264_stack_align( param_default_preset, param, preset, tune ); -} - -static void param_apply_fastfirstpass( x264_param_t *param ) +REALIGN_STACK void x264_param_apply_fastfirstpass( x264_param_t *param ) { /* Set faster options in case of turbo firstpass. 
*/ if( param->rc.b_stat_write && !param->rc.b_stat_read ) @@ -696,11 +667,6 @@ } } -void x264_param_apply_fastfirstpass( x264_param_t *param ) -{ - x264_stack_align( param_apply_fastfirstpass, param ); -} - static int profile_string_to_int( const char *str ) { if( !strcasecmp( str, "baseline" ) ) @@ -718,7 +684,7 @@ return -1; } -static int param_apply_profile( x264_param_t *param, const char *profile ) +REALIGN_STACK int x264_param_apply_profile( x264_param_t *param, const char *profile ) { if( !profile ) return 0; @@ -785,11 +751,6 @@ return 0; } -int x264_param_apply_profile( x264_param_t *param, const char *profile ) -{ - return x264_stack_align( param_apply_profile, param, profile ); -} - static int parse_enum( const char *arg, const char * const *names, int *dst ) { for( int i = 0; names[i]; i++ ) @@ -851,7 +812,7 @@ #define atoi(str) atoi_internal( str, &b_error ) #define atof(str) atof_internal( str, &b_error ) -static int param_parse( x264_param_t *p, const char *name, const char *value ) +REALIGN_STACK int x264_param_parse( x264_param_t *p, const char *name, const char *value ) { char *name_buf = NULL; int b_error = 0; @@ -1298,7 +1259,7 @@ OPT("zones") p->rc.psz_zones = strdup(value); OPT("crop-rect") - b_error |= sscanf( value, "%u,%u,%u,%u", &p->crop_rect.i_left, &p->crop_rect.i_top, + b_error |= sscanf( value, "%d,%d,%d,%d", &p->crop_rect.i_left, &p->crop_rect.i_top, &p->crop_rect.i_right, &p->crop_rect.i_bottom ) != 4; OPT("psnr") p->analyse.b_psnr = atobool(value); @@ -1352,11 +1313,6 @@ return b_error ? 
errortype : 0; } -int x264_param_parse( x264_param_t *param, const char *name, const char *value ) -{ - return x264_stack_align( param_parse, param, name, value ); -} - /**************************************************************************** * x264_param2string: ****************************************************************************/ @@ -1469,7 +1425,7 @@ if( p->rc.i_vbv_buffer_size ) s += sprintf( s, " nal_hrd=%s filler=%d", x264_nal_hrd_names[p->i_nal_hrd], p->rc.b_filler ); if( p->crop_rect.i_left | p->crop_rect.i_top | p->crop_rect.i_right | p->crop_rect.i_bottom ) - s += sprintf( s, " crop_rect=%u,%u,%u,%u", p->crop_rect.i_left, p->crop_rect.i_top, + s += sprintf( s, " crop_rect=%d,%d,%d,%d", p->crop_rect.i_left, p->crop_rect.i_top, p->crop_rect.i_right, p->crop_rect.i_bottom ); if( p->i_frame_packing >= 0 ) s += sprintf( s, " frame-packing=%d", p->i_frame_packing ); diff -Nru x264-0.157.2935+git545de2f/common/base.h x264-0.160.3011+gitcde9a93/common/base.h --- x264-0.157.2935+git545de2f/common/base.h 2018-09-25 12:35:00.000000000 +0000 +++ x264-0.160.3011+gitcde9a93/common/base.h 2020-07-13 10:30:21.000000000 +0000 @@ -1,7 +1,7 @@ /***************************************************************************** * base.h: misc common functions (bit depth independent) ***************************************************************************** - * Copyright (C) 2003-2018 x264 project + * Copyright (C) 2003-2020 x264 project * * Authors: Laurent Aimar * Loren Merritt @@ -47,14 +47,13 @@ #include #include #include -#include "x264.h" /**************************************************************************** * Macros ****************************************************************************/ #define XCHG(type,a,b) do { type t = a; a = b; b = t; } while( 0 ) #define FIX8(f) ((int)(f*(1<<8)+.5)) -#define ARRAY_ELEMS(a) ((sizeof(a))/(sizeof(a[0]))) +#define ARRAY_ELEMS(a) ((int)((sizeof(a))/(sizeof(a[0])))) #define ALIGN(x,a) (((x)+((a)-1))&~((a)-1)) 
#define IS_DISPOSABLE(type) ( type == X264_TYPE_B ) @@ -62,11 +61,11 @@ * Mn: load or store n bits, aligned, native-endian * CPn: copy n bits, aligned, native-endian * we don't use memcpy for CPn because memcpy's args aren't assumed to be aligned */ -typedef union { uint16_t i; uint8_t c[2]; } MAY_ALIAS x264_union16_t; -typedef union { uint32_t i; uint16_t b[2]; uint8_t c[4]; } MAY_ALIAS x264_union32_t; -typedef union { uint64_t i; uint32_t a[2]; uint16_t b[4]; uint8_t c[8]; } MAY_ALIAS x264_union64_t; +typedef union { uint16_t i; uint8_t b[2]; } MAY_ALIAS x264_union16_t; +typedef union { uint32_t i; uint16_t w[2]; uint8_t b[4]; } MAY_ALIAS x264_union32_t; +typedef union { uint64_t i; uint32_t d[2]; uint16_t w[4]; uint8_t b[8]; } MAY_ALIAS x264_union64_t; typedef struct { uint64_t i[2]; } x264_uint128_t; -typedef union { x264_uint128_t i; uint64_t a[2]; uint32_t b[4]; uint16_t c[8]; uint8_t d[16]; } MAY_ALIAS x264_union128_t; +typedef union { x264_uint128_t i; uint64_t q[2]; uint32_t d[4]; uint16_t w[8]; uint8_t b[16]; } MAY_ALIAS x264_union128_t; #define M16(src) (((x264_union16_t*)(src))->i) #define M32(src) (((x264_union32_t*)(src))->i) #define M64(src) (((x264_union64_t*)(src))->i) @@ -256,23 +255,23 @@ /**************************************************************************** * General functions ****************************************************************************/ -void x264_reduce_fraction( uint32_t *n, uint32_t *d ); -void x264_reduce_fraction64( uint64_t *n, uint64_t *d ); +X264_API void x264_reduce_fraction( uint32_t *n, uint32_t *d ); +X264_API void x264_reduce_fraction64( uint64_t *n, uint64_t *d ); -void x264_log_default( void *p_unused, int i_level, const char *psz_fmt, va_list arg ); -void x264_log_internal( int i_level, const char *psz_fmt, ... ); +X264_API void x264_log_default( void *p_unused, int i_level, const char *psz_fmt, va_list arg ); +X264_API void x264_log_internal( int i_level, const char *psz_fmt, ... 
); /* x264_malloc : will do or emulate a memalign * you have to use x264_free for buffers allocated with x264_malloc */ -void *x264_malloc( int ); -void x264_free( void * ); +X264_API void *x264_malloc( int64_t ); +X264_API void x264_free( void * ); /* x264_slurp_file: malloc space for the whole file and read it */ -char *x264_slurp_file( const char *filename ); +X264_API char *x264_slurp_file( const char *filename ); /* x264_param2string: return a (malloced) string containing most of * the encoding options */ -char *x264_param2string( x264_param_t *p, int b_res ); +X264_API char *x264_param2string( x264_param_t *p, int b_res ); /**************************************************************************** * Macros @@ -297,21 +296,21 @@ #define PREALLOC_INIT\ int prealloc_idx = 0;\ - size_t prealloc_size = 0;\ + int64_t prealloc_size = 0;\ uint8_t **preallocs[PREALLOC_BUF_SIZE]; #define PREALLOC( var, size )\ do {\ - var = (void*)prealloc_size;\ + var = (void*)(intptr_t)prealloc_size;\ preallocs[prealloc_idx++] = (uint8_t**)&var;\ - prealloc_size += ALIGN(size, NATIVE_ALIGN);\ + prealloc_size += ALIGN((int64_t)(size), NATIVE_ALIGN);\ } while( 0 ) #define PREALLOC_END( ptr )\ do {\ CHECKED_MALLOC( ptr, prealloc_size );\ while( prealloc_idx-- )\ - *preallocs[prealloc_idx] += (intptr_t)ptr;\ + *preallocs[prealloc_idx] = (uint8_t*)((intptr_t)(*preallocs[prealloc_idx]) + (intptr_t)ptr);\ } while( 0 ) #endif diff -Nru x264-0.157.2935+git545de2f/common/bitstream.c x264-0.160.3011+gitcde9a93/common/bitstream.c --- x264-0.157.2935+git545de2f/common/bitstream.c 2018-09-25 12:35:00.000000000 +0000 +++ x264-0.160.3011+gitcde9a93/common/bitstream.c 2020-07-13 10:30:21.000000000 +0000 @@ -1,7 +1,7 @@ /***************************************************************************** * bitstream.c: bitstream writing ***************************************************************************** - * Copyright (C) 2003-2018 x264 project + * Copyright (C) 2003-2020 x264 project * * Authors: 
Laurent Aimar * Fiona Glaser @@ -45,7 +45,7 @@ #if HAVE_ARMV6 #include "arm/bitstream.h" #endif -#if ARCH_AARCH64 +#if HAVE_AARCH64 #include "aarch64/bitstream.h" #endif @@ -103,7 +103,7 @@ x264_emms(); } -void x264_bitstream_init( int cpu, x264_bitstream_function_t *pf ) +void x264_bitstream_init( uint32_t cpu, x264_bitstream_function_t *pf ) { memset( pf, 0, sizeof(*pf) ); @@ -159,7 +159,7 @@ if( cpu&X264_CPU_NEON ) pf->nal_escape = x264_nal_escape_neon; #endif -#if ARCH_AARCH64 +#if HAVE_AARCH64 if( cpu&X264_CPU_NEON ) pf->nal_escape = x264_nal_escape_neon; #endif diff -Nru x264-0.157.2935+git545de2f/common/bitstream.h x264-0.160.3011+gitcde9a93/common/bitstream.h --- x264-0.157.2935+git545de2f/common/bitstream.h 2018-09-25 12:35:00.000000000 +0000 +++ x264-0.160.3011+gitcde9a93/common/bitstream.h 2020-07-13 10:30:21.000000000 +0000 @@ -1,7 +1,7 @@ /***************************************************************************** * bitstream.h: bitstream writing ***************************************************************************** - * Copyright (C) 2003-2018 x264 project + * Copyright (C) 2003-2020 x264 project * * Authors: Loren Merritt * Fiona Glaser @@ -66,7 +66,7 @@ } x264_bitstream_function_t; #define x264_bitstream_init x264_template(bitstream_init) -void x264_bitstream_init( int cpu, x264_bitstream_function_t *pf ); +void x264_bitstream_init( uint32_t cpu, x264_bitstream_function_t *pf ); /* A larger level table size theoretically could help a bit at extremely * high bitrates, but the cost in cache is usually too high for it to be diff -Nru x264-0.157.2935+git545de2f/common/cabac.c x264-0.160.3011+gitcde9a93/common/cabac.c --- x264-0.157.2935+git545de2f/common/cabac.c 2018-09-25 12:35:00.000000000 +0000 +++ x264-0.160.3011+gitcde9a93/common/cabac.c 2020-07-13 10:30:21.000000000 +0000 @@ -1,7 +1,7 @@ /***************************************************************************** * cabac.c: arithmetic coder 
***************************************************************************** - * Copyright (C) 2003-2018 x264 project + * Copyright (C) 2003-2020 x264 project * * Authors: Laurent Aimar * Loren Merritt diff -Nru x264-0.157.2935+git545de2f/common/cabac.h x264-0.160.3011+gitcde9a93/common/cabac.h --- x264-0.157.2935+git545de2f/common/cabac.h 2018-09-25 12:35:00.000000000 +0000 +++ x264-0.160.3011+gitcde9a93/common/cabac.h 2020-07-13 10:30:21.000000000 +0000 @@ -1,7 +1,7 @@ /***************************************************************************** * cabac.h: arithmetic coder ***************************************************************************** - * Copyright (C) 2003-2018 x264 project + * Copyright (C) 2003-2020 x264 project * * Authors: Loren Merritt * Laurent Aimar @@ -80,7 +80,7 @@ #define x264_cabac_encode_decision x264_cabac_encode_decision_asm #define x264_cabac_encode_bypass x264_cabac_encode_bypass_asm #define x264_cabac_encode_terminal x264_cabac_encode_terminal_asm -#elif defined(ARCH_AARCH64) +#elif HAVE_AARCH64 #define x264_cabac_encode_decision x264_cabac_encode_decision_asm #define x264_cabac_encode_bypass x264_cabac_encode_bypass_asm #define x264_cabac_encode_terminal x264_cabac_encode_terminal_asm diff -Nru x264-0.157.2935+git545de2f/common/common.c x264-0.160.3011+gitcde9a93/common/common.c --- x264-0.157.2935+git545de2f/common/common.c 2018-09-25 12:35:00.000000000 +0000 +++ x264-0.160.3011+gitcde9a93/common/common.c 2020-07-13 10:30:21.000000000 +0000 @@ -1,7 +1,7 @@ /***************************************************************************** * common.c: misc common functions ***************************************************************************** - * Copyright (C) 2003-2018 x264 project + * Copyright (C) 2003-2020 x264 project * * Authors: Loren Merritt * Laurent Aimar diff -Nru x264-0.157.2935+git545de2f/common/common.h x264-0.160.3011+gitcde9a93/common/common.h --- x264-0.157.2935+git545de2f/common/common.h 2018-09-25 
12:35:00.000000000 +0000 +++ x264-0.160.3011+gitcde9a93/common/common.h 2020-07-13 10:30:21.000000000 +0000 @@ -1,7 +1,7 @@ /***************************************************************************** * common.h: misc common functions ***************************************************************************** - * Copyright (C) 2003-2018 x264 project + * Copyright (C) 2003-2020 x264 project * * Authors: Laurent Aimar * Loren Merritt @@ -108,6 +108,8 @@ # define MPIXEL_X4(src) M32(src) #endif +#define SIZEOF_PIXEL ((int)sizeof(pixel)) + #define CPPIXEL_X4(dst,src) MPIXEL_X4(dst) = MPIXEL_X4(src) /**************************************************************************** @@ -528,7 +530,7 @@ int16_t (*mvr[2][X264_REF_MAX*2])[2];/* 16x16 mv for each possible ref */ int8_t *skipbp; /* block pattern for SKIP or DIRECT (sub)mbs. B-frames + cabac only */ int8_t *mb_transform_size; /* transform_size_8x8_flag of each mb */ - uint32_t *slice_table; /* sh->first_mb of the slice that the indexed mb is part of */ + int32_t *slice_table; /* sh->first_mb of the slice that the indexed mb is part of */ uint8_t *field; /* buffer for weighted versions of the reference frames */ diff -Nru x264-0.157.2935+git545de2f/common/cpu.c x264-0.160.3011+gitcde9a93/common/cpu.c --- x264-0.157.2935+git545de2f/common/cpu.c 2018-09-25 12:35:00.000000000 +0000 +++ x264-0.160.3011+gitcde9a93/common/cpu.c 2020-07-13 10:30:21.000000000 +0000 @@ -1,7 +1,7 @@ /***************************************************************************** * cpu.c: cpu detection ***************************************************************************** - * Copyright (C) 2003-2018 x264 project + * Copyright (C) 2003-2020 x264 project * * Authors: Loren Merritt * Laurent Aimar @@ -45,7 +45,7 @@ const x264_cpu_name_t x264_cpu_names[] = { -#if HAVE_MMX +#if ARCH_X86 || ARCH_X86_64 // {"MMX", X264_CPU_MMX}, // we don't support asm on mmx1 cpus anymore #define MMX2 X264_CPU_MMX|X264_CPU_MMX2 {"MMX2", MMX2}, @@ -97,7 +97,7 
@@ {"", 0}, }; -#if (ARCH_PPC && SYS_LINUX) || (ARCH_ARM && !HAVE_NEON) +#if (HAVE_ALTIVEC && SYS_LINUX) || (HAVE_ARMV6 && !HAVE_NEON) #include #include static sigjmp_buf jmpbuf; @@ -298,7 +298,7 @@ return cpu; } -#elif ARCH_PPC && HAVE_ALTIVEC +#elif HAVE_ALTIVEC #if SYS_MACOSX || SYS_OPENBSD || SYS_FREEBSD #include @@ -355,15 +355,14 @@ } #endif -#elif ARCH_ARM +#elif HAVE_ARMV6 void x264_cpu_neon_test( void ); int x264_cpu_fast_neon_mrc_test( void ); uint32_t x264_cpu_detect( void ) { - int flags = 0; -#if HAVE_ARMV6 + uint32_t flags = 0; flags |= X264_CPU_ARMV6; // don't do this hack if compiled with -mfpu=neon @@ -396,26 +395,25 @@ flags |= x264_cpu_fast_neon_mrc_test() ? X264_CPU_FAST_NEON_MRC : 0; #endif // TODO: write dual issue test? currently it's A8 (dual issue) vs. A9 (fast mrc) -#endif return flags; } -#elif ARCH_AARCH64 +#elif HAVE_AARCH64 uint32_t x264_cpu_detect( void ) { +#if HAVE_NEON return X264_CPU_ARMV8 | X264_CPU_NEON; +#else + return X264_CPU_ARMV8; +#endif } -#elif ARCH_MIPS +#elif HAVE_MSA uint32_t x264_cpu_detect( void ) { - uint32_t flags = 0; -#if HAVE_MSA - flags |= X264_CPU_MSA; -#endif - return flags; + return X264_CPU_MSA; } #else @@ -451,7 +449,7 @@ return CPU_COUNT(&p_aff); #else int np = 0; - for( unsigned int bit = 0; bit < 8 * sizeof(p_aff); bit++ ) + for( size_t bit = 0; bit < 8 * sizeof(p_aff); bit++ ) np += (((uint8_t *)&p_aff)[bit / 8] >> (bit % 8)) & 1; return np; #endif diff -Nru x264-0.157.2935+git545de2f/common/cpu.h x264-0.160.3011+gitcde9a93/common/cpu.h --- x264-0.157.2935+git545de2f/common/cpu.h 2018-09-25 12:35:00.000000000 +0000 +++ x264-0.160.3011+gitcde9a93/common/cpu.h 2020-07-13 10:30:21.000000000 +0000 @@ -1,7 +1,7 @@ /***************************************************************************** * cpu.h: cpu detection ***************************************************************************** - * Copyright (C) 2004-2018 x264 project + * Copyright (C) 2004-2020 x264 project * * Authors: Loren Merritt * @@ 
-26,8 +26,8 @@ #ifndef X264_CPU_H #define X264_CPU_H -uint32_t x264_cpu_detect( void ); -int x264_cpu_num_processors( void ); +X264_API uint32_t x264_cpu_detect( void ); +X264_API int x264_cpu_num_processors( void ); void x264_cpu_emms( void ); void x264_cpu_sfence( void ); #if HAVE_MMX @@ -46,28 +46,11 @@ #endif #define x264_sfence x264_cpu_sfence -/* kludge: - * gcc can't give variables any greater alignment than the stack frame has. - * We need 32 byte alignment for AVX2, so here we make sure that the stack is - * aligned to 32 bytes. - * gcc 4.2 introduced __attribute__((force_align_arg_pointer)) to fix this - * problem, but I don't want to require such a new version. - * aligning to 32 bytes only works if the compiler supports keeping that - * alignment between functions (osdep.h handles manual alignment of arrays - * if it doesn't). - */ -#if HAVE_MMX && (STACK_ALIGNMENT > 16 || (ARCH_X86 && STACK_ALIGNMENT > 4)) -intptr_t x264_stack_align( void (*func)(), ... ); -#define x264_stack_align(func,...) x264_stack_align((void (*)())func, __VA_ARGS__) -#else -#define x264_stack_align(func,...) 
func(__VA_ARGS__) -#endif - typedef struct { const char *name; uint32_t flags; } x264_cpu_name_t; -extern const x264_cpu_name_t x264_cpu_names[]; +X264_API extern const x264_cpu_name_t x264_cpu_names[]; #endif diff -Nru x264-0.157.2935+git545de2f/common/dct.c x264-0.160.3011+gitcde9a93/common/dct.c --- x264-0.157.2935+git545de2f/common/dct.c 2018-09-25 12:35:00.000000000 +0000 +++ x264-0.160.3011+gitcde9a93/common/dct.c 2020-07-13 10:30:21.000000000 +0000 @@ -1,7 +1,7 @@ /***************************************************************************** * dct.c: transform and zigzag ***************************************************************************** - * Copyright (C) 2003-2018 x264 project + * Copyright (C) 2003-2020 x264 project * * Authors: Loren Merritt * Laurent Aimar @@ -29,16 +29,16 @@ #if HAVE_MMX # include "x86/dct.h" #endif -#if ARCH_PPC +#if HAVE_ALTIVEC # include "ppc/dct.h" #endif -#if ARCH_ARM +#if HAVE_ARMV6 # include "arm/dct.h" #endif -#if ARCH_AARCH64 +#if HAVE_AARCH64 # include "aarch64/dct.h" #endif -#if ARCH_MIPS +#if HAVE_MSA # include "mips/dct.h" #endif @@ -472,7 +472,7 @@ /**************************************************************************** * x264_dct_init: ****************************************************************************/ -void x264_dct_init( int cpu, x264_dct_function_t *dctf ) +void x264_dct_init( uint32_t cpu, x264_dct_function_t *dctf ) { dctf->sub4x4_dct = sub4x4_dct; dctf->add4x4_idct = add4x4_idct; @@ -682,7 +682,7 @@ } #endif -#if HAVE_ARMV6 || ARCH_AARCH64 +#if HAVE_ARMV6 || HAVE_AARCH64 if( cpu&X264_CPU_NEON ) { dctf->sub4x4_dct = x264_sub4x4_dct_neon; @@ -901,7 +901,7 @@ } } -void x264_zigzag_init( int cpu, x264_zigzag_function_t *pf_progressive, x264_zigzag_function_t *pf_interlaced ) +void x264_zigzag_init( uint32_t cpu, x264_zigzag_function_t *pf_progressive, x264_zigzag_function_t *pf_interlaced ) { pf_interlaced->scan_8x8 = zigzag_scan_8x8_field; pf_progressive->scan_8x8 = zigzag_scan_8x8_frame; @@ 
-996,11 +996,11 @@ pf_progressive->scan_8x8 = x264_zigzag_scan_8x8_frame_altivec; } #endif -#if HAVE_ARMV6 || ARCH_AARCH64 +#if HAVE_ARMV6 || HAVE_AARCH64 if( cpu&X264_CPU_NEON ) { pf_progressive->scan_4x4 = x264_zigzag_scan_4x4_frame_neon; -#if ARCH_AARCH64 +#if HAVE_AARCH64 pf_interlaced->scan_4x4 = x264_zigzag_scan_4x4_field_neon; pf_interlaced->scan_8x8 = x264_zigzag_scan_8x8_field_neon; pf_interlaced->sub_4x4 = x264_zigzag_sub_4x4_field_neon; @@ -1010,9 +1010,9 @@ pf_progressive->sub_4x4 = x264_zigzag_sub_4x4_frame_neon; pf_progressive->sub_4x4ac = x264_zigzag_sub_4x4ac_frame_neon; pf_progressive->sub_8x8 = x264_zigzag_sub_8x8_frame_neon; -#endif // ARCH_AARCH64 +#endif // HAVE_AARCH64 } -#endif // HAVE_ARMV6 || ARCH_AARCH64 +#endif // HAVE_ARMV6 || HAVE_AARCH64 #endif // HIGH_BIT_DEPTH pf_interlaced->interleave_8x8_cavlc = @@ -1065,13 +1065,13 @@ #endif // HIGH_BIT_DEPTH #endif #if !HIGH_BIT_DEPTH -#if ARCH_AARCH64 +#if HAVE_AARCH64 if( cpu&X264_CPU_NEON ) { pf_interlaced->interleave_8x8_cavlc = pf_progressive->interleave_8x8_cavlc = x264_zigzag_interleave_8x8_cavlc_neon; } -#endif // ARCH_AARCH64 +#endif // HAVE_AARCH64 #if HAVE_ALTIVEC if( cpu&X264_CPU_ALTIVEC ) diff -Nru x264-0.157.2935+git545de2f/common/dct.h x264-0.160.3011+gitcde9a93/common/dct.h --- x264-0.157.2935+git545de2f/common/dct.h 2018-09-25 12:35:00.000000000 +0000 +++ x264-0.160.3011+gitcde9a93/common/dct.h 2020-07-13 10:30:21.000000000 +0000 @@ -1,7 +1,7 @@ /***************************************************************************** * dct.h: transform and zigzag ***************************************************************************** - * Copyright (C) 2004-2018 x264 project + * Copyright (C) 2004-2020 x264 project * * Authors: Loren Merritt * @@ -70,8 +70,8 @@ } x264_zigzag_function_t; #define x264_dct_init x264_template(dct_init) -void x264_dct_init( int cpu, x264_dct_function_t *dctf ); +void x264_dct_init( uint32_t cpu, x264_dct_function_t *dctf ); #define x264_zigzag_init 
x264_template(zigzag_init) -void x264_zigzag_init( int cpu, x264_zigzag_function_t *pf_progressive, x264_zigzag_function_t *pf_interlaced ); +void x264_zigzag_init( uint32_t cpu, x264_zigzag_function_t *pf_progressive, x264_zigzag_function_t *pf_interlaced ); #endif diff -Nru x264-0.157.2935+git545de2f/common/deblock.c x264-0.160.3011+gitcde9a93/common/deblock.c --- x264-0.157.2935+git545de2f/common/deblock.c 2018-09-25 12:35:00.000000000 +0000 +++ x264-0.160.3011+gitcde9a93/common/deblock.c 2020-07-13 10:30:21.000000000 +0000 @@ -1,7 +1,7 @@ /***************************************************************************** * deblock.c: deblocking ***************************************************************************** - * Copyright (C) 2003-2018 x264 project + * Copyright (C) 2003-2020 x264 project * * Authors: Laurent Aimar * Loren Merritt @@ -399,12 +399,13 @@ uint8_t (*bs)[8][4] = h->deblock_strength[mb_y&1][h->param.b_sliced_threads?mb_xy:mb_x]; pixel *pixy = h->fdec->plane[0] + 16*mb_y*stridey + 16*mb_x; - pixel *pixuv = h->fdec->plane[1] + chroma_height*mb_y*strideuv + 16*mb_x; + pixel *pixuv = CHROMA_FORMAT ? 
h->fdec->plane[1] + chroma_height*mb_y*strideuv + 16*mb_x : NULL; if( mb_y & MB_INTERLACED ) { pixy -= 15*stridey; - pixuv -= (chroma_height-1)*strideuv; + if( CHROMA_FORMAT ) + pixuv -= (chroma_height-1)*strideuv; } int stride2y = stridey << MB_INTERLACED; @@ -667,20 +668,20 @@ #if HAVE_MMX #include "x86/deblock.h" #endif -#if ARCH_PPC +#if HAVE_ALTIVEC #include "ppc/deblock.h" #endif #if HAVE_ARMV6 #include "arm/deblock.h" #endif -#if ARCH_AARCH64 +#if HAVE_AARCH64 #include "aarch64/deblock.h" #endif #if HAVE_MSA #include "mips/deblock.h" #endif -void x264_deblock_init( int cpu, x264_deblock_function_t *pf, int b_mbaff ) +void x264_deblock_init( uint32_t cpu, x264_deblock_function_t *pf, int b_mbaff ) { pf->deblock_luma[1] = deblock_v_luma_c; pf->deblock_luma[0] = deblock_h_luma_c; @@ -782,7 +783,7 @@ } #endif // HAVE_ALTIVEC -#if HAVE_ARMV6 || ARCH_AARCH64 +#if HAVE_ARMV6 || HAVE_AARCH64 if( cpu&X264_CPU_NEON ) { pf->deblock_luma[1] = x264_deblock_v_luma_neon; diff -Nru x264-0.157.2935+git545de2f/common/frame.c x264-0.160.3011+gitcde9a93/common/frame.c --- x264-0.157.2935+git545de2f/common/frame.c 2018-09-25 12:35:00.000000000 +0000 +++ x264-0.160.3011+gitcde9a93/common/frame.c 2020-07-13 10:30:21.000000000 +0000 @@ -1,7 +1,7 @@ /***************************************************************************** * frame.c: frame handling ***************************************************************************** - * Copyright (C) 2003-2018 x264 project + * Copyright (C) 2003-2020 x264 project * * Authors: Laurent Aimar * Loren Merritt @@ -77,7 +77,7 @@ #endif /* ensure frame alignment after PADH is added */ - int padh_align = X264_MAX( align - PADH * sizeof(pixel), 0 ) / sizeof(pixel); + int padh_align = X264_MAX( align - PADH * SIZEOF_PIXEL, 0 ) / SIZEOF_PIXEL; CHECKED_MALLOCZERO( frame, sizeof(x264_frame_t) ); PREALLOC_INIT @@ -152,9 +152,9 @@ { int chroma_padv = i_padv >> (i_csp == X264_CSP_NV12); int chroma_plane_size = (frame->i_stride[1] * (frame->i_lines[1] + 
2*chroma_padv)); - PREALLOC( frame->buffer[1], (chroma_plane_size + padh_align) * sizeof(pixel) ); + PREALLOC( frame->buffer[1], (chroma_plane_size + padh_align) * SIZEOF_PIXEL ); if( PARAM_INTERLACED ) - PREALLOC( frame->buffer_fld[1], (chroma_plane_size + padh_align) * sizeof(pixel) ); + PREALLOC( frame->buffer_fld[1], (chroma_plane_size + padh_align) * SIZEOF_PIXEL ); } /* all 4 luma planes allocated together, since the cacheline split code @@ -162,14 +162,14 @@ for( int p = 0; p < luma_plane_count; p++ ) { - int luma_plane_size = align_plane_size( frame->i_stride[p] * (frame->i_lines[p] + 2*i_padv), disalign ); + int64_t luma_plane_size = align_plane_size( frame->i_stride[p] * (frame->i_lines[p] + 2*i_padv), disalign ); if( h->param.analyse.i_subpel_refine && b_fdec ) luma_plane_size *= 4; /* FIXME: Don't allocate both buffers in non-adaptive MBAFF. */ - PREALLOC( frame->buffer[p], (luma_plane_size + padh_align) * sizeof(pixel) ); + PREALLOC( frame->buffer[p], (luma_plane_size + padh_align) * SIZEOF_PIXEL ); if( PARAM_INTERLACED ) - PREALLOC( frame->buffer_fld[p], (luma_plane_size + padh_align) * sizeof(pixel) ); + PREALLOC( frame->buffer_fld[p], (luma_plane_size + padh_align) * SIZEOF_PIXEL ); } frame->b_duplicate = 0; @@ -205,9 +205,9 @@ { if( h->frames.b_have_lowres ) { - int luma_plane_size = align_plane_size( frame->i_stride_lowres * (frame->i_lines[0]/2 + 2*PADV), disalign ); + int64_t luma_plane_size = align_plane_size( frame->i_stride_lowres * (frame->i_lines[0]/2 + 2*PADV), disalign ); - PREALLOC( frame->buffer_lowres, (4 * luma_plane_size + padh_align) * sizeof(pixel) ); + PREALLOC( frame->buffer_lowres, (4 * luma_plane_size + padh_align) * SIZEOF_PIXEL ); for( int j = 0; j <= !!h->param.i_bframe; j++ ) for( int i = 0; i <= h->param.i_bframe; i++ ) @@ -244,13 +244,14 @@ for( int p = 0; p < luma_plane_count; p++ ) { - int luma_plane_size = align_plane_size( frame->i_stride[p] * (frame->i_lines[p] + 2*i_padv), disalign ); + int64_t luma_plane_size = 
align_plane_size( frame->i_stride[p] * (frame->i_lines[p] + 2*i_padv), disalign ); if( h->param.analyse.i_subpel_refine && b_fdec ) { for( int i = 0; i < 4; i++ ) { frame->filtered[p][i] = frame->buffer[p] + i*luma_plane_size + frame->i_stride[p] * i_padv + PADH + padh_align; - frame->filtered_fld[p][i] = frame->buffer_fld[p] + i*luma_plane_size + frame->i_stride[p] * i_padv + PADH + padh_align; + if( PARAM_INTERLACED ) + frame->filtered_fld[p][i] = frame->buffer_fld[p] + i*luma_plane_size + frame->i_stride[p] * i_padv + PADH + padh_align; } frame->plane[p] = frame->filtered[p][0]; frame->plane_fld[p] = frame->filtered_fld[p][0]; @@ -258,7 +259,8 @@ else { frame->filtered[p][0] = frame->plane[p] = frame->buffer[p] + frame->i_stride[p] * i_padv + PADH + padh_align; - frame->filtered_fld[p][0] = frame->plane_fld[p] = frame->buffer_fld[p] + frame->i_stride[p] * i_padv + PADH + padh_align; + if( PARAM_INTERLACED ) + frame->filtered_fld[p][0] = frame->plane_fld[p] = frame->buffer_fld[p] + frame->i_stride[p] * i_padv + PADH + padh_align; } } @@ -274,7 +276,7 @@ { if( h->frames.b_have_lowres ) { - int luma_plane_size = align_plane_size( frame->i_stride_lowres * (frame->i_lines[0]/2 + 2*PADV), disalign ); + int64_t luma_plane_size = align_plane_size( frame->i_stride_lowres * (frame->i_lines[0]/2 + 2*PADV), disalign ); for( int i = 0; i < 4; i++ ) frame->lowres[i] = frame->buffer_lowres + frame->i_stride_lowres * PADV + PADH + padh_align + i * luma_plane_size; @@ -417,7 +419,7 @@ h->mc.plane_copy_deinterleave_v210( dst->plane[0], dst->i_stride[0], dst->plane[1], dst->i_stride[1], - (uint32_t *)pix[0], stride[0]/sizeof(uint32_t), h->param.i_width, h->param.i_height ); + (uint32_t *)pix[0], stride[0]/(int)sizeof(uint32_t), h->param.i_width, h->param.i_height ); } else if( i_csp >= X264_CSP_BGR ) { @@ -432,25 +434,25 @@ h->mc.plane_copy_deinterleave_rgb( dst->plane[1+b], dst->i_stride[1+b], dst->plane[0], dst->i_stride[0], dst->plane[2-b], dst->i_stride[2-b], - (pixel*)pix[0], 
stride[0]/sizeof(pixel), i_csp==X264_CSP_BGRA ? 4 : 3, h->param.i_width, h->param.i_height ); + (pixel*)pix[0], stride[0]/SIZEOF_PIXEL, i_csp==X264_CSP_BGRA ? 4 : 3, h->param.i_width, h->param.i_height ); } else { int v_shift = CHROMA_V_SHIFT; get_plane_ptr( h, src, &pix[0], &stride[0], 0, 0, 0 ); h->mc.plane_copy( dst->plane[0], dst->i_stride[0], (pixel*)pix[0], - stride[0]/sizeof(pixel), h->param.i_width, h->param.i_height ); + stride[0]/SIZEOF_PIXEL, h->param.i_width, h->param.i_height ); if( i_csp == X264_CSP_NV12 || i_csp == X264_CSP_NV16 ) { get_plane_ptr( h, src, &pix[1], &stride[1], 1, 0, v_shift ); h->mc.plane_copy( dst->plane[1], dst->i_stride[1], (pixel*)pix[1], - stride[1]/sizeof(pixel), h->param.i_width, h->param.i_height>>v_shift ); + stride[1]/SIZEOF_PIXEL, h->param.i_width, h->param.i_height>>v_shift ); } else if( i_csp == X264_CSP_NV21 ) { get_plane_ptr( h, src, &pix[1], &stride[1], 1, 0, v_shift ); h->mc.plane_copy_swap( dst->plane[1], dst->i_stride[1], (pixel*)pix[1], - stride[1]/sizeof(pixel), h->param.i_width>>1, h->param.i_height>>v_shift ); + stride[1]/SIZEOF_PIXEL, h->param.i_width>>1, h->param.i_height>>v_shift ); } else if( i_csp == X264_CSP_I420 || i_csp == X264_CSP_I422 || i_csp == X264_CSP_YV12 || i_csp == X264_CSP_YV16 ) { @@ -458,8 +460,8 @@ get_plane_ptr( h, src, &pix[1], &stride[1], uv_swap ? 2 : 1, 1, v_shift ); get_plane_ptr( h, src, &pix[2], &stride[2], uv_swap ? 1 : 2, 1, v_shift ); h->mc.plane_copy_interleave( dst->plane[1], dst->i_stride[1], - (pixel*)pix[1], stride[1]/sizeof(pixel), - (pixel*)pix[2], stride[2]/sizeof(pixel), + (pixel*)pix[1], stride[1]/SIZEOF_PIXEL, + (pixel*)pix[2], stride[2]/SIZEOF_PIXEL, h->param.i_width>>1, h->param.i_height>>v_shift ); } else if( i_csp == X264_CSP_I444 || i_csp == X264_CSP_YV24 ) @@ -467,9 +469,9 @@ get_plane_ptr( h, src, &pix[1], &stride[1], i_csp==X264_CSP_I444 ? 1 : 2, 0, 0 ); get_plane_ptr( h, src, &pix[2], &stride[2], i_csp==X264_CSP_I444 ? 
2 : 1, 0, 0 ); h->mc.plane_copy( dst->plane[1], dst->i_stride[1], (pixel*)pix[1], - stride[1]/sizeof(pixel), h->param.i_width, h->param.i_height ); + stride[1]/SIZEOF_PIXEL, h->param.i_width, h->param.i_height ); h->mc.plane_copy( dst->plane[2], dst->i_stride[2], (pixel*)pix[2], - stride[2]/sizeof(pixel), h->param.i_width, h->param.i_height ); + stride[2]/SIZEOF_PIXEL, h->param.i_width, h->param.i_height ); } } return 0; @@ -533,18 +535,18 @@ for( int y = 0; y < i_height; y++ ) { /* left band */ - pixel_memset( PPIXEL(-i_padh, y), PPIXEL(0, y), i_padh>>b_chroma, sizeof(pixel)<>b_chroma, SIZEOF_PIXEL<>b_chroma, sizeof(pixel)<>b_chroma, SIZEOF_PIXEL<plane[i][y*frame->i_stride[i] + i_width], &frame->plane[i][y*frame->i_stride[i] + i_width - 1-h_shift], - i_padx>>h_shift, sizeof(pixel)<>h_shift, SIZEOF_PIXEL<plane[i][y*frame->i_stride[i]], &frame->plane[i][(i_height-(~y&PARAM_INTERLACED)-1)*frame->i_stride[i]], - (i_width + i_padx) * sizeof(pixel) ); + (i_width + i_padx) * SIZEOF_PIXEL ); } } } @@ -670,7 +672,7 @@ int pady = (h->mb.i_mb_height * 16 - h->param.i_height) >> v_shift; pixel *fenc = h->fenc->plane[i] + 16*mb_x; for( int y = height; y < height + pady; y++ ) - memcpy( fenc + y*stride, fenc + (height-1)*stride, 16*sizeof(pixel) ); + memcpy( fenc + y*stride, fenc + (height-1)*stride, 16*SIZEOF_PIXEL ); } } @@ -683,12 +685,14 @@ x264_pthread_mutex_unlock( &frame->mutex ); } -void x264_frame_cond_wait( x264_frame_t *frame, int i_lines_completed ) +int x264_frame_cond_wait( x264_frame_t *frame, int i_lines_completed ) { + int completed; x264_pthread_mutex_lock( &frame->mutex ); - while( frame->i_lines_completed < i_lines_completed ) + while( (completed = frame->i_lines_completed) < i_lines_completed && i_lines_completed >= 0 ) x264_pthread_cond_wait( &frame->cv, &frame->mutex ); x264_pthread_mutex_unlock( &frame->mutex ); + return completed; } void x264_threadslice_cond_broadcast( x264_t *h, int pass ) diff -Nru x264-0.157.2935+git545de2f/common/frame.h 
x264-0.160.3011+gitcde9a93/common/frame.h --- x264-0.157.2935+git545de2f/common/frame.h 2018-09-25 12:35:00.000000000 +0000 +++ x264-0.160.3011+gitcde9a93/common/frame.h 2020-07-13 10:30:21.000000000 +0000 @@ -1,7 +1,7 @@ /***************************************************************************** * frame.h: frame handling ***************************************************************************** - * Copyright (C) 2003-2018 x264 project + * Copyright (C) 2003-2020 x264 project * * Authors: Laurent Aimar * Loren Merritt @@ -246,12 +246,12 @@ void x264_frame_init_lowres( x264_t *h, x264_frame_t *frame ); #define x264_deblock_init x264_template(deblock_init) -void x264_deblock_init( int cpu, x264_deblock_function_t *pf, int b_mbaff ); +void x264_deblock_init( uint32_t cpu, x264_deblock_function_t *pf, int b_mbaff ); #define x264_frame_cond_broadcast x264_template(frame_cond_broadcast) void x264_frame_cond_broadcast( x264_frame_t *frame, int i_lines_completed ); #define x264_frame_cond_wait x264_template(frame_cond_wait) -void x264_frame_cond_wait( x264_frame_t *frame, int i_lines_completed ); +int x264_frame_cond_wait( x264_frame_t *frame, int i_lines_completed ); #define x264_frame_new_slice x264_template(frame_new_slice) int x264_frame_new_slice( x264_t *h, x264_frame_t *frame ); @@ -261,13 +261,14 @@ void x264_threadslice_cond_wait( x264_t *h, int pass ); #define x264_frame_push x264_template(frame_push) -void x264_frame_push( x264_frame_t **list, x264_frame_t *frame ); +X264_API void x264_frame_push( x264_frame_t **list, x264_frame_t *frame ); #define x264_frame_pop x264_template(frame_pop) -x264_frame_t *x264_frame_pop( x264_frame_t **list ); +X264_API x264_frame_t *x264_frame_pop( x264_frame_t **list ); #define x264_frame_unshift x264_template(frame_unshift) -void x264_frame_unshift( x264_frame_t **list, x264_frame_t *frame ); +X264_API void x264_frame_unshift( x264_frame_t **list, x264_frame_t *frame ); #define x264_frame_shift x264_template(frame_shift) 
-x264_frame_t *x264_frame_shift( x264_frame_t **list ); +X264_API x264_frame_t *x264_frame_shift( x264_frame_t **list ); + #define x264_frame_push_unused x264_template(frame_push_unused) void x264_frame_push_unused( x264_t *h, x264_frame_t *frame ); #define x264_frame_push_blank_unused x264_template(frame_push_blank_unused) diff -Nru x264-0.157.2935+git545de2f/common/macroblock.c x264-0.160.3011+gitcde9a93/common/macroblock.c --- x264-0.157.2935+git545de2f/common/macroblock.c 2018-09-25 12:35:00.000000000 +0000 +++ x264-0.160.3011+gitcde9a93/common/macroblock.c 2020-07-13 10:30:21.000000000 +0000 @@ -1,7 +1,7 @@ /***************************************************************************** * macroblock.c: macroblock common functions ***************************************************************************** - * Copyright (C) 2003-2018 x264 project + * Copyright (C) 2003-2020 x264 project * * Authors: Fiona Glaser * Laurent Aimar @@ -260,7 +260,7 @@ PREALLOC( h->mb.qp, i_mb_count * sizeof(int8_t) ); PREALLOC( h->mb.cbp, i_mb_count * sizeof(int16_t) ); PREALLOC( h->mb.mb_transform_size, i_mb_count * sizeof(int8_t) ); - PREALLOC( h->mb.slice_table, i_mb_count * sizeof(uint32_t) ); + PREALLOC( h->mb.slice_table, i_mb_count * sizeof(int32_t) ); /* 0 -> 3 top(4), 4 -> 6 : left(3) */ PREALLOC( h->mb.intra4x4_pred_mode, i_mb_count * 8 * sizeof(int8_t) ); @@ -321,12 +321,12 @@ } for( int i = 0; i < numweightbuf; i++ ) - PREALLOC( h->mb.p_weight_buf[i], luma_plane_size * sizeof(pixel) ); + PREALLOC( h->mb.p_weight_buf[i], luma_plane_size * SIZEOF_PIXEL ); } PREALLOC_END( h->mb.base ); - memset( h->mb.slice_table, -1, i_mb_count * sizeof(uint32_t) ); + memset( h->mb.slice_table, -1, i_mb_count * sizeof(int32_t) ); for( int i = 0; i < 2; i++ ) { @@ -357,7 +357,7 @@ for( int i = 0; i < (PARAM_INTERLACED ? 5 : 2); i++ ) for( int j = 0; j < (CHROMA444 ? 
3 : 2); j++ ) { - CHECKED_MALLOC( h->intra_border_backup[i][j], (h->sps->i_mb_width*16+32) * sizeof(pixel) ); + CHECKED_MALLOC( h->intra_border_backup[i][j], (h->sps->i_mb_width*16+32) * SIZEOF_PIXEL ); h->intra_border_backup[i][j] += 16; } for( int i = 0; i <= PARAM_INTERLACED; i++ ) @@ -555,7 +555,7 @@ int off_y = 16 * i_mb_x + 16 * i_mb_y * stride_y; int off_uv = 16 * i_mb_x + (16 * i_mb_y * stride_uv >> CHROMA_V_SHIFT); h->mc.prefetch_fenc( fenc->plane[0]+off_y, stride_y, - fenc->plane[1]+off_uv, stride_uv, i_mb_x ); + fenc->plane[1] != NULL ? fenc->plane[1]+off_uv : NULL, stride_uv, i_mb_x ); } NOINLINE void x264_copy_column8( pixel *dst, pixel *src ) @@ -586,15 +586,15 @@ if( b_chroma ) { h->mc.load_deinterleave_chroma_fenc( h->mb.pic.p_fenc[1], h->mb.pic.p_fenc_plane[1], i_stride2, height ); - memcpy( h->mb.pic.p_fdec[1]-FDEC_STRIDE, intra_fdec, 8*sizeof(pixel) ); - memcpy( h->mb.pic.p_fdec[2]-FDEC_STRIDE, intra_fdec+8, 8*sizeof(pixel) ); + memcpy( h->mb.pic.p_fdec[1]-FDEC_STRIDE, intra_fdec, 8*SIZEOF_PIXEL ); + memcpy( h->mb.pic.p_fdec[2]-FDEC_STRIDE, intra_fdec+8, 8*SIZEOF_PIXEL ); h->mb.pic.p_fdec[1][-FDEC_STRIDE-1] = intra_fdec[-1-8]; h->mb.pic.p_fdec[2][-FDEC_STRIDE-1] = intra_fdec[-1]; } else { h->mc.copy[PIXEL_16x16]( h->mb.pic.p_fenc[i], FENC_STRIDE, h->mb.pic.p_fenc_plane[i], i_stride2, 16 ); - memcpy( h->mb.pic.p_fdec[i]-FDEC_STRIDE, intra_fdec, 24*sizeof(pixel) ); + memcpy( h->mb.pic.p_fdec[i]-FDEC_STRIDE, intra_fdec, 24*SIZEOF_PIXEL ); h->mb.pic.p_fdec[i][-FDEC_STRIDE-1] = intra_fdec[-1]; } if( b_mbaff || h->mb.b_reencode_mb ) @@ -626,8 +626,9 @@ if( !b_chroma ) { - for( int k = 1; k < 4; k++ ) - h->mb.pic.p_fref[0][j][i*4+k] = filtered_src[k] + ref_pix_offset[j&1]; + if( h->param.analyse.i_subpel_refine ) + for( int k = 1; k < 4; k++ ) + h->mb.pic.p_fref[0][j][i*4+k] = filtered_src[k] + ref_pix_offset[j&1]; if( !i ) { if( h->sh.weight[j][0].weightfn ) @@ -652,7 +653,7 @@ } h->mb.pic.p_fref[1][j][i*4] = plane_src + ref_pix_offset[j&1]; - if( 
!b_chroma ) + if( !b_chroma && h->param.analyse.i_subpel_refine ) for( int k = 1; k < 4; k++ ) h->mb.pic.p_fref[1][j][i*4+k] = filtered_src[k] + ref_pix_offset[j&1]; } @@ -806,9 +807,9 @@ x264_prefetch( &h->mb.cbp[top] ); x264_prefetch( h->mb.intra4x4_pred_mode[top] ); x264_prefetch( &h->mb.non_zero_count[top][12] ); - /* These aren't always allocated, but prefetching an invalid address can't hurt. */ x264_prefetch( &h->mb.mb_transform_size[top] ); - x264_prefetch( &h->mb.skipbp[top] ); + if( h->param.b_cabac ) + x264_prefetch( &h->mb.skipbp[top] ); } } @@ -894,7 +895,8 @@ * once every 4 MBs, so one extra prefetch is worthwhile */ x264_prefetch( &h->mb.mv[l][top_4x4+4] ); x264_prefetch( &h->mb.ref[l][top_8x8-1] ); - x264_prefetch( &h->mb.mvd[l][top] ); + if( h->param.b_cabac ) + x264_prefetch( &h->mb.mvd[l][top] ); } } else @@ -1640,17 +1642,17 @@ * bottom row of each field. We also store samples needed for the next * mbpair in intra_border_backup[2]. */ int backup_dst = !b_mbaff ? (mb_y&1) : (mb_y&1) ? 1 : MB_INTERLACED ? 
0 : 2; - memcpy( &h->intra_border_backup[backup_dst][0][mb_x*16 ], h->mb.pic.p_fdec[0]+FDEC_STRIDE*15, 16*sizeof(pixel) ); + memcpy( &h->intra_border_backup[backup_dst][0][mb_x*16 ], h->mb.pic.p_fdec[0]+FDEC_STRIDE*15, 16*SIZEOF_PIXEL ); if( CHROMA444 ) { - memcpy( &h->intra_border_backup[backup_dst][1][mb_x*16 ], h->mb.pic.p_fdec[1]+FDEC_STRIDE*15, 16*sizeof(pixel) ); - memcpy( &h->intra_border_backup[backup_dst][2][mb_x*16 ], h->mb.pic.p_fdec[2]+FDEC_STRIDE*15, 16*sizeof(pixel) ); + memcpy( &h->intra_border_backup[backup_dst][1][mb_x*16 ], h->mb.pic.p_fdec[1]+FDEC_STRIDE*15, 16*SIZEOF_PIXEL ); + memcpy( &h->intra_border_backup[backup_dst][2][mb_x*16 ], h->mb.pic.p_fdec[2]+FDEC_STRIDE*15, 16*SIZEOF_PIXEL ); } else if( CHROMA_FORMAT ) { int backup_src = (15>>CHROMA_V_SHIFT) * FDEC_STRIDE; - memcpy( &h->intra_border_backup[backup_dst][1][mb_x*16 ], h->mb.pic.p_fdec[1]+backup_src, 8*sizeof(pixel) ); - memcpy( &h->intra_border_backup[backup_dst][1][mb_x*16+8], h->mb.pic.p_fdec[2]+backup_src, 8*sizeof(pixel) ); + memcpy( &h->intra_border_backup[backup_dst][1][mb_x*16 ], h->mb.pic.p_fdec[1]+backup_src, 8*SIZEOF_PIXEL ); + memcpy( &h->intra_border_backup[backup_dst][1][mb_x*16+8], h->mb.pic.p_fdec[2]+backup_src, 8*SIZEOF_PIXEL ); } if( b_mbaff ) { @@ -1658,18 +1660,18 @@ { int backup_src = (MB_INTERLACED ? 7 : 14) * FDEC_STRIDE; backup_dst = MB_INTERLACED ? 
2 : 0; - memcpy( &h->intra_border_backup[backup_dst][0][mb_x*16 ], h->mb.pic.p_fdec[0]+backup_src, 16*sizeof(pixel) ); + memcpy( &h->intra_border_backup[backup_dst][0][mb_x*16 ], h->mb.pic.p_fdec[0]+backup_src, 16*SIZEOF_PIXEL ); if( CHROMA444 ) { - memcpy( &h->intra_border_backup[backup_dst][1][mb_x*16 ], h->mb.pic.p_fdec[1]+backup_src, 16*sizeof(pixel) ); - memcpy( &h->intra_border_backup[backup_dst][2][mb_x*16 ], h->mb.pic.p_fdec[2]+backup_src, 16*sizeof(pixel) ); + memcpy( &h->intra_border_backup[backup_dst][1][mb_x*16 ], h->mb.pic.p_fdec[1]+backup_src, 16*SIZEOF_PIXEL ); + memcpy( &h->intra_border_backup[backup_dst][2][mb_x*16 ], h->mb.pic.p_fdec[2]+backup_src, 16*SIZEOF_PIXEL ); } else if( CHROMA_FORMAT ) { if( CHROMA_FORMAT == CHROMA_420 ) backup_src = (MB_INTERLACED ? 3 : 6) * FDEC_STRIDE; - memcpy( &h->intra_border_backup[backup_dst][1][mb_x*16 ], h->mb.pic.p_fdec[1]+backup_src, 8*sizeof(pixel) ); - memcpy( &h->intra_border_backup[backup_dst][1][mb_x*16+8], h->mb.pic.p_fdec[2]+backup_src, 8*sizeof(pixel) ); + memcpy( &h->intra_border_backup[backup_dst][1][mb_x*16 ], h->mb.pic.p_fdec[1]+backup_src, 8*SIZEOF_PIXEL ); + memcpy( &h->intra_border_backup[backup_dst][1][mb_x*16+8], h->mb.pic.p_fdec[2]+backup_src, 8*SIZEOF_PIXEL ); } } } @@ -1779,9 +1781,7 @@ if( h->sh.i_type != SLICE_TYPE_I ) { int16_t (*mv0)[2] = &h->mb.mv[0][i_mb_4x4]; - int16_t (*mv1)[2] = &h->mb.mv[1][i_mb_4x4]; int8_t *ref0 = &h->mb.ref[0][i_mb_8x8]; - int8_t *ref1 = &h->mb.ref[1][i_mb_8x8]; if( !IS_INTRA( i_mb_type ) ) { ref0[0+0*s8x8] = h->mb.cache.ref[0][x264_scan8[0]]; @@ -1794,6 +1794,8 @@ CP128( &mv0[3*s4x4], h->mb.cache.mv[0][x264_scan8[0]+8*3] ); if( h->sh.i_type == SLICE_TYPE_B ) { + int16_t (*mv1)[2] = &h->mb.mv[1][i_mb_4x4]; + int8_t *ref1 = &h->mb.ref[1][i_mb_8x8]; ref1[0+0*s8x8] = h->mb.cache.ref[1][x264_scan8[0]]; ref1[1+0*s8x8] = h->mb.cache.ref[1][x264_scan8[4]]; ref1[0+1*s8x8] = h->mb.cache.ref[1][x264_scan8[8]]; @@ -1814,6 +1816,8 @@ M128( &mv0[3*s4x4] ) = M128_ZERO; if( 
h->sh.i_type == SLICE_TYPE_B ) { + int16_t (*mv1)[2] = &h->mb.mv[1][i_mb_4x4]; + int8_t *ref1 = &h->mb.ref[1][i_mb_8x8]; M16( &ref1[0*s8x8] ) = (uint8_t)(-1) * 0x0101; M16( &ref1[1*s8x8] ) = (uint8_t)(-1) * 0x0101; M128( &mv1[0*s4x4] ) = M128_ZERO; @@ -1827,7 +1831,6 @@ if( h->param.b_cabac ) { uint8_t (*mvd0)[2] = h->mb.mvd[0][i_mb_xy]; - uint8_t (*mvd1)[2] = h->mb.mvd[1][i_mb_xy]; if( IS_INTRA(i_mb_type) && i_mb_type != I_PCM ) h->mb.chroma_pred_mode[i_mb_xy] = x264_mb_chroma_pred_mode_fix[h->mb.i_chroma_pred_mode]; else @@ -1841,6 +1844,7 @@ CP16( mvd0[6], h->mb.cache.mvd[0][x264_scan8[13]] ); if( h->sh.i_type == SLICE_TYPE_B ) { + uint8_t (*mvd1)[2] = h->mb.mvd[1][i_mb_xy]; CP64( mvd1[0], h->mb.cache.mvd[1][x264_scan8[10]] ); CP16( mvd1[4], h->mb.cache.mvd[1][x264_scan8[5 ]] ); CP16( mvd1[5], h->mb.cache.mvd[1][x264_scan8[7 ]] ); @@ -1851,7 +1855,10 @@ { M128( mvd0[0] ) = M128_ZERO; if( h->sh.i_type == SLICE_TYPE_B ) + { + uint8_t (*mvd1)[2] = h->mb.mvd[1][i_mb_xy]; M128( mvd1[0] ) = M128_ZERO; + } } if( h->sh.i_type == SLICE_TYPE_B ) diff -Nru x264-0.157.2935+git545de2f/common/macroblock.h x264-0.160.3011+gitcde9a93/common/macroblock.h --- x264-0.157.2935+git545de2f/common/macroblock.h 2018-09-25 12:35:00.000000000 +0000 +++ x264-0.160.3011+gitcde9a93/common/macroblock.h 2020-07-13 10:30:21.000000000 +0000 @@ -1,7 +1,7 @@ /***************************************************************************** * macroblock.h: macroblock common functions ***************************************************************************** - * Copyright (C) 2005-2018 x264 project + * Copyright (C) 2005-2020 x264 project * * Authors: Loren Merritt * Laurent Aimar diff -Nru x264-0.157.2935+git545de2f/common/mc.c x264-0.160.3011+gitcde9a93/common/mc.c --- x264-0.157.2935+git545de2f/common/mc.c 2018-09-25 12:35:00.000000000 +0000 +++ x264-0.160.3011+gitcde9a93/common/mc.c 2020-07-13 10:30:21.000000000 +0000 @@ -1,7 +1,7 @@ 
/***************************************************************************** * mc.c: motion compensation ***************************************************************************** - * Copyright (C) 2003-2018 x264 project + * Copyright (C) 2003-2020 x264 project * * Authors: Laurent Aimar * Loren Merritt @@ -29,16 +29,16 @@ #if HAVE_MMX #include "x86/mc.h" #endif -#if ARCH_PPC +#if HAVE_ALTIVEC #include "ppc/mc.h" #endif -#if ARCH_ARM +#if HAVE_ARMV6 #include "arm/mc.h" #endif -#if ARCH_AARCH64 +#if HAVE_AARCH64 #include "aarch64/mc.h" #endif -#if ARCH_MIPS +#if HAVE_MSA #include "mips/mc.h" #endif @@ -160,7 +160,7 @@ { for( int y = 0; y < i_height; y++ ) { - memcpy( dst, src, i_width * sizeof(pixel) ); + memcpy( dst, src, i_width * SIZEOF_PIXEL ); src += i_src_stride; dst += i_dst_stride; @@ -293,7 +293,7 @@ { while( h-- ) { - memcpy( dst, src, w * sizeof(pixel) ); + memcpy( dst, src, w * SIZEOF_PIXEL ); dst += i_dst; src += i_src; } @@ -462,7 +462,7 @@ // duplicate last row and column so that their interpolation doesn't have to be special-cased for( int y = 0; y < i_height; y++ ) src[i_width+y*i_stride] = src[i_width-1+y*i_stride]; - memcpy( src+i_stride*i_height, src+i_stride*(i_height-1), (i_width+1) * sizeof(pixel) ); + memcpy( src+i_stride*i_height, src+i_stride*(i_height-1), (i_width+1) * SIZEOF_PIXEL ); h->mc.frame_init_lowres_core( src, frame->lowres[0], frame->lowres[1], frame->lowres[2], frame->lowres[3], i_stride, frame->i_stride_lowres, frame->i_width_lowres, frame->i_lines_lowres ); x264_frame_expand_border_lowres( frame ); @@ -529,7 +529,7 @@ unsigned width = h->mb.i_mb_width; unsigned height = h->mb.i_mb_height; - for( unsigned i = 0; i < len; i++ ) + for( int i = 0; i < len; i++ ) { int lists_used = lowres_costs[i]>>LOWRES_COST_SHIFT; @@ -607,7 +607,7 @@ dst[i] = (int16_t)endian_fix16( src[i] ) * (1.0f/256.0f); } -void x264_mc_init( int cpu, x264_mc_functions_t *pf, int cpu_independent ) +void x264_mc_init( uint32_t cpu, x264_mc_functions_t 
*pf, int cpu_independent ) { pf->mc_luma = mc_luma; pf->get_ref = get_ref; @@ -680,7 +680,7 @@ #if HAVE_ARMV6 x264_mc_init_arm( cpu, pf ); #endif -#if ARCH_AARCH64 +#if HAVE_AARCH64 x264_mc_init_aarch64( cpu, pf ); #endif #if HAVE_MSA diff -Nru x264-0.157.2935+git545de2f/common/mc.h x264-0.160.3011+gitcde9a93/common/mc.h --- x264-0.157.2935+git545de2f/common/mc.h 2018-09-25 12:35:00.000000000 +0000 +++ x264-0.160.3011+gitcde9a93/common/mc.h 2020-07-13 10:30:21.000000000 +0000 @@ -1,7 +1,7 @@ /***************************************************************************** * mc.h: motion compensation ***************************************************************************** - * Copyright (C) 2004-2018 x264 project + * Copyright (C) 2004-2020 x264 project * * Authors: Loren Merritt * @@ -53,7 +53,7 @@ unsigned width = h->mb.i_mb_width;\ unsigned height = h->mb.i_mb_height;\ \ - for( unsigned i = 0; i < len; current += 32 )\ + for( int i = 0; i < len; current += 32 )\ {\ int end = X264_MIN( i+8, len );\ for( ; i < end; i++, current += 2 )\ @@ -107,7 +107,7 @@ #define PLANE_COPY(align, cpu)\ static void plane_copy_##cpu( pixel *dst, intptr_t i_dst, pixel *src, intptr_t i_src, int w, int h )\ {\ - int c_w = (align) / sizeof(pixel) - 1;\ + int c_w = (align) / SIZEOF_PIXEL - 1;\ if( w < 256 ) /* tiny resolutions don't want non-temporal hints. dunno the exact threshold. */\ x264_plane_copy_c( dst, i_dst, src, i_src, w, h );\ else if( !(w&c_w) )\ @@ -126,7 +126,7 @@ x264_plane_copy_core_##cpu( dst+i_dst, i_dst, src+i_src, i_src, (w+c_w)&~c_w, h );\ }\ /* use plain memcpy on the last line (in memory order) to avoid overreading src. 
*/\ - memcpy( dst, src, w*sizeof(pixel) );\ + memcpy( dst, src, w*SIZEOF_PIXEL );\ }\ } @@ -136,7 +136,7 @@ #define PLANE_COPY_SWAP(align, cpu)\ static void plane_copy_swap_##cpu( pixel *dst, intptr_t i_dst, pixel *src, intptr_t i_src, int w, int h )\ {\ - int c_w = (align>>1) / sizeof(pixel) - 1;\ + int c_w = (align>>1) / SIZEOF_PIXEL - 1;\ if( !(w&c_w) )\ x264_plane_copy_swap_core_##cpu( dst, i_dst, src, i_src, w, h );\ else if( w > c_w )\ @@ -173,7 +173,7 @@ static void plane_copy_deinterleave_yuyv_##cpu( pixel *dsta, intptr_t i_dsta, pixel *dstb, intptr_t i_dstb,\ pixel *src, intptr_t i_src, int w, int h )\ {\ - int c_w = (align>>1) / sizeof(pixel) - 1;\ + int c_w = (align>>1) / SIZEOF_PIXEL - 1;\ if( !(w&c_w) )\ x264_plane_copy_deinterleave_##cpu( dsta, i_dsta, dstb, i_dstb, src, i_src, w, h );\ else if( w > c_w )\ @@ -207,7 +207,7 @@ pixel *srcu, intptr_t i_srcu,\ pixel *srcv, intptr_t i_srcv, int w, int h )\ {\ - int c_w = 16 / sizeof(pixel) - 1;\ + int c_w = 16 / SIZEOF_PIXEL - 1;\ if( !(w&c_w) )\ x264_plane_copy_interleave_core_##cpu( dst, i_dst, srcu, i_srcu, srcv, i_srcv, w, h );\ else if( w > c_w && (i_srcu ^ i_srcv) >= 0 ) /* only works correctly for strides with identical signs */\ @@ -340,6 +340,6 @@ } x264_mc_functions_t; #define x264_mc_init x264_template(mc_init) -void x264_mc_init( int cpu, x264_mc_functions_t *pf, int cpu_independent ); +void x264_mc_init( uint32_t cpu, x264_mc_functions_t *pf, int cpu_independent ); #endif diff -Nru x264-0.157.2935+git545de2f/common/mips/dct-c.c x264-0.160.3011+gitcde9a93/common/mips/dct-c.c --- x264-0.157.2935+git545de2f/common/mips/dct-c.c 2018-09-25 12:35:00.000000000 +0000 +++ x264-0.160.3011+gitcde9a93/common/mips/dct-c.c 2020-07-13 10:30:21.000000000 +0000 @@ -1,7 +1,7 @@ /***************************************************************************** * dct-c.c: msa transform and zigzag ***************************************************************************** - * Copyright (C) 2015-2018 x264 project + * 
Copyright (C) 2015-2020 x264 project * * Authors: Rishikesh More * diff -Nru x264-0.157.2935+git545de2f/common/mips/dct.h x264-0.160.3011+gitcde9a93/common/mips/dct.h --- x264-0.157.2935+git545de2f/common/mips/dct.h 2018-09-25 12:35:00.000000000 +0000 +++ x264-0.160.3011+gitcde9a93/common/mips/dct.h 2020-07-13 10:30:21.000000000 +0000 @@ -1,7 +1,7 @@ /***************************************************************************** * dct.h: msa transform and zigzag ***************************************************************************** - * Copyright (C) 2015-2018 x264 project + * Copyright (C) 2015-2020 x264 project * * Authors: Rishikesh More * diff -Nru x264-0.157.2935+git545de2f/common/mips/deblock-c.c x264-0.160.3011+gitcde9a93/common/mips/deblock-c.c --- x264-0.157.2935+git545de2f/common/mips/deblock-c.c 2018-09-25 12:35:00.000000000 +0000 +++ x264-0.160.3011+gitcde9a93/common/mips/deblock-c.c 2020-07-13 10:30:21.000000000 +0000 @@ -1,7 +1,7 @@ /***************************************************************************** * deblock-c.c: msa deblocking ***************************************************************************** - * Copyright (C) 2015-2018 x264 project + * Copyright (C) 2015-2020 x264 project * * Authors: Neha Rana * diff -Nru x264-0.157.2935+git545de2f/common/mips/deblock.h x264-0.160.3011+gitcde9a93/common/mips/deblock.h --- x264-0.157.2935+git545de2f/common/mips/deblock.h 2018-09-25 12:35:00.000000000 +0000 +++ x264-0.160.3011+gitcde9a93/common/mips/deblock.h 2020-07-13 10:30:21.000000000 +0000 @@ -1,7 +1,7 @@ /***************************************************************************** * deblock.h: msa deblocking ***************************************************************************** - * Copyright (C) 2017-2018 x264 project + * Copyright (C) 2017-2020 x264 project * * Authors: Anton Mitrofanov * diff -Nru x264-0.157.2935+git545de2f/common/mips/macros.h x264-0.160.3011+gitcde9a93/common/mips/macros.h --- 
x264-0.157.2935+git545de2f/common/mips/macros.h 2018-09-25 12:35:00.000000000 +0000 +++ x264-0.160.3011+gitcde9a93/common/mips/macros.h 2020-07-13 10:30:21.000000000 +0000 @@ -1,7 +1,7 @@ /***************************************************************************** * macros.h: msa macros ***************************************************************************** - * Copyright (C) 2015-2018 x264 project + * Copyright (C) 2015-2020 x264 project * * Authors: Rishikesh More * diff -Nru x264-0.157.2935+git545de2f/common/mips/mc-c.c x264-0.160.3011+gitcde9a93/common/mips/mc-c.c --- x264-0.157.2935+git545de2f/common/mips/mc-c.c 2018-09-25 12:35:00.000000000 +0000 +++ x264-0.160.3011+gitcde9a93/common/mips/mc-c.c 2020-07-13 10:30:21.000000000 +0000 @@ -1,7 +1,7 @@ /***************************************************************************** * mc-c.c: msa motion compensation ***************************************************************************** - * Copyright (C) 2015-2018 x264 project + * Copyright (C) 2015-2020 x264 project * * Authors: Neha Rana * @@ -3650,7 +3650,7 @@ }; #endif // !HIGH_BIT_DEPTH -void x264_mc_init_mips( int32_t cpu, x264_mc_functions_t *pf ) +void x264_mc_init_mips( uint32_t cpu, x264_mc_functions_t *pf ) { #if !HIGH_BIT_DEPTH if( cpu & X264_CPU_MSA ) diff -Nru x264-0.157.2935+git545de2f/common/mips/mc.h x264-0.160.3011+gitcde9a93/common/mips/mc.h --- x264-0.157.2935+git545de2f/common/mips/mc.h 2018-09-25 12:35:00.000000000 +0000 +++ x264-0.160.3011+gitcde9a93/common/mips/mc.h 2020-07-13 10:30:21.000000000 +0000 @@ -1,7 +1,7 @@ /***************************************************************************** * mc.h: msa motion compensation ***************************************************************************** - * Copyright (C) 2015-2018 x264 project + * Copyright (C) 2015-2020 x264 project * * Authors: Neha Rana * @@ -27,6 +27,6 @@ #define X264_MIPS_MC_H #define x264_mc_init_mips x264_template(mc_init_mips) -void x264_mc_init_mips( int 
cpu, x264_mc_functions_t *pf ); +void x264_mc_init_mips( uint32_t cpu, x264_mc_functions_t *pf ); #endif diff -Nru x264-0.157.2935+git545de2f/common/mips/pixel-c.c x264-0.160.3011+gitcde9a93/common/mips/pixel-c.c --- x264-0.157.2935+git545de2f/common/mips/pixel-c.c 2018-09-25 12:35:00.000000000 +0000 +++ x264-0.160.3011+gitcde9a93/common/mips/pixel-c.c 2020-07-13 10:30:21.000000000 +0000 @@ -1,7 +1,7 @@ /***************************************************************************** * pixel-c.c: msa pixel metrics ***************************************************************************** - * Copyright (C) 2015-2018 x264 project + * Copyright (C) 2015-2020 x264 project * * Authors: Mandar Sahastrabuddhe * diff -Nru x264-0.157.2935+git545de2f/common/mips/pixel.h x264-0.160.3011+gitcde9a93/common/mips/pixel.h --- x264-0.157.2935+git545de2f/common/mips/pixel.h 2018-09-25 12:35:00.000000000 +0000 +++ x264-0.160.3011+gitcde9a93/common/mips/pixel.h 2020-07-13 10:30:21.000000000 +0000 @@ -1,7 +1,7 @@ /***************************************************************************** * pixel.h: msa pixel metrics ***************************************************************************** - * Copyright (C) 2015-2018 x264 project + * Copyright (C) 2015-2020 x264 project * * Authors: Mandar Sahastrabuddhe * diff -Nru x264-0.157.2935+git545de2f/common/mips/predict-c.c x264-0.160.3011+gitcde9a93/common/mips/predict-c.c --- x264-0.157.2935+git545de2f/common/mips/predict-c.c 2018-09-25 12:35:00.000000000 +0000 +++ x264-0.160.3011+gitcde9a93/common/mips/predict-c.c 2020-07-13 10:30:21.000000000 +0000 @@ -1,7 +1,7 @@ /***************************************************************************** * predict-c.c: msa intra prediction ***************************************************************************** - * Copyright (C) 2015-2018 x264 project + * Copyright (C) 2015-2020 x264 project * * Authors: Mandar Sahastrabuddhe * diff -Nru x264-0.157.2935+git545de2f/common/mips/predict.h 
x264-0.160.3011+gitcde9a93/common/mips/predict.h --- x264-0.157.2935+git545de2f/common/mips/predict.h 2018-09-25 12:35:00.000000000 +0000 +++ x264-0.160.3011+gitcde9a93/common/mips/predict.h 2020-07-13 10:30:21.000000000 +0000 @@ -1,7 +1,7 @@ /***************************************************************************** * predict.h: msa intra prediction ***************************************************************************** - * Copyright (C) 2015-2018 x264 project + * Copyright (C) 2015-2020 x264 project * * Authors: Rishikesh More * diff -Nru x264-0.157.2935+git545de2f/common/mips/quant-c.c x264-0.160.3011+gitcde9a93/common/mips/quant-c.c --- x264-0.157.2935+git545de2f/common/mips/quant-c.c 2018-09-25 12:35:00.000000000 +0000 +++ x264-0.160.3011+gitcde9a93/common/mips/quant-c.c 2020-07-13 10:30:21.000000000 +0000 @@ -1,7 +1,7 @@ /***************************************************************************** * quant-c.c: msa quantization and level-run ***************************************************************************** - * Copyright (C) 2015-2018 x264 project + * Copyright (C) 2015-2020 x264 project * * Authors: Rishikesh More * diff -Nru x264-0.157.2935+git545de2f/common/mips/quant.h x264-0.160.3011+gitcde9a93/common/mips/quant.h --- x264-0.157.2935+git545de2f/common/mips/quant.h 2018-09-25 12:35:00.000000000 +0000 +++ x264-0.160.3011+gitcde9a93/common/mips/quant.h 2020-07-13 10:30:21.000000000 +0000 @@ -1,7 +1,7 @@ /***************************************************************************** * quant.h: msa quantization and level-run ***************************************************************************** - * Copyright (C) 2015-2018 x264 project + * Copyright (C) 2015-2020 x264 project * * Authors: Rishikesh More * diff -Nru x264-0.157.2935+git545de2f/common/mvpred.c x264-0.160.3011+gitcde9a93/common/mvpred.c --- x264-0.157.2935+git545de2f/common/mvpred.c 2018-09-25 12:35:00.000000000 +0000 +++ x264-0.160.3011+gitcde9a93/common/mvpred.c 
2020-07-13 10:30:21.000000000 +0000 @@ -1,7 +1,7 @@ /***************************************************************************** * mvpred.c: motion vector prediction ***************************************************************************** - * Copyright (C) 2003-2018 x264 project + * Copyright (C) 2003-2020 x264 project * * Authors: Loren Merritt * Fiona Glaser diff -Nru x264-0.157.2935+git545de2f/common/opencl.c x264-0.160.3011+gitcde9a93/common/opencl.c --- x264-0.157.2935+git545de2f/common/opencl.c 2018-09-25 12:35:00.000000000 +0000 +++ x264-0.160.3011+gitcde9a93/common/opencl.c 2020-07-13 10:30:21.000000000 +0000 @@ -1,7 +1,7 @@ /***************************************************************************** * opencl.c: OpenCL initialization and kernel compilation ***************************************************************************** - * Copyright (C) 2012-2018 x264 project + * Copyright (C) 2012-2020 x264 project * * Authors: Steve Borho * Anton Mitrofanov @@ -131,8 +131,11 @@ uint8_t *binary = NULL; fseek( fp, 0, SEEK_END ); - size_t size = ftell( fp ); - rewind( fp ); + int64_t file_size = ftell( fp ); + fseek( fp, 0, SEEK_SET ); + if( file_size < 0 || (uint64_t)file_size > SIZE_MAX ) + goto fail; + size_t size = file_size; CHECKED_MALLOC( binary, size ); if( fread( binary, 1, size, fp ) != size ) diff -Nru x264-0.157.2935+git545de2f/common/opencl.h x264-0.160.3011+gitcde9a93/common/opencl.h --- x264-0.157.2935+git545de2f/common/opencl.h 2018-09-25 12:35:00.000000000 +0000 +++ x264-0.160.3011+gitcde9a93/common/opencl.h 2020-07-13 10:30:21.000000000 +0000 @@ -1,7 +1,7 @@ /***************************************************************************** * opencl.h: OpenCL structures and defines ***************************************************************************** - * Copyright (C) 2012-2018 x264 project + * Copyright (C) 2012-2020 x264 project * * Authors: Steve Borho * Anton Mitrofanov diff -Nru x264-0.157.2935+git545de2f/common/osdep.c 
x264-0.160.3011+gitcde9a93/common/osdep.c --- x264-0.157.2935+git545de2f/common/osdep.c 2018-09-25 12:35:00.000000000 +0000 +++ x264-0.160.3011+gitcde9a93/common/osdep.c 2020-07-13 10:30:21.000000000 +0000 @@ -1,7 +1,7 @@ /***************************************************************************** * osdep.c: platform-specific code ***************************************************************************** - * Copyright (C) 2003-2018 x264 project + * Copyright (C) 2003-2020 x264 project * * Authors: Steven Walters * Laurent Aimar @@ -27,11 +27,6 @@ #include "osdep.h" -#ifdef _WIN32 -#include -#include -#endif - #if SYS_WINDOWS #include #include @@ -111,114 +106,3 @@ return 0; } #endif - -#ifdef _WIN32 -/* Functions for dealing with Unicode on Windows. */ -FILE *x264_fopen( const char *filename, const char *mode ) -{ - wchar_t filename_utf16[MAX_PATH]; - wchar_t mode_utf16[16]; - if( utf8_to_utf16( filename, filename_utf16 ) && utf8_to_utf16( mode, mode_utf16 ) ) - return _wfopen( filename_utf16, mode_utf16 ); - return NULL; -} - -int x264_rename( const char *oldname, const char *newname ) -{ - wchar_t oldname_utf16[MAX_PATH]; - wchar_t newname_utf16[MAX_PATH]; - if( utf8_to_utf16( oldname, oldname_utf16 ) && utf8_to_utf16( newname, newname_utf16 ) ) - { - /* POSIX says that rename() removes the destination, but Win32 doesn't. 
*/ - _wunlink( newname_utf16 ); - return _wrename( oldname_utf16, newname_utf16 ); - } - return -1; -} - -int x264_stat( const char *path, x264_struct_stat *buf ) -{ - wchar_t path_utf16[MAX_PATH]; - if( utf8_to_utf16( path, path_utf16 ) ) - return _wstati64( path_utf16, buf ); - return -1; -} - -#if !HAVE_WINRT -int x264_vfprintf( FILE *stream, const char *format, va_list arg ) -{ - HANDLE console = NULL; - DWORD mode; - - if( stream == stdout ) - console = GetStdHandle( STD_OUTPUT_HANDLE ); - else if( stream == stderr ) - console = GetStdHandle( STD_ERROR_HANDLE ); - - /* Only attempt to convert to UTF-16 when writing to a non-redirected console screen buffer. */ - if( GetConsoleMode( console, &mode ) ) - { - char buf[4096]; - wchar_t buf_utf16[4096]; - va_list arg2; - - va_copy( arg2, arg ); - int length = vsnprintf( buf, sizeof(buf), format, arg2 ); - va_end( arg2 ); - - if( length > 0 && length < sizeof(buf) ) - { - /* WriteConsoleW is the most reliable way to output Unicode to a console. */ - int length_utf16 = MultiByteToWideChar( CP_UTF8, 0, buf, length, buf_utf16, sizeof(buf_utf16)/sizeof(wchar_t) ); - DWORD written; - WriteConsoleW( console, buf_utf16, length_utf16, &written, NULL ); - return length; - } - } - return vfprintf( stream, format, arg ); -} - -int x264_is_pipe( const char *path ) -{ - wchar_t path_utf16[MAX_PATH]; - if( utf8_to_utf16( path, path_utf16 ) ) - return WaitNamedPipeW( path_utf16, 0 ); - return 0; -} -#endif - -#if defined(_MSC_VER) && _MSC_VER < 1900 -/* MSVC pre-VS2015 has broken snprintf/vsnprintf implementations which are incompatible with C99. */ -int x264_snprintf( char *s, size_t n, const char *fmt, ... 
) -{ - va_list arg; - va_start( arg, fmt ); - int length = x264_vsnprintf( s, n, fmt, arg ); - va_end( arg ); - return length; -} - -int x264_vsnprintf( char *s, size_t n, const char *fmt, va_list arg ) -{ - int length = -1; - - if( n ) - { - va_list arg2; - va_copy( arg2, arg ); - length = _vsnprintf( s, n, fmt, arg2 ); - va_end( arg2 ); - - /* _(v)snprintf adds a null-terminator only if the length is less than the buffer size. */ - if( length < 0 || length >= n ) - s[n-1] = '\0'; - } - - /* _(v)snprintf returns a negative number if the length is greater than the buffer size. */ - if( length < 0 ) - return _vscprintf( fmt, arg ); - - return length; -} -#endif -#endif diff -Nru x264-0.157.2935+git545de2f/common/osdep.h x264-0.160.3011+gitcde9a93/common/osdep.h --- x264-0.157.2935+git545de2f/common/osdep.h 2018-09-25 12:35:00.000000000 +0000 +++ x264-0.160.3011+gitcde9a93/common/osdep.h 2020-07-13 10:30:21.000000000 +0000 @@ -1,7 +1,7 @@ /***************************************************************************** * osdep.h: platform-specific code ***************************************************************************** - * Copyright (C) 2007-2018 x264 project + * Copyright (C) 2007-2020 x264 project * * Authors: Loren Merritt * Laurent Aimar @@ -43,6 +43,13 @@ #include #endif +#ifdef _WIN32 +#include +#include +#endif + +#include "x264.h" + #if !HAVE_LOG2F #define log2f(x) (logf(x)/0.693147180559945f) #define log2(x) (log(x)/0.693147180559945) @@ -54,12 +61,6 @@ #define strncasecmp _strnicmp #define strtok_r strtok_s #define S_ISREG(x) (((x) & S_IFMT) == S_IFREG) -#if _MSC_VER < 1900 -int x264_snprintf( char *s, size_t n, const char *fmt, ... 
); -int x264_vsnprintf( char *s, size_t n, const char *fmt, va_list arg ); -#define snprintf x264_snprintf -#define vsnprintf x264_vsnprintf -#endif #else #include #endif @@ -76,14 +77,81 @@ #define strtok_r(str,delim,save) strtok(str,delim) #endif +#if defined(_MSC_VER) && _MSC_VER < 1900 +/* MSVC pre-VS2015 has broken snprintf/vsnprintf implementations which are incompatible with C99. */ +static inline int x264_vsnprintf( char *s, size_t n, const char *fmt, va_list arg ) +{ + int length = -1; + + if( n ) + { + va_list arg2; + va_copy( arg2, arg ); + length = _vsnprintf( s, n, fmt, arg2 ); + va_end( arg2 ); + + /* _(v)snprintf adds a null-terminator only if the length is less than the buffer size. */ + if( length < 0 || length >= n ) + s[n-1] = '\0'; + } + + /* _(v)snprintf returns a negative number if the length is greater than the buffer size. */ + if( length < 0 ) + return _vscprintf( fmt, arg ); + + return length; +} + +static inline int x264_snprintf( char *s, size_t n, const char *fmt, ... ) +{ + va_list arg; + va_start( arg, fmt ); + int length = x264_vsnprintf( s, n, fmt, arg ); + va_end( arg ); + return length; +} + +#define snprintf x264_snprintf +#define vsnprintf x264_vsnprintf +#endif + #ifdef _WIN32 #define utf8_to_utf16( utf8, utf16 )\ MultiByteToWideChar( CP_UTF8, MB_ERR_INVALID_CHARS, utf8, -1, utf16, sizeof(utf16)/sizeof(wchar_t) ) -FILE *x264_fopen( const char *filename, const char *mode ); -int x264_rename( const char *oldname, const char *newname ); + +/* Functions for dealing with Unicode on Windows. 
*/ +static inline FILE *x264_fopen( const char *filename, const char *mode ) +{ + wchar_t filename_utf16[MAX_PATH]; + wchar_t mode_utf16[16]; + if( utf8_to_utf16( filename, filename_utf16 ) && utf8_to_utf16( mode, mode_utf16 ) ) + return _wfopen( filename_utf16, mode_utf16 ); + return NULL; +} + +static inline int x264_rename( const char *oldname, const char *newname ) +{ + wchar_t oldname_utf16[MAX_PATH]; + wchar_t newname_utf16[MAX_PATH]; + if( utf8_to_utf16( oldname, oldname_utf16 ) && utf8_to_utf16( newname, newname_utf16 ) ) + { + /* POSIX says that rename() removes the destination, but Win32 doesn't. */ + _wunlink( newname_utf16 ); + return _wrename( oldname_utf16, newname_utf16 ); + } + return -1; +} + #define x264_struct_stat struct _stati64 #define x264_fstat _fstati64 -int x264_stat( const char *path, x264_struct_stat *buf ); + +static inline int x264_stat( const char *path, x264_struct_stat *buf ) +{ + wchar_t path_utf16[MAX_PATH]; + if( utf8_to_utf16( path, path_utf16 ) ) + return _wstati64( path_utf16, buf ); + return -1; +} #else #define x264_fopen fopen #define x264_rename rename @@ -93,11 +161,49 @@ #endif /* mdate: return the current date in microsecond */ -int64_t x264_mdate( void ); +X264_API int64_t x264_mdate( void ); #if defined(_WIN32) && !HAVE_WINRT -int x264_vfprintf( FILE *stream, const char *format, va_list arg ); -int x264_is_pipe( const char *path ); +static inline int x264_vfprintf( FILE *stream, const char *format, va_list arg ) +{ + HANDLE console = NULL; + DWORD mode; + + if( stream == stdout ) + console = GetStdHandle( STD_OUTPUT_HANDLE ); + else if( stream == stderr ) + console = GetStdHandle( STD_ERROR_HANDLE ); + + /* Only attempt to convert to UTF-16 when writing to a non-redirected console screen buffer. 
*/ + if( GetConsoleMode( console, &mode ) ) + { + char buf[4096]; + wchar_t buf_utf16[4096]; + va_list arg2; + + va_copy( arg2, arg ); + int length = vsnprintf( buf, sizeof(buf), format, arg2 ); + va_end( arg2 ); + + if( length > 0 && (unsigned)length < sizeof(buf) ) + { + /* WriteConsoleW is the most reliable way to output Unicode to a console. */ + int length_utf16 = MultiByteToWideChar( CP_UTF8, 0, buf, length, buf_utf16, sizeof(buf_utf16)/sizeof(wchar_t) ); + DWORD written; + WriteConsoleW( console, buf_utf16, length_utf16, &written, NULL ); + return length; + } + } + return vfprintf( stream, format, arg ); +} + +static inline int x264_is_pipe( const char *path ) +{ + wchar_t path_utf16[MAX_PATH]; + if( utf8_to_utf16( path, path_utf16 ) ) + return WaitNamedPipeW( path_utf16, 0 ); + return 0; +} #else #define x264_vfprintf vfprintf #define x264_is_pipe(x) 0 @@ -163,6 +269,12 @@ #define ALIGNED_ARRAY_64 ALIGNED_ARRAY_16 #endif +#if STACK_ALIGNMENT > 16 || (ARCH_X86 && STACK_ALIGNMENT > 4) +#define REALIGN_STACK __attribute__((force_align_arg_pointer)) +#else +#define REALIGN_STACK +#endif + #if defined(__GNUC__) && (__GNUC__ > 3 || __GNUC__ == 3 && __GNUC_MINOR__ > 0) #define UNUSED __attribute__((unused)) #define ALWAYS_INLINE __attribute__((always_inline)) inline @@ -247,7 +359,7 @@ #endif #if HAVE_WIN32THREAD || PTW32_STATIC_LIB -int x264_threading_init( void ); +X264_API int x264_threading_init( void ); #else #define x264_threading_init() 0 #endif @@ -311,7 +423,7 @@ return endian_fix32(x>>32) + ((uint64_t)endian_fix32(x)<<32); } #endif -static ALWAYS_INLINE intptr_t endian_fix( intptr_t x ) +static ALWAYS_INLINE uintptr_t endian_fix( uintptr_t x ) { return WORD_SIZE == 8 ? 
endian_fix64(x) : endian_fix32(x); } diff -Nru x264-0.157.2935+git545de2f/common/pixel.c x264-0.160.3011+gitcde9a93/common/pixel.c --- x264-0.157.2935+git545de2f/common/pixel.c 2018-09-25 12:35:00.000000000 +0000 +++ x264-0.160.3011+gitcde9a93/common/pixel.c 2020-07-13 10:30:21.000000000 +0000 @@ -1,7 +1,7 @@ /***************************************************************************** * pixel.c: pixel metrics ***************************************************************************** - * Copyright (C) 2003-2018 x264 project + * Copyright (C) 2003-2020 x264 project * * Authors: Loren Merritt * Laurent Aimar @@ -31,18 +31,18 @@ # include "x86/pixel.h" # include "x86/predict.h" #endif -#if ARCH_PPC +#if HAVE_ALTIVEC # include "ppc/pixel.h" #endif -#if ARCH_ARM +#if HAVE_ARMV6 # include "arm/pixel.h" # include "arm/predict.h" #endif -#if ARCH_AARCH64 +#if HAVE_AARCH64 # include "aarch64/pixel.h" # include "aarch64/predict.h" #endif -#if ARCH_MIPS +#if HAVE_MSA # include "mips/pixel.h" #endif @@ -508,7 +508,7 @@ #endif #if !HIGH_BIT_DEPTH -#if HAVE_ARMV6 || ARCH_AARCH64 +#if HAVE_ARMV6 || HAVE_AARCH64 SATD_X_DECL7( _neon ) #endif #endif // !HIGH_BIT_DEPTH @@ -532,7 +532,7 @@ INTRA_MBCMP_8x8( sad, _mmx2, _c ) INTRA_MBCMP_8x8(sa8d, _sse2, _sse2 ) #endif -#if !HIGH_BIT_DEPTH && (HAVE_ARMV6 || ARCH_AARCH64) +#if !HIGH_BIT_DEPTH && (HAVE_ARMV6 || HAVE_AARCH64) INTRA_MBCMP_8x8( sad, _neon, _neon ) INTRA_MBCMP_8x8(sa8d, _neon, _neon ) #endif @@ -602,7 +602,7 @@ INTRA_MBCMP( sad, 16x16, v, h, dc, , _neon, _neon ) INTRA_MBCMP(satd, 16x16, v, h, dc, , _neon, _neon ) #endif -#if !HIGH_BIT_DEPTH && ARCH_AARCH64 +#if !HIGH_BIT_DEPTH && HAVE_AARCH64 INTRA_MBCMP( sad, 4x4, v, h, dc, , _neon, _neon ) INTRA_MBCMP(satd, 4x4, v, h, dc, , _neon, _neon ) INTRA_MBCMP( sad, 8x8, dc, h, v, c, _neon, _neon ) @@ -804,7 +804,7 @@ /**************************************************************************** * x264_pixel_init: 
****************************************************************************/ -void x264_pixel_init( int cpu, x264_pixel_function_t *pixf ) +void x264_pixel_init( uint32_t cpu, x264_pixel_function_t *pixf ) { memset( pixf, 0, sizeof(*pixf) ); @@ -1434,7 +1434,7 @@ } #endif -#if ARCH_AARCH64 +#if HAVE_AARCH64 if( cpu&X264_CPU_NEON ) { INIT8( sad, _neon ); @@ -1475,7 +1475,7 @@ pixf->ssim_4x4x2_core = x264_pixel_ssim_4x4x2_core_neon; pixf->ssim_end4 = x264_pixel_ssim_end4_neon; } -#endif // ARCH_AARCH64 +#endif // HAVE_AARCH64 #if HAVE_MSA if( cpu&X264_CPU_MSA ) diff -Nru x264-0.157.2935+git545de2f/common/pixel.h x264-0.160.3011+gitcde9a93/common/pixel.h --- x264-0.157.2935+git545de2f/common/pixel.h 2018-09-25 12:35:00.000000000 +0000 +++ x264-0.160.3011+gitcde9a93/common/pixel.h 2020-07-13 10:30:21.000000000 +0000 @@ -1,7 +1,7 @@ /***************************************************************************** * pixel.c: pixel metrics ***************************************************************************** - * Copyright (C) 2004-2018 x264 project + * Copyright (C) 2004-2020 x264 project * * Authors: Loren Merritt * Fiona Glaser @@ -144,7 +144,7 @@ } x264_pixel_function_t; #define x264_pixel_init x264_template(pixel_init) -void x264_pixel_init( int cpu, x264_pixel_function_t *pixf ); +void x264_pixel_init( uint32_t cpu, x264_pixel_function_t *pixf ); #define x264_pixel_ssd_nv12 x264_template(pixel_ssd_nv12) void x264_pixel_ssd_nv12 ( x264_pixel_function_t *pf, pixel *pix1, intptr_t i_pix1, pixel *pix2, intptr_t i_pix2, int i_width, int i_height, uint64_t *ssd_u, uint64_t *ssd_v ); diff -Nru x264-0.157.2935+git545de2f/common/ppc/dct.c x264-0.160.3011+gitcde9a93/common/ppc/dct.c --- x264-0.157.2935+git545de2f/common/ppc/dct.c 2018-09-25 12:35:00.000000000 +0000 +++ x264-0.160.3011+gitcde9a93/common/ppc/dct.c 2020-07-13 10:30:21.000000000 +0000 @@ -1,7 +1,7 @@ /***************************************************************************** * dct.c: ppc transform and 
zigzag ***************************************************************************** - * Copyright (C) 2003-2018 x264 project + * Copyright (C) 2003-2020 x264 project * * Authors: Guillaume Poirier * Eric Petit diff -Nru x264-0.157.2935+git545de2f/common/ppc/dct.h x264-0.160.3011+gitcde9a93/common/ppc/dct.h --- x264-0.157.2935+git545de2f/common/ppc/dct.h 2018-09-25 12:35:00.000000000 +0000 +++ x264-0.160.3011+gitcde9a93/common/ppc/dct.h 2020-07-13 10:30:21.000000000 +0000 @@ -1,7 +1,7 @@ /***************************************************************************** * dct.h: ppc transform and zigzag ***************************************************************************** - * Copyright (C) 2003-2018 x264 project + * Copyright (C) 2003-2020 x264 project * * Authors: Eric Petit * Guillaume Poirier diff -Nru x264-0.157.2935+git545de2f/common/ppc/deblock.c x264-0.160.3011+gitcde9a93/common/ppc/deblock.c --- x264-0.157.2935+git545de2f/common/ppc/deblock.c 2018-09-25 12:35:00.000000000 +0000 +++ x264-0.160.3011+gitcde9a93/common/ppc/deblock.c 2020-07-13 10:30:21.000000000 +0000 @@ -1,7 +1,7 @@ /***************************************************************************** * deblock.c: ppc deblocking ***************************************************************************** - * Copyright (C) 2007-2018 x264 project + * Copyright (C) 2007-2020 x264 project * * Authors: Guillaume Poirier * diff -Nru x264-0.157.2935+git545de2f/common/ppc/deblock.h x264-0.160.3011+gitcde9a93/common/ppc/deblock.h --- x264-0.157.2935+git545de2f/common/ppc/deblock.h 2018-09-25 12:35:00.000000000 +0000 +++ x264-0.160.3011+gitcde9a93/common/ppc/deblock.h 2020-07-13 10:30:21.000000000 +0000 @@ -1,7 +1,7 @@ /***************************************************************************** * deblock.h: ppc deblocking ***************************************************************************** - * Copyright (C) 2017-2018 x264 project + * Copyright (C) 2017-2020 x264 project * * Authors: Anton 
Mitrofanov * diff -Nru x264-0.157.2935+git545de2f/common/ppc/mc.c x264-0.160.3011+gitcde9a93/common/ppc/mc.c --- x264-0.157.2935+git545de2f/common/ppc/mc.c 2018-09-25 12:35:00.000000000 +0000 +++ x264-0.160.3011+gitcde9a93/common/ppc/mc.c 2020-07-13 10:30:21.000000000 +0000 @@ -1,7 +1,7 @@ /***************************************************************************** * mc.c: ppc motion compensation ***************************************************************************** - * Copyright (C) 2003-2018 x264 project + * Copyright (C) 2003-2020 x264 project * * Authors: Eric Petit * Guillaume Poirier @@ -38,8 +38,16 @@ { for( int y = 0; y < i_height; y++ ) { +#ifndef __POWER9_VECTOR__ for( int x = 0; x < 4; x++ ) dst[x] = ( src1[x] + src2[x] + 1 ) >> 1; +#else + vec_u8_t s1 = vec_vsx_ld( 0, src1 ); + vec_u8_t s2 = vec_vsx_ld( 0, src2 ); + vec_u8_t avg = vec_avg( s1, s2 ); + + vec_xst_len( avg, dst, 4 ); +#endif dst += i_dst; src1 += i_src1; src2 += i_src1; @@ -51,7 +59,6 @@ uint8_t *src2, int i_height ) { vec_u8_t src1v, src2v; - PREP_STORE8; for( int y = 0; y < i_height; y++ ) { @@ -96,6 +103,12 @@ /* mc_copy: plain c */ +#ifndef __POWER9_VECTOR__ +#define tiny_copy( d, s, l ) memcpy( d, s, l ) +#else +#define tiny_copy( d, s, l ) vec_xst_len( vec_vsx_ld( 0, s ), d, l ) +#endif + #define MC_COPY( name, a ) \ static void name( uint8_t *dst, intptr_t i_dst, \ uint8_t *src, intptr_t i_src, int i_height ) \ @@ -416,6 +429,14 @@ #define VSLD(a,b,n) vec_sld(b,a,16-n) #endif +#ifndef __POWER9_VECTOR__ +#define STORE4_ALIGNED(d, s) vec_ste( (vec_u32_t)s, 0, (uint32_t*) d ) +#define STORE2_UNALIGNED(d, s) vec_ste( vec_splat( (vec_u16_t)s, 0 ), 0, (uint16_t*)d ) +#else +#define STORE4_ALIGNED(d, s) vec_xst_len( (vec_u8_t)s, d, 4 ) +#define STORE2_UNALIGNED(d, s) vec_xst_len( (vec_u8_t)s, d, 2 ) +#endif + static void mc_chroma_4xh_altivec( uint8_t *dstu, uint8_t *dstv, intptr_t i_dst_stride, uint8_t *src, intptr_t i_src_stride, int mvx, int mvy, int i_height ) @@ -476,8 +497,8 
@@ dstuv = (vec_u8_t)vec_perm( dstv16, dstv16, perm0v ); dstvv = (vec_u8_t)vec_perm( dstv16, dstv16, perm1v ); - vec_ste( (vec_u32_t)dstuv, 0, (uint32_t*) dstu ); - vec_ste( (vec_u32_t)dstvv, 0, (uint32_t*) dstv ); + STORE4_ALIGNED( dstu, dstuv ); + STORE4_ALIGNED( dstv, dstvv ); srcp += i_src_stride; dstu += i_dst_stride; @@ -498,8 +519,8 @@ dstuv = (vec_u8_t)vec_perm( dstv16, dstv16, perm0v ); dstvv = (vec_u8_t)vec_perm( dstv16, dstv16, perm1v ); - vec_ste( (vec_u32_t)dstuv, 0, (uint32_t*) dstu ); - vec_ste( (vec_u32_t)dstvv, 0, (uint32_t*) dstv ); + STORE4_ALIGNED( dstu, dstuv ); + STORE4_ALIGNED( dstv, dstvv ); srcp += i_src_stride; dstu += i_dst_stride; @@ -525,7 +546,6 @@ srcp = &src[i_src_stride]; LOAD_ZERO; - PREP_STORE8; vec_u16_t coeff0v, coeff1v, coeff2v, coeff3v; vec_u8_t src0v_8, src1v_8, src2v_8, src3v_8; vec_u8_t dstuv, dstvv; @@ -804,20 +824,13 @@ vec_u16_t twov, fourv, fivev, sixv; vec_s16_t sixteenv, thirtytwov; - vec_u16_u temp_u; - temp_u.s[0]=2; - twov = vec_splat( temp_u.v, 0 ); - temp_u.s[0]=4; - fourv = vec_splat( temp_u.v, 0 ); - temp_u.s[0]=5; - fivev = vec_splat( temp_u.v, 0 ); - temp_u.s[0]=6; - sixv = vec_splat( temp_u.v, 0 ); - temp_u.s[0]=16; - sixteenv = (vec_s16_t)vec_splat( temp_u.v, 0 ); - temp_u.s[0]=32; - thirtytwov = (vec_s16_t)vec_splat( temp_u.v, 0 ); + twov = vec_splats( (uint16_t)2 ); + fourv = vec_splats( (uint16_t)4 ); + fivev = vec_splats( (uint16_t)5 ); + sixv = vec_splats( (uint16_t)6 ); + sixteenv = vec_splats( (int16_t)16 ); + thirtytwov = vec_splats( (int16_t)32 ); for( int y = 0; y < i_height; y++ ) { @@ -968,18 +981,14 @@ hv = vec_perm(avgleftv, avgrightv, inverse_bridge_shuffle_1); #endif - vec_ste((vec_u32_t)lv,16*x,(uint32_t*)dst0); - vec_ste((vec_u32_t)lv,16*x+4,(uint32_t*)dst0); - vec_ste((vec_u32_t)hv,16*x,(uint32_t*)dsth); - vec_ste((vec_u32_t)hv,16*x+4,(uint32_t*)dsth); + VEC_STORE8( lv, dst0 + 16 * x ); + VEC_STORE8( hv, dsth + 16 * x ); lv = vec_sld(lv, lv, 8); hv = vec_sld(hv, hv, 8); - 
vec_ste((vec_u32_t)lv,16*x,(uint32_t*)dstv); - vec_ste((vec_u32_t)lv,16*x+4,(uint32_t*)dstv); - vec_ste((vec_u32_t)hv,16*x,(uint32_t*)dstc); - vec_ste((vec_u32_t)hv,16*x+4,(uint32_t*)dstc); + VEC_STORE8( lv, dstv + 16 * x ); + VEC_STORE8( hv, dstc + 16 * x ); } src0 += src_stride*2; @@ -997,23 +1006,16 @@ vec_u8_t srcv; vec_s16_t weightv; vec_s16_t scalev, offsetv, denomv, roundv; - vec_s16_u loadv; int denom = weight->i_denom; - loadv.s[0] = weight->i_scale; - scalev = vec_splat( loadv.v, 0 ); - - loadv.s[0] = weight->i_offset; - offsetv = vec_splat( loadv.v, 0 ); + scalev = vec_splats( (int16_t)weight->i_scale ); + offsetv = vec_splats( (int16_t)weight->i_offset ); if( denom >= 1 ) { - loadv.s[0] = denom; - denomv = vec_splat( loadv.v, 0 ); - - loadv.s[0] = 1<<(denom - 1); - roundv = vec_splat( loadv.v, 0 ); + denomv = vec_splats( (int16_t)denom ); + roundv = vec_splats( (int16_t)(1 << (denom - 1)) ); for( int y = 0; y < i_height; y++, dst += i_dst, src += i_src ) { @@ -1025,7 +1027,7 @@ weightv = vec_add( weightv, offsetv ); srcv = vec_packsu( weightv, zero_s16v ); - vec_ste( vec_splat( (vec_u16_t)srcv, 0 ), 0, (uint16_t*)dst ); + STORE2_UNALIGNED( dst, srcv ); } } else @@ -1038,7 +1040,7 @@ weightv = vec_mladd( weightv, scalev, offsetv ); srcv = vec_packsu( weightv, zero_s16v ); - vec_ste( vec_splat( (vec_u16_t)srcv, 0 ), 0, (uint16_t*)dst ); + STORE2_UNALIGNED( dst, srcv ); } } } @@ -1049,23 +1051,16 @@ vec_u8_t srcv; vec_s16_t weightv; vec_s16_t scalev, offsetv, denomv, roundv; - vec_s16_u loadv; int denom = weight->i_denom; - loadv.s[0] = weight->i_scale; - scalev = vec_splat( loadv.v, 0 ); - - loadv.s[0] = weight->i_offset; - offsetv = vec_splat( loadv.v, 0 ); + scalev = vec_splats( (int16_t)weight->i_scale ); + offsetv = vec_splats( (int16_t)weight->i_offset ); if( denom >= 1 ) { - loadv.s[0] = denom; - denomv = vec_splat( loadv.v, 0 ); - - loadv.s[0] = 1<<(denom - 1); - roundv = vec_splat( loadv.v, 0 ); + denomv = vec_splats( (int16_t)denom ); + roundv = 
vec_splats( (int16_t)(1 << (denom - 1)) ); for( int y = 0; y < i_height; y++, dst += i_dst, src += i_src ) { @@ -1098,27 +1093,19 @@ const x264_weight_t *weight, int i_height ) { LOAD_ZERO; - PREP_STORE8; vec_u8_t srcv; vec_s16_t weightv; vec_s16_t scalev, offsetv, denomv, roundv; - vec_s16_u loadv; int denom = weight->i_denom; - loadv.s[0] = weight->i_scale; - scalev = vec_splat( loadv.v, 0 ); - - loadv.s[0] = weight->i_offset; - offsetv = vec_splat( loadv.v, 0 ); + scalev = vec_splats( (int16_t)weight->i_scale ); + offsetv = vec_splats( (int16_t)weight->i_offset ); if( denom >= 1 ) { - loadv.s[0] = denom; - denomv = vec_splat( loadv.v, 0 ); - - loadv.s[0] = 1<<(denom - 1); - roundv = vec_splat( loadv.v, 0 ); + denomv = vec_splats( (int16_t)denom ); + roundv = vec_splats( (int16_t)(1 << (denom - 1)) ); for( int y = 0; y < i_height; y++, dst += i_dst, src += i_src ) { @@ -1154,23 +1141,16 @@ vec_u8_t srcv; vec_s16_t weight_lv, weight_hv; vec_s16_t scalev, offsetv, denomv, roundv; - vec_s16_u loadv; int denom = weight->i_denom; - loadv.s[0] = weight->i_scale; - scalev = vec_splat( loadv.v, 0 ); - - loadv.s[0] = weight->i_offset; - offsetv = vec_splat( loadv.v, 0 ); + scalev = vec_splats( (int16_t)weight->i_scale ); + offsetv = vec_splats( (int16_t)weight->i_offset ); if( denom >= 1 ) { - loadv.s[0] = denom; - denomv = vec_splat( loadv.v, 0 ); - - loadv.s[0] = 1<<(denom - 1); - roundv = vec_splat( loadv.v, 0 ); + denomv = vec_splats( (int16_t)denom ); + roundv = vec_splats( (int16_t)(1 << (denom - 1)) ); for( int y = 0; y < i_height; y++, dst += i_dst, src += i_src ) { @@ -1212,15 +1192,11 @@ vec_u8_t srcv, srcv2; vec_s16_t weight_lv, weight_hv, weight_3v; vec_s16_t scalev, offsetv, denomv, roundv; - vec_s16_u loadv; int denom = weight->i_denom; - loadv.s[0] = weight->i_scale; - scalev = vec_splat( loadv.v, 0 ); - - loadv.s[0] = weight->i_offset; - offsetv = vec_splat( loadv.v, 0 ); + scalev = vec_splats( (int16_t)weight->i_scale ); + offsetv = vec_splats( 
(int16_t)weight->i_offset ); if( denom >= 1 ) { @@ -1232,11 +1208,8 @@ { round, round, round, round, 0, 0, 0, 0 }, }; - loadv.s[0] = denom; - denomv = vec_splat( loadv.v, 0 ); - - loadv.s[0] = round; - roundv = vec_splat( loadv.v, 0 ); + denomv = vec_splats( (int16_t)denom ); + roundv = vec_splats( (int16_t)(1 << (denom - 1)) ); for( int y = 0; y < i_height; y++, dst += i_dst, src += i_src ) { diff -Nru x264-0.157.2935+git545de2f/common/ppc/mc.h x264-0.160.3011+gitcde9a93/common/ppc/mc.h --- x264-0.157.2935+git545de2f/common/ppc/mc.h 2018-09-25 12:35:00.000000000 +0000 +++ x264-0.160.3011+gitcde9a93/common/ppc/mc.h 2020-07-13 10:30:21.000000000 +0000 @@ -1,7 +1,7 @@ /***************************************************************************** * mc.h: ppc motion compensation ***************************************************************************** - * Copyright (C) 2003-2018 x264 project + * Copyright (C) 2003-2020 x264 project * * Authors: Eric Petit * diff -Nru x264-0.157.2935+git545de2f/common/ppc/pixel.c x264-0.160.3011+gitcde9a93/common/ppc/pixel.c --- x264-0.157.2935+git545de2f/common/ppc/pixel.c 2018-09-25 12:35:00.000000000 +0000 +++ x264-0.160.3011+gitcde9a93/common/ppc/pixel.c 2020-07-13 10:30:21.000000000 +0000 @@ -1,7 +1,7 @@ /***************************************************************************** * pixel.c: ppc pixel metrics ***************************************************************************** - * Copyright (C) 2003-2018 x264 project + * Copyright (C) 2003-2020 x264 project * * Authors: Eric Petit * Guillaume Poirier @@ -114,6 +114,23 @@ VEC_ABS( a ); \ c = vec_sum4s( a, b ) +static ALWAYS_INLINE vec_s32_t add_abs_4( vec_s16_t a, vec_s16_t b, + vec_s16_t c, vec_s16_t d ) +{ + vec_s16_t t0 = vec_abs( a ); + vec_s16_t t1 = vec_abs( b ); + vec_s16_t t2 = vec_abs( c ); + vec_s16_t t3 = vec_abs( d ); + + vec_s16_t s0 = vec_adds( t0, t1 ); + vec_s16_t s1 = vec_adds( t2, t3 ); + + vec_s32_t s01 = vec_sum4s( s0, vec_splat_s32( 0 ) ); + 
vec_s32_t s23 = vec_sum4s( s1, vec_splat_s32( 0 ) ); + + return vec_add( s01, s23 ); +} + /*********************************************************************** * SATD 4x4 **********************************************************************/ @@ -142,10 +159,7 @@ VEC_HADAMAR( diff0v, diff1v, diff2v, diff3v, temp0v, temp1v, temp2v, temp3v ); - VEC_ADD_ABS( temp0v, zero_s32v, satdv ); - VEC_ADD_ABS( temp1v, satdv, satdv ); - VEC_ADD_ABS( temp2v, satdv, satdv ); - VEC_ADD_ABS( temp3v, satdv, satdv ); + satdv = add_abs_4( temp0v, temp1v, temp2v, temp3v ); satdv = vec_sum2s( satdv, zero_s32v ); satdv = vec_splat( satdv, 1 ); @@ -177,10 +191,8 @@ diff0v, diff1v, diff2v, diff3v ); VEC_HADAMAR( diff0v, diff1v, diff2v, diff3v, temp0v, temp1v, temp2v, temp3v ); - VEC_ADD_ABS( temp0v, zero_s32v, satdv ); - VEC_ADD_ABS( temp1v, satdv, satdv ); - VEC_ADD_ABS( temp2v, satdv, satdv ); - VEC_ADD_ABS( temp3v, satdv, satdv ); + + satdv = add_abs_4( temp0v, temp1v, temp2v, temp3v ); VEC_DIFF_H( pix1, i_pix1, pix2, i_pix2, 4, diff0v ); VEC_DIFF_H( pix1, i_pix1, pix2, i_pix2, 4, diff1v ); @@ -192,10 +204,8 @@ diff0v, diff1v, diff2v, diff3v ); VEC_HADAMAR( diff0v, diff1v, diff2v, diff3v, temp0v, temp1v, temp2v, temp3v ); - VEC_ADD_ABS( temp0v, satdv, satdv ); - VEC_ADD_ABS( temp1v, satdv, satdv ); - VEC_ADD_ABS( temp2v, satdv, satdv ); - VEC_ADD_ABS( temp3v, satdv, satdv ); + + satdv = vec_add( satdv, add_abs_4( temp0v, temp1v, temp2v, temp3v ) ); satdv = vec_sum2s( satdv, zero_s32v ); satdv = vec_splat( satdv, 1 ); @@ -204,6 +214,36 @@ return i_satd >> 1; } +static ALWAYS_INLINE vec_s32_t add_abs_8( vec_s16_t a, vec_s16_t b, + vec_s16_t c, vec_s16_t d, + vec_s16_t e, vec_s16_t f, + vec_s16_t g, vec_s16_t h ) +{ + vec_s16_t t0 = vec_abs( a ); + vec_s16_t t1 = vec_abs( b ); + vec_s16_t t2 = vec_abs( c ); + vec_s16_t t3 = vec_abs( d ); + + vec_s16_t s0 = vec_adds( t0, t1 ); + vec_s16_t s1 = vec_adds( t2, t3 ); + + vec_s32_t s01 = vec_sum4s( s0, vec_splat_s32( 0 ) ); + vec_s32_t s23 = 
vec_sum4s( s1, vec_splat_s32( 0 ) ); + + vec_s16_t t4 = vec_abs( e ); + vec_s16_t t5 = vec_abs( f ); + vec_s16_t t6 = vec_abs( g ); + vec_s16_t t7 = vec_abs( h ); + + vec_s16_t s2 = vec_adds( t4, t5 ); + vec_s16_t s3 = vec_adds( t6, t7 ); + + vec_s32_t s0145 = vec_sum4s( s2, s01 ); + vec_s32_t s2367 = vec_sum4s( s3, s23 ); + + return vec_add( s0145, s2367 ); +} + /*********************************************************************** * SATD 8x4 **********************************************************************/ @@ -237,14 +277,8 @@ VEC_HADAMAR( diff4v, diff5v, diff6v, diff7v, temp4v, temp5v, temp6v, temp7v ); - VEC_ADD_ABS( temp0v, zero_s32v, satdv ); - VEC_ADD_ABS( temp1v, satdv, satdv ); - VEC_ADD_ABS( temp2v, satdv, satdv ); - VEC_ADD_ABS( temp3v, satdv, satdv ); - VEC_ADD_ABS( temp4v, satdv, satdv ); - VEC_ADD_ABS( temp5v, satdv, satdv ); - VEC_ADD_ABS( temp6v, satdv, satdv ); - VEC_ADD_ABS( temp7v, satdv, satdv ); + satdv = add_abs_8( temp0v, temp1v, temp2v, temp3v, + temp4v, temp5v, temp6v, temp7v ); satdv = vec_sum2s( satdv, zero_s32v ); satdv = vec_splat( satdv, 1 ); @@ -292,14 +326,8 @@ VEC_HADAMAR( diff4v, diff5v, diff6v, diff7v, temp4v, temp5v, temp6v, temp7v ); - VEC_ADD_ABS( temp0v, zero_s32v, satdv ); - VEC_ADD_ABS( temp1v, satdv, satdv ); - VEC_ADD_ABS( temp2v, satdv, satdv ); - VEC_ADD_ABS( temp3v, satdv, satdv ); - VEC_ADD_ABS( temp4v, satdv, satdv ); - VEC_ADD_ABS( temp5v, satdv, satdv ); - VEC_ADD_ABS( temp6v, satdv, satdv ); - VEC_ADD_ABS( temp7v, satdv, satdv ); + satdv = add_abs_8( temp0v, temp1v, temp2v, temp3v, + temp4v, temp5v, temp6v, temp7v ); satdv = vec_sums( satdv, zero_s32v ); satdv = vec_splat( satdv, 3 ); @@ -343,14 +371,9 @@ temp0v, temp1v, temp2v, temp3v ); VEC_HADAMAR( diff4v, diff5v, diff6v, diff7v, temp4v, temp5v, temp6v, temp7v ); - VEC_ADD_ABS( temp0v, zero_s32v, satdv ); - VEC_ADD_ABS( temp1v, satdv, satdv ); - VEC_ADD_ABS( temp2v, satdv, satdv ); - VEC_ADD_ABS( temp3v, satdv, satdv ); - VEC_ADD_ABS( temp4v, satdv, 
satdv ); - VEC_ADD_ABS( temp5v, satdv, satdv ); - VEC_ADD_ABS( temp6v, satdv, satdv ); - VEC_ADD_ABS( temp7v, satdv, satdv ); + + satdv = add_abs_8( temp0v, temp1v, temp2v, temp3v, + temp4v, temp5v, temp6v, temp7v ); VEC_DIFF_H( pix1, i_pix1, pix2, i_pix2, 8, diff0v ); VEC_DIFF_H( pix1, i_pix1, pix2, i_pix2, 8, diff1v ); @@ -372,14 +395,9 @@ temp0v, temp1v, temp2v, temp3v ); VEC_HADAMAR( diff4v, diff5v, diff6v, diff7v, temp4v, temp5v, temp6v, temp7v ); - VEC_ADD_ABS( temp0v, satdv, satdv ); - VEC_ADD_ABS( temp1v, satdv, satdv ); - VEC_ADD_ABS( temp2v, satdv, satdv ); - VEC_ADD_ABS( temp3v, satdv, satdv ); - VEC_ADD_ABS( temp4v, satdv, satdv ); - VEC_ADD_ABS( temp5v, satdv, satdv ); - VEC_ADD_ABS( temp6v, satdv, satdv ); - VEC_ADD_ABS( temp7v, satdv, satdv ); + + satdv = vec_add( satdv, add_abs_8( temp0v, temp1v, temp2v, temp3v, + temp4v, temp5v, temp6v, temp7v ) ); satdv = vec_sums( satdv, zero_s32v ); satdv = vec_splat( satdv, 3 ); @@ -430,14 +448,8 @@ VEC_HADAMAR( diffh4v, diffh5v, diffh6v, diffh7v, temp4v, temp5v, temp6v, temp7v ); - VEC_ADD_ABS( temp0v, zero_s32v, satdv ); - VEC_ADD_ABS( temp1v, satdv, satdv ); - VEC_ADD_ABS( temp2v, satdv, satdv ); - VEC_ADD_ABS( temp3v, satdv, satdv ); - VEC_ADD_ABS( temp4v, satdv, satdv ); - VEC_ADD_ABS( temp5v, satdv, satdv ); - VEC_ADD_ABS( temp6v, satdv, satdv ); - VEC_ADD_ABS( temp7v, satdv, satdv ); + satdv = add_abs_8( temp0v, temp1v, temp2v, temp3v, + temp4v, temp5v, temp6v, temp7v ); VEC_HADAMAR( diffl0v, diffl1v, diffl2v, diffl3v, temp0v, temp1v, temp2v, temp3v ); @@ -454,14 +466,8 @@ VEC_HADAMAR( diffl4v, diffl5v, diffl6v, diffl7v, temp4v, temp5v, temp6v, temp7v ); - VEC_ADD_ABS( temp0v, satdv, satdv ); - VEC_ADD_ABS( temp1v, satdv, satdv ); - VEC_ADD_ABS( temp2v, satdv, satdv ); - VEC_ADD_ABS( temp3v, satdv, satdv ); - VEC_ADD_ABS( temp4v, satdv, satdv ); - VEC_ADD_ABS( temp5v, satdv, satdv ); - VEC_ADD_ABS( temp6v, satdv, satdv ); - VEC_ADD_ABS( temp7v, satdv, satdv ); + satdv = vec_add( satdv, add_abs_8( temp0v, 
temp1v, temp2v, temp3v, + temp4v, temp5v, temp6v, temp7v ) ); satdv = vec_sums( satdv, zero_s32v ); satdv = vec_splat( satdv, 3 ); @@ -508,14 +514,10 @@ temp0v, temp1v, temp2v, temp3v ); VEC_HADAMAR( diffh4v, diffh5v, diffh6v, diffh7v, temp4v, temp5v, temp6v, temp7v ); - VEC_ADD_ABS( temp0v, zero_s32v, satdv ); - VEC_ADD_ABS( temp1v, satdv, satdv ); - VEC_ADD_ABS( temp2v, satdv, satdv ); - VEC_ADD_ABS( temp3v, satdv, satdv ); - VEC_ADD_ABS( temp4v, satdv, satdv ); - VEC_ADD_ABS( temp5v, satdv, satdv ); - VEC_ADD_ABS( temp6v, satdv, satdv ); - VEC_ADD_ABS( temp7v, satdv, satdv ); + + satdv = add_abs_8( temp0v, temp1v, temp2v, temp3v, + temp4v, temp5v, temp6v, temp7v ); + VEC_HADAMAR( diffl0v, diffl1v, diffl2v, diffl3v, temp0v, temp1v, temp2v, temp3v ); VEC_HADAMAR( diffl4v, diffl5v, diffl6v, diffl7v, @@ -528,14 +530,9 @@ temp0v, temp1v, temp2v, temp3v ); VEC_HADAMAR( diffl4v, diffl5v, diffl6v, diffl7v, temp4v, temp5v, temp6v, temp7v ); - VEC_ADD_ABS( temp0v, satdv, satdv ); - VEC_ADD_ABS( temp1v, satdv, satdv ); - VEC_ADD_ABS( temp2v, satdv, satdv ); - VEC_ADD_ABS( temp3v, satdv, satdv ); - VEC_ADD_ABS( temp4v, satdv, satdv ); - VEC_ADD_ABS( temp5v, satdv, satdv ); - VEC_ADD_ABS( temp6v, satdv, satdv ); - VEC_ADD_ABS( temp7v, satdv, satdv ); + + satdv = vec_add( satdv, add_abs_8( temp0v, temp1v, temp2v, temp3v, + temp4v, temp5v, temp6v, temp7v ) ); VEC_DIFF_HL( pix1, i_pix1, pix2, i_pix2, diffh0v, diffl0v ); VEC_DIFF_HL( pix1, i_pix1, pix2, i_pix2, diffh1v, diffl1v ); @@ -557,14 +554,10 @@ temp0v, temp1v, temp2v, temp3v ); VEC_HADAMAR( diffh4v, diffh5v, diffh6v, diffh7v, temp4v, temp5v, temp6v, temp7v ); - VEC_ADD_ABS( temp0v, satdv, satdv ); - VEC_ADD_ABS( temp1v, satdv, satdv ); - VEC_ADD_ABS( temp2v, satdv, satdv ); - VEC_ADD_ABS( temp3v, satdv, satdv ); - VEC_ADD_ABS( temp4v, satdv, satdv ); - VEC_ADD_ABS( temp5v, satdv, satdv ); - VEC_ADD_ABS( temp6v, satdv, satdv ); - VEC_ADD_ABS( temp7v, satdv, satdv ); + + satdv = vec_add( satdv, add_abs_8( temp0v, temp1v, 
temp2v, temp3v, + temp4v, temp5v, temp6v, temp7v ) ); + VEC_HADAMAR( diffl0v, diffl1v, diffl2v, diffl3v, temp0v, temp1v, temp2v, temp3v ); VEC_HADAMAR( diffl4v, diffl5v, diffl6v, diffl7v, @@ -577,14 +570,9 @@ temp0v, temp1v, temp2v, temp3v ); VEC_HADAMAR( diffl4v, diffl5v, diffl6v, diffl7v, temp4v, temp5v, temp6v, temp7v ); - VEC_ADD_ABS( temp0v, satdv, satdv ); - VEC_ADD_ABS( temp1v, satdv, satdv ); - VEC_ADD_ABS( temp2v, satdv, satdv ); - VEC_ADD_ABS( temp3v, satdv, satdv ); - VEC_ADD_ABS( temp4v, satdv, satdv ); - VEC_ADD_ABS( temp5v, satdv, satdv ); - VEC_ADD_ABS( temp6v, satdv, satdv ); - VEC_ADD_ABS( temp7v, satdv, satdv ); + + satdv = vec_add( satdv, add_abs_8( temp0v, temp1v, temp2v, temp3v, + temp4v, temp5v, temp6v, temp7v ) ); satdv = vec_sums( satdv, zero_s32v ); satdv = vec_splat( satdv, 3 ); @@ -832,6 +820,82 @@ scores[3] = sum3; } +#define PROCESS_PIXS \ + vec_u8_t pix0vH = vec_vsx_ld( 0, pix0 ); \ + pix0 += i_stride; \ + \ + vec_u8_t pix1vH = vec_vsx_ld( 0, pix1 ); \ + pix1 += i_stride; \ + \ + vec_u8_t fencvH = vec_vsx_ld( 0, fenc ); \ + fenc += FENC_STRIDE; \ + \ + vec_u8_t pix2vH = vec_vsx_ld( 0, pix2 ); \ + pix2 += i_stride; \ + \ + vec_u8_t pix0vL = vec_vsx_ld( 0, pix0 ); \ + pix0 += i_stride; \ + \ + vec_u8_t pix1vL = vec_vsx_ld( 0, pix1 ); \ + pix1 += i_stride; \ + \ + vec_u8_t fencvL = vec_vsx_ld( 0, fenc ); \ + fenc += FENC_STRIDE; \ + \ + vec_u8_t pix2vL = vec_vsx_ld( 0, pix2 ); \ + pix2 += i_stride; \ + \ + fencv = xxpermdi( fencvH, fencvL, 0 ); \ + pix0v = xxpermdi( pix0vH, pix0vL, 0 ); \ + pix1v = xxpermdi( pix1vH, pix1vL, 0 ); \ + pix2v = xxpermdi( pix2vH, pix2vL, 0 ); \ + \ + sum0v = (vec_s32_t) vec_sum4s( vec_absd( fencv, pix0v ), (vec_u32_t) sum0v ); \ + sum1v = (vec_s32_t) vec_sum4s( vec_absd( fencv, pix1v ), (vec_u32_t) sum1v ); \ + sum2v = (vec_s32_t) vec_sum4s( vec_absd( fencv, pix2v ), (vec_u32_t) sum2v ); + +#define PIXEL_SAD_X3_ALTIVEC( name, ly ) \ +static int name( uint8_t *fenc, uint8_t *pix0, \ + uint8_t *pix1, uint8_t 
*pix2, \ + intptr_t i_stride, int scores[3] ) \ +{ \ + ALIGNED_16( int sum0 ); \ + ALIGNED_16( int sum1 ); \ + ALIGNED_16( int sum2 ); \ + \ + LOAD_ZERO; \ + vec_u8_t fencv, pix0v, pix1v, pix2v; \ + vec_s32_t sum0v, sum1v, sum2v; \ + \ + sum0v = vec_splat_s32( 0 ); \ + sum1v = vec_splat_s32( 0 ); \ + sum2v = vec_splat_s32( 0 ); \ + \ + for( int y = 0; y < ly; y++ ) \ + { \ + PROCESS_PIXS \ + } \ + \ + sum0v = vec_sums( sum0v, zero_s32v ); \ + sum1v = vec_sums( sum1v, zero_s32v ); \ + sum2v = vec_sums( sum2v, zero_s32v ); \ + \ + sum0v = vec_splat( sum0v, 3 ); \ + sum1v = vec_splat( sum1v, 3 ); \ + sum2v = vec_splat( sum2v, 3 ); \ + \ + vec_ste( sum0v, 0, &sum0 ); \ + vec_ste( sum1v, 0, &sum1 ); \ + vec_ste( sum2v, 0, &sum2 ); \ + \ + scores[0] = sum0; \ + scores[1] = sum1; \ + scores[2] = sum2; \ +} + +PIXEL_SAD_X3_ALTIVEC( pixel_sad_x3_8x8_altivec, 4 ) +PIXEL_SAD_X3_ALTIVEC( pixel_sad_x3_8x16_altivec, 8 ) + static void pixel_sad_x3_16x8_altivec( uint8_t *fenc, uint8_t *pix0, uint8_t *pix1, uint8_t *pix2, intptr_t i_stride, int scores[3] ) @@ -900,308 +964,49 @@ scores[2] = sum2; } - -static void pixel_sad_x4_8x16_altivec( uint8_t *fenc, - uint8_t *pix0, uint8_t *pix1, - uint8_t *pix2, uint8_t *pix3, - intptr_t i_stride, int scores[4] ) -{ - ALIGNED_16( int sum0 ); - ALIGNED_16( int sum1 ); - ALIGNED_16( int sum2 ); - ALIGNED_16( int sum3 ); - - LOAD_ZERO; - vec_u8_t fencv, pix0v, pix1v, pix2v, pix3v; - vec_s32_t sum0v, sum1v, sum2v, sum3v; - - sum0v = vec_splat_s32(0); - sum1v = vec_splat_s32(0); - sum2v = vec_splat_s32(0); - sum3v = vec_splat_s32(0); - - for( int y = 0; y < 8; y++ ) - { - pix0v = vec_vsx_ld(0, pix0); - pix0 += i_stride; - - pix1v = vec_vsx_ld(0, pix1); - pix1 += i_stride; - - fencv = vec_vsx_ld(0, fenc); - fenc += FENC_STRIDE; - - pix2v = vec_vsx_ld(0, pix2); - pix2 += i_stride; - - pix3v = vec_vsx_ld(0, pix3); - pix3 += i_stride; - - sum0v = (vec_s32_t) vec_sum4s( vec_absd( fencv, pix0v ), (vec_u32_t) sum0v ); - sum1v = (vec_s32_t) vec_sum4s( 
vec_absd( fencv, pix1v ), (vec_u32_t) sum1v ); - sum2v = (vec_s32_t) vec_sum4s( vec_absd( fencv, pix2v ), (vec_u32_t) sum2v ); - sum3v = (vec_s32_t) vec_sum4s( vec_absd( fencv, pix3v ), (vec_u32_t) sum3v ); - - pix0v = vec_vsx_ld(0, pix0); - pix0 += i_stride; - - pix1v = vec_vsx_ld(0, pix1); - pix1 += i_stride; - - fencv = vec_vsx_ld(0, fenc); - fenc += FENC_STRIDE; - - pix2v = vec_vsx_ld(0, pix2); - pix2 += i_stride; - - pix3v = vec_vsx_ld(0, pix3); - pix3 += i_stride; - - sum0v = (vec_s32_t) vec_sum4s( vec_absd( fencv, pix0v ), (vec_u32_t) sum0v ); - sum1v = (vec_s32_t) vec_sum4s( vec_absd( fencv, pix1v ), (vec_u32_t) sum1v ); - sum2v = (vec_s32_t) vec_sum4s( vec_absd( fencv, pix2v ), (vec_u32_t) sum2v ); - sum3v = (vec_s32_t) vec_sum4s( vec_absd( fencv, pix3v ), (vec_u32_t) sum3v ); - } - - sum0v = vec_sum2s( sum0v, zero_s32v ); - sum1v = vec_sum2s( sum1v, zero_s32v ); - sum2v = vec_sum2s( sum2v, zero_s32v ); - sum3v = vec_sum2s( sum3v, zero_s32v ); - - sum0v = vec_splat( sum0v, 1 ); - sum1v = vec_splat( sum1v, 1 ); - sum2v = vec_splat( sum2v, 1 ); - sum3v = vec_splat( sum3v, 1 ); - - vec_ste( sum0v, 0, &sum0); - vec_ste( sum1v, 0, &sum1); - vec_ste( sum2v, 0, &sum2); - vec_ste( sum3v, 0, &sum3); - - scores[0] = sum0; - scores[1] = sum1; - scores[2] = sum2; - scores[3] = sum3; -} - -static void pixel_sad_x3_8x16_altivec( uint8_t *fenc, uint8_t *pix0, - uint8_t *pix1, uint8_t *pix2, - intptr_t i_stride, int scores[3] ) -{ - ALIGNED_16( int sum0 ); - ALIGNED_16( int sum1 ); - ALIGNED_16( int sum2 ); - - LOAD_ZERO; - vec_u8_t fencv, pix0v, pix1v, pix2v; - vec_s32_t sum0v, sum1v, sum2v; - - sum0v = vec_splat_s32(0); - sum1v = vec_splat_s32(0); - sum2v = vec_splat_s32(0); - - for( int y = 0; y < 8; y++ ) - { - pix0v = vec_vsx_ld(0, pix0); - pix0 += i_stride; - - pix1v = vec_vsx_ld(0, pix1); - pix1 += i_stride; - - fencv = vec_vsx_ld(0, fenc); - fenc += FENC_STRIDE; - - pix2v = vec_vsx_ld(0, pix2); - pix2 += i_stride; - - sum0v = (vec_s32_t) vec_sum4s( vec_absd( 
fencv, pix0v ), (vec_u32_t) sum0v ); - sum1v = (vec_s32_t) vec_sum4s( vec_absd( fencv, pix1v ), (vec_u32_t) sum1v ); - sum2v = (vec_s32_t) vec_sum4s( vec_absd( fencv, pix2v ), (vec_u32_t) sum2v ); - - pix0v = vec_vsx_ld(0, pix0); - pix0 += i_stride; - - pix1v = vec_vsx_ld(0, pix1); - pix1 += i_stride; - - fencv = vec_vsx_ld(0, fenc); - fenc += FENC_STRIDE; - - pix2v = vec_vsx_ld(0, pix2); - pix2 += i_stride; - - sum0v = (vec_s32_t) vec_sum4s( vec_absd( fencv, pix0v ), (vec_u32_t) sum0v ); - sum1v = (vec_s32_t) vec_sum4s( vec_absd( fencv, pix1v ), (vec_u32_t) sum1v ); - sum2v = (vec_s32_t) vec_sum4s( vec_absd( fencv, pix2v ), (vec_u32_t) sum2v ); - } - - sum0v = vec_sum2s( sum0v, zero_s32v ); - sum1v = vec_sum2s( sum1v, zero_s32v ); - sum2v = vec_sum2s( sum2v, zero_s32v ); - - sum0v = vec_splat( sum0v, 1 ); - sum1v = vec_splat( sum1v, 1 ); - sum2v = vec_splat( sum2v, 1 ); - - vec_ste( sum0v, 0, &sum0); - vec_ste( sum1v, 0, &sum1); - vec_ste( sum2v, 0, &sum2); - - scores[0] = sum0; - scores[1] = sum1; - scores[2] = sum2; -} - -static void pixel_sad_x4_8x8_altivec( uint8_t *fenc, - uint8_t *pix0, uint8_t *pix1, - uint8_t *pix2, uint8_t *pix3, - intptr_t i_stride, int scores[4] ) -{ - ALIGNED_16( int sum0 ); - ALIGNED_16( int sum1 ); - ALIGNED_16( int sum2 ); - ALIGNED_16( int sum3 ); - - LOAD_ZERO; - vec_u8_t fencv, pix0v, pix1v, pix2v, pix3v; - vec_s32_t sum0v, sum1v, sum2v, sum3v; - - sum0v = vec_splat_s32(0); - sum1v = vec_splat_s32(0); - sum2v = vec_splat_s32(0); - sum3v = vec_splat_s32(0); - - for( int y = 0; y < 4; y++ ) - { - pix0v = vec_vsx_ld(0, pix0); - pix0 += i_stride; - - pix1v = vec_vsx_ld(0, pix1); - pix1 += i_stride; - - fencv = vec_vsx_ld(0, fenc); - fenc += FENC_STRIDE; - - pix2v = vec_vsx_ld(0, pix2); - pix2 += i_stride; - - pix3v = vec_vsx_ld(0, pix3); - pix3 += i_stride; - - sum0v = (vec_s32_t) vec_sum4s( vec_absd( fencv, pix0v ), (vec_u32_t) sum0v ); - sum1v = (vec_s32_t) vec_sum4s( vec_absd( fencv, pix1v ), (vec_u32_t) sum1v ); - sum2v = 
(vec_s32_t) vec_sum4s( vec_absd( fencv, pix2v ), (vec_u32_t) sum2v ); - sum3v = (vec_s32_t) vec_sum4s( vec_absd( fencv, pix3v ), (vec_u32_t) sum3v ); - - pix0v = vec_vsx_ld(0, pix0); - pix0 += i_stride; - - pix1v = vec_vsx_ld(0, pix1); - pix1 += i_stride; - - fencv = vec_vsx_ld(0, fenc); - fenc += FENC_STRIDE; - - pix2v = vec_vsx_ld(0, pix2); - pix2 += i_stride; - - pix3v = vec_vsx_ld(0, pix3); - pix3 += i_stride; - - sum0v = (vec_s32_t) vec_sum4s( vec_absd( fencv, pix0v ), (vec_u32_t) sum0v ); - sum1v = (vec_s32_t) vec_sum4s( vec_absd( fencv, pix1v ), (vec_u32_t) sum1v ); - sum2v = (vec_s32_t) vec_sum4s( vec_absd( fencv, pix2v ), (vec_u32_t) sum2v ); - sum3v = (vec_s32_t) vec_sum4s( vec_absd( fencv, pix3v ), (vec_u32_t) sum3v ); - } - - sum0v = vec_sum2s( sum0v, zero_s32v ); - sum1v = vec_sum2s( sum1v, zero_s32v ); - sum2v = vec_sum2s( sum2v, zero_s32v ); - sum3v = vec_sum2s( sum3v, zero_s32v ); - - sum0v = vec_splat( sum0v, 1 ); - sum1v = vec_splat( sum1v, 1 ); - sum2v = vec_splat( sum2v, 1 ); - sum3v = vec_splat( sum3v, 1 ); - - vec_ste( sum0v, 0, &sum0); - vec_ste( sum1v, 0, &sum1); - vec_ste( sum2v, 0, &sum2); - vec_ste( sum3v, 0, &sum3); - - scores[0] = sum0; - scores[1] = sum1; - scores[2] = sum2; - scores[3] = sum3; +#define PIXEL_SAD_X4_ALTIVEC( name, ly ) \ +static int name( uint8_t *fenc, \ + uint8_t *pix0, uint8_t *pix1, \ + uint8_t *pix2, uint8_t *pix3, \ + intptr_t i_stride, int scores[4] ) \ +{ \ + ALIGNED_16( int sum0 ); \ + ALIGNED_16( int sum1 ); \ + ALIGNED_16( int sum2 ); \ + \ + LOAD_ZERO; \ + vec_u8_t fencv, pix0v, pix1v, pix2v, pix3v; \ + vec_s32_t sum0v, sum1v, sum2v, sum3v; \ + \ + sum0v = vec_splat_s32( 0 ); \ + sum1v = vec_splat_s32( 0 ); \ + sum2v = vec_splat_s32( 0 ); \ + \ + for( int y = 0; y < ly; y++ ) \ + { \ + PROCESS_PIXS \ + vec_u8_t pix3vH = vec_vsx_ld( 0, pix3 ); \ + pix3 += i_stride; \ + vec_u8_t pix3vL = vec_vsx_ld( 0, pix3 ); \ + pix3 += i_stride; \ + pix3v = xxpermdi( pix3vH, pix3vL, 0 ); \ + sum3v = (vec_s32_t) vec_sum4s( 
vec_absd( fencv, pix3v ), (vec_u32_t) sum3v ); \ + } \ + \ + sum0v = vec_sums( sum0v, zero_s32v ); \ + sum1v = vec_sums( sum1v, zero_s32v ); \ + sum2v = vec_sums( sum2v, zero_s32v ); \ + sum3v = vec_sums( sum3v, zero_s32v ); \ + \ + vec_s32_t s01 = vec_mergel( sum0v, sum1v ); \ + vec_s32_t s23 = vec_mergel( sum2v, sum3v ); \ + vec_s32_t s = xxpermdi( s01, s23, 3 ); \ + \ + vec_vsx_st( s, 0, scores ); \ } -static void pixel_sad_x3_8x8_altivec( uint8_t *fenc, uint8_t *pix0, - uint8_t *pix1, uint8_t *pix2, - intptr_t i_stride, int scores[3] ) -{ - ALIGNED_16( int sum0 ); - ALIGNED_16( int sum1 ); - ALIGNED_16( int sum2 ); - - LOAD_ZERO; - vec_u8_t fencv, pix0v, pix1v, pix2v; - vec_s32_t sum0v, sum1v, sum2v; - - sum0v = vec_splat_s32(0); - sum1v = vec_splat_s32(0); - sum2v = vec_splat_s32(0); - - for( int y = 0; y < 4; y++ ) - { - pix0v = vec_vsx_ld(0, pix0); - pix0 += i_stride; - - pix1v = vec_vsx_ld(0, pix1); - pix1 += i_stride; - - fencv = vec_vsx_ld(0, fenc); - fenc += FENC_STRIDE; - - pix2v = vec_vsx_ld(0, pix2); - pix2 += i_stride; - - sum0v = (vec_s32_t) vec_sum4s( vec_absd( fencv, pix0v ), (vec_u32_t) sum0v ); - sum1v = (vec_s32_t) vec_sum4s( vec_absd( fencv, pix1v ), (vec_u32_t) sum1v ); - sum2v = (vec_s32_t) vec_sum4s( vec_absd( fencv, pix2v ), (vec_u32_t) sum2v ); - - pix0v = vec_vsx_ld(0, pix0); - pix0 += i_stride; - - pix1v = vec_vsx_ld(0, pix1); - pix1 += i_stride; - - fencv = vec_vsx_ld(0, fenc); - fenc += FENC_STRIDE; - - pix2v = vec_vsx_ld(0, pix2); - pix2 += i_stride; - - sum0v = (vec_s32_t) vec_sum4s( vec_absd( fencv, pix0v ), (vec_u32_t) sum0v ); - sum1v = (vec_s32_t) vec_sum4s( vec_absd( fencv, pix1v ), (vec_u32_t) sum1v ); - sum2v = (vec_s32_t) vec_sum4s( vec_absd( fencv, pix2v ), (vec_u32_t) sum2v ); - } - - sum0v = vec_sum2s( sum0v, zero_s32v ); - sum1v = vec_sum2s( sum1v, zero_s32v ); - sum2v = vec_sum2s( sum2v, zero_s32v ); - - sum0v = vec_splat( sum0v, 1 ); - sum1v = vec_splat( sum1v, 1 ); - sum2v = vec_splat( sum2v, 1 ); - - vec_ste( sum0v, 
0, &sum0); - vec_ste( sum1v, 0, &sum1); - vec_ste( sum2v, 0, &sum2); - - scores[0] = sum0; - scores[1] = sum1; - scores[2] = sum2; -} +PIXEL_SAD_X4_ALTIVEC( pixel_sad_x4_8x8_altivec, 4 ) +PIXEL_SAD_X4_ALTIVEC( pixel_sad_x4_8x16_altivec, 8 ) /*********************************************************************** * SSD routines diff -Nru x264-0.157.2935+git545de2f/common/ppc/pixel.h x264-0.160.3011+gitcde9a93/common/ppc/pixel.h --- x264-0.157.2935+git545de2f/common/ppc/pixel.h 2018-09-25 12:35:00.000000000 +0000 +++ x264-0.160.3011+gitcde9a93/common/ppc/pixel.h 2020-07-13 10:30:21.000000000 +0000 @@ -1,7 +1,7 @@ /***************************************************************************** * pixel.h: ppc pixel metrics ***************************************************************************** - * Copyright (C) 2003-2018 x264 project + * Copyright (C) 2003-2020 x264 project * * Authors: Eric Petit * diff -Nru x264-0.157.2935+git545de2f/common/ppc/ppccommon.h x264-0.160.3011+gitcde9a93/common/ppc/ppccommon.h --- x264-0.157.2935+git545de2f/common/ppc/ppccommon.h 2018-09-25 12:35:00.000000000 +0000 +++ x264-0.160.3011+gitcde9a93/common/ppc/ppccommon.h 2020-07-13 10:30:21.000000000 +0000 @@ -1,7 +1,7 @@ /***************************************************************************** * ppccommon.h: ppc utility macros ***************************************************************************** - * Copyright (C) 2003-2018 x264 project + * Copyright (C) 2003-2020 x264 project * * Authors: Eric Petit * @@ -146,19 +146,14 @@ #define vec_s32_to_u16(v) vec_packsu( v, zero_s32v ) /*********************************************************************** - * PREP_STORE##n: declares required vectors to store n bytes to a - * potentially unaligned address * VEC_STORE##n: stores n bytes from vector v to address p **********************************************************************/ -#define PREP_STORE8 \ - vec_u8_t _tmp3v; \ - vec_u8_t mask = { 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 
0x06, 0x07, \ - 0x18, 0x19, 0x1A, 0x1B, 0x1C, 0x1D, 0x1E, 0x1F } \ - -#define VEC_STORE8( v, p ) \ - _tmp3v = vec_vsx_ld( 0, p ); \ - v = vec_perm( v, _tmp3v, mask ); \ - vec_vsx_st( v, 0, p ) +#ifndef __POWER9_VECTOR__ +#define VEC_STORE8( v, p ) \ + vec_vsx_st( vec_xxpermdi( v, vec_vsx_ld( 0, p ), 1 ), 0, p ) +#else +#define VEC_STORE8( v, p ) vec_xst_len( v, p, 8 ) +#endif /*********************************************************************** * VEC_TRANSPOSE_8 diff -Nru x264-0.157.2935+git545de2f/common/ppc/predict.c x264-0.160.3011+gitcde9a93/common/ppc/predict.c --- x264-0.157.2935+git545de2f/common/ppc/predict.c 2018-09-25 12:35:00.000000000 +0000 +++ x264-0.160.3011+gitcde9a93/common/ppc/predict.c 2020-07-13 10:30:21.000000000 +0000 @@ -1,7 +1,7 @@ /***************************************************************************** * predict.c: ppc intra prediction ***************************************************************************** - * Copyright (C) 2007-2018 x264 project + * Copyright (C) 2007-2020 x264 project * * Authors: Guillaume Poirier * @@ -58,8 +58,6 @@ vec_s16_t induc_v = (vec_s16_t) CV(0, 1, 2, 3, 4, 5, 6, 7); vec_s16_t add_i0_b_0v = vec_mladd(induc_v, b_v, i00_v); - PREP_STORE8; - for( int i = 0; i < 8; ++i ) { vec_s16_t shift_0_v = vec_sra(add_i0_b_0v, val5_v); @@ -67,7 +65,6 @@ VEC_STORE8(com_sat_v, &src[0]); src += FDEC_STRIDE; add_i0_b_0v = vec_adds(add_i0_b_0v, c_v); - } } diff -Nru x264-0.157.2935+git545de2f/common/ppc/predict.h x264-0.160.3011+gitcde9a93/common/ppc/predict.h --- x264-0.157.2935+git545de2f/common/ppc/predict.h 2018-09-25 12:35:00.000000000 +0000 +++ x264-0.160.3011+gitcde9a93/common/ppc/predict.h 2020-07-13 10:30:21.000000000 +0000 @@ -1,7 +1,7 @@ /***************************************************************************** * predict.h: ppc intra prediction ***************************************************************************** - * Copyright (C) 2007-2018 x264 project + * Copyright (C) 2007-2020 x264 project * 
* Authors: Guillaume Poirier * diff -Nru x264-0.157.2935+git545de2f/common/ppc/quant.c x264-0.160.3011+gitcde9a93/common/ppc/quant.c --- x264-0.157.2935+git545de2f/common/ppc/quant.c 2018-09-25 12:35:00.000000000 +0000 +++ x264-0.160.3011+gitcde9a93/common/ppc/quant.c 2020-07-13 10:30:21.000000000 +0000 @@ -1,7 +1,7 @@ /***************************************************************************** * quant.c: ppc quantization ***************************************************************************** - * Copyright (C) 2007-2018 x264 project + * Copyright (C) 2007-2020 x264 project * * Authors: Guillaume Poirier * @@ -70,7 +70,7 @@ { LOAD_ZERO; vector bool short mskA; - vec_u32_t i_qbitsv; + vec_u32_t i_qbitsv = vec_splats( (uint32_t)16 ); vec_u16_t coefvA; vec_u32_t multEvenvA, multOddvA; vec_u16_t mfvA; @@ -86,14 +86,212 @@ vec_s16_t temp1v, temp2v, tmpv; - vec_u32_u qbits_u; - qbits_u.s[0]=16; - i_qbitsv = vec_splat(qbits_u.v, 0); - QUANT_16_U( 0, 16 ); return vec_any_ne(nz, zero_s16v); } +int x264_quant_4x4x4_altivec( dctcoef dcta[4][16], udctcoef mf[16], udctcoef bias[16] ) +{ + LOAD_ZERO; + vec_u32_t i_qbitsv = vec_splats( (uint32_t)16 ); + vec_s16_t one = vec_splat_s16( 1 ); + vec_s16_t nz0, nz1, nz2, nz3; + + vector bool short mskA0; + vec_u16_t coefvA0; + vec_u32_t multEvenvA0, multOddvA0; + vec_u16_t mfvA0; + vec_u16_t biasvA0; + vector bool short mskB0; + vec_u16_t coefvB0; + vec_u32_t multEvenvB0, multOddvB0; + vec_u16_t mfvB0; + vec_u16_t biasvB0; + + vector bool short mskA1; + vec_u16_t coefvA1; + vec_u32_t multEvenvA1, multOddvA1; + vec_u16_t mfvA1; + vec_u16_t biasvA1; + vector bool short mskB1; + vec_u16_t coefvB1; + vec_u32_t multEvenvB1, multOddvB1; + vec_u16_t mfvB1; + vec_u16_t biasvB1; + + vector bool short mskA2; + vec_u16_t coefvA2; + vec_u32_t multEvenvA2, multOddvA2; + vec_u16_t mfvA2; + vec_u16_t biasvA2; + vector bool short mskB2; + vec_u16_t coefvB2; + vec_u32_t multEvenvB2, multOddvB2; + vec_u16_t mfvB2; + vec_u16_t biasvB2; + + vector 
bool short mskA3; + vec_u16_t coefvA3; + vec_u32_t multEvenvA3, multOddvA3; + vec_u16_t mfvA3; + vec_u16_t biasvA3; + vector bool short mskB3; + vec_u16_t coefvB3; + vec_u32_t multEvenvB3, multOddvB3; + vec_u16_t mfvB3; + vec_u16_t biasvB3; + + vec_s16_t temp1v, temp2v; + vec_s16_t tmpv0; + vec_s16_t tmpv1; + + dctcoef *dct0 = dcta[0]; + dctcoef *dct1 = dcta[1]; + dctcoef *dct2 = dcta[2]; + dctcoef *dct3 = dcta[3]; + + temp1v = vec_ld( 0, dct0 ); + temp2v = vec_ld( 16, dct0 ); + mfvA0 = vec_ld( 0, mf ); + mfvB0 = vec_ld( 16, mf ); + biasvA0 = vec_ld( 0, bias ); + biasvB0 = vec_ld( 16, bias ); + mskA0 = vec_cmplt( temp1v, zero_s16v ); + mskB0 = vec_cmplt( temp2v, zero_s16v ); + coefvA0 = (vec_u16_t)vec_abs( temp1v ); + coefvB0 = (vec_u16_t)vec_abs( temp2v ); + temp1v = vec_ld( 0, dct1 ); + temp2v = vec_ld( 16, dct1 ); + mfvA1 = vec_ld( 0, mf ); + mfvB1 = vec_ld( 16, mf ); + biasvA1 = vec_ld( 0, bias ); + biasvB1 = vec_ld( 16, bias ); + mskA1 = vec_cmplt( temp1v, zero_s16v ); + mskB1 = vec_cmplt( temp2v, zero_s16v ); + coefvA1 = (vec_u16_t)vec_abs( temp1v ); + coefvB1 = (vec_u16_t)vec_abs( temp2v ); + temp1v = vec_ld( 0, dct2 ); + temp2v = vec_ld( 16, dct2 ); + mfvA2 = vec_ld( 0, mf ); + mfvB2 = vec_ld( 16, mf ); + biasvA2 = vec_ld( 0, bias ); + biasvB2 = vec_ld( 16, bias ); + mskA2 = vec_cmplt( temp1v, zero_s16v ); + mskB2 = vec_cmplt( temp2v, zero_s16v ); + coefvA2 = (vec_u16_t)vec_abs( temp1v ); + coefvB2 = (vec_u16_t)vec_abs( temp2v ); + temp1v = vec_ld( 0, dct3 ); + temp2v = vec_ld( 16, dct3 ); + mfvA3 = vec_ld( 0, mf ); + mfvB3 = vec_ld( 16, mf ); + biasvA3 = vec_ld( 0, bias ); + biasvB3 = vec_ld( 16, bias ); + mskA3 = vec_cmplt( temp1v, zero_s16v ); + mskB3 = vec_cmplt( temp2v, zero_s16v ); + coefvA3 = (vec_u16_t)vec_abs( temp1v ); + coefvB3 = (vec_u16_t)vec_abs( temp2v ); + + coefvA0 = vec_adds( coefvA0, biasvA0 ); + coefvB0 = vec_adds( coefvB0, biasvB0 ); + coefvA1 = vec_adds( coefvA1, biasvA1 ); + coefvB1 = vec_adds( coefvB1, biasvB1 ); + coefvA2 = 
vec_adds( coefvA2, biasvA2 ); + coefvB2 = vec_adds( coefvB2, biasvB2 ); + coefvA3 = vec_adds( coefvA3, biasvA3 ); + coefvB3 = vec_adds( coefvB3, biasvB3 ); + + multEvenvA0 = vec_mule( coefvA0, mfvA0 ); + multOddvA0 = vec_mulo( coefvA0, mfvA0 ); + multEvenvB0 = vec_mule( coefvB0, mfvB0 ); + multOddvB0 = vec_mulo( coefvB0, mfvB0 ); + multEvenvA0 = vec_sr( multEvenvA0, i_qbitsv ); + multOddvA0 = vec_sr( multOddvA0, i_qbitsv ); + multEvenvB0 = vec_sr( multEvenvB0, i_qbitsv ); + multOddvB0 = vec_sr( multOddvB0, i_qbitsv ); + temp1v = (vec_s16_t)vec_packs( multEvenvA0, multOddvA0 ); + temp2v = (vec_s16_t)vec_packs( multEvenvB0, multOddvB0 ); + tmpv0 = xxpermdi( temp1v, temp1v, 2 ); + tmpv1 = xxpermdi( temp2v, temp2v, 2 ); + temp1v = vec_mergeh( temp1v, tmpv0 ); + temp2v = vec_mergeh( temp2v, tmpv1 ); + temp1v = vec_xor( temp1v, mskA0 ); + temp2v = vec_xor( temp2v, mskB0 ); + temp1v = vec_adds( temp1v, vec_and( mskA0, one ) ); + temp2v = vec_adds( temp2v, vec_and( mskB0, one ) ); + vec_st( temp1v, 0, dct0 ); + vec_st( temp2v, 16, dct0 ); + nz0 = vec_or( temp1v, temp2v ); + + multEvenvA1 = vec_mule( coefvA1, mfvA1 ); + multOddvA1 = vec_mulo( coefvA1, mfvA1 ); + multEvenvB1 = vec_mule( coefvB1, mfvB1 ); + multOddvB1 = vec_mulo( coefvB1, mfvB1 ); + multEvenvA1 = vec_sr( multEvenvA1, i_qbitsv ); + multOddvA1 = vec_sr( multOddvA1, i_qbitsv ); + multEvenvB1 = vec_sr( multEvenvB1, i_qbitsv ); + multOddvB1 = vec_sr( multOddvB1, i_qbitsv ); + temp1v = (vec_s16_t)vec_packs( multEvenvA1, multOddvA1 ); + temp2v = (vec_s16_t)vec_packs( multEvenvB1, multOddvB1 ); + tmpv0 = xxpermdi( temp1v, temp1v, 2 ); + tmpv1 = xxpermdi( temp2v, temp2v, 2 ); + temp1v = vec_mergeh( temp1v, tmpv0 ); + temp2v = vec_mergeh( temp2v, tmpv1 ); + temp1v = vec_xor( temp1v, mskA1 ); + temp2v = vec_xor( temp2v, mskB1 ); + temp1v = vec_adds( temp1v, vec_and( mskA1, one ) ); + temp2v = vec_adds( temp2v, vec_and( mskB1, one ) ); + vec_st( temp1v, 0, dct1 ); + vec_st( temp2v, 16, dct1 ); + nz1 = vec_or( temp1v, 
temp2v ); + + multEvenvA2 = vec_mule( coefvA2, mfvA2 ); + multOddvA2 = vec_mulo( coefvA2, mfvA2 ); + multEvenvB2 = vec_mule( coefvB2, mfvB2 ); + multOddvB2 = vec_mulo( coefvB2, mfvB2 ); + multEvenvA2 = vec_sr( multEvenvA2, i_qbitsv ); + multOddvA2 = vec_sr( multOddvA2, i_qbitsv ); + multEvenvB2 = vec_sr( multEvenvB2, i_qbitsv ); + multOddvB2 = vec_sr( multOddvB2, i_qbitsv ); + temp1v = (vec_s16_t)vec_packs( multEvenvA2, multOddvA2 ); + temp2v = (vec_s16_t)vec_packs( multEvenvB2, multOddvB2 ); + tmpv0 = xxpermdi( temp1v, temp1v, 2 ); + tmpv1 = xxpermdi( temp2v, temp2v, 2 ); + temp1v = vec_mergeh( temp1v, tmpv0 ); + temp2v = vec_mergeh( temp2v, tmpv1 ); + temp1v = vec_xor( temp1v, mskA2 ); + temp2v = vec_xor( temp2v, mskB2 ); + temp1v = vec_adds( temp1v, vec_and( mskA2, one ) ); + temp2v = vec_adds( temp2v, vec_and( mskB2, one ) ); + vec_st( temp1v, 0, dct2 ); + vec_st( temp2v, 16, dct2 ); + nz2 = vec_or( temp1v, temp2v ); + + multEvenvA3 = vec_mule( coefvA3, mfvA3 ); + multOddvA3 = vec_mulo( coefvA3, mfvA3 ); + multEvenvB3 = vec_mule( coefvB3, mfvB3 ); + multOddvB3 = vec_mulo( coefvB3, mfvB3 ); + multEvenvA3 = vec_sr( multEvenvA3, i_qbitsv ); + multOddvA3 = vec_sr( multOddvA3, i_qbitsv ); + multEvenvB3 = vec_sr( multEvenvB3, i_qbitsv ); + multOddvB3 = vec_sr( multOddvB3, i_qbitsv ); + temp1v = (vec_s16_t)vec_packs( multEvenvA3, multOddvA3 ); + temp2v = (vec_s16_t)vec_packs( multEvenvB3, multOddvB3 ); + tmpv0 = xxpermdi( temp1v, temp1v, 2 ); + tmpv1 = xxpermdi( temp2v, temp2v, 2 ); + temp1v = vec_mergeh( temp1v, tmpv0 ); + temp2v = vec_mergeh( temp2v, tmpv1 ); + temp1v = vec_xor( temp1v, mskA3 ); + temp2v = vec_xor( temp2v, mskB3 ); + temp1v = vec_adds( temp1v, vec_and( mskA3, one ) ); + temp2v = vec_adds( temp2v, vec_and( mskB3, one ) ); + vec_st( temp1v, 0, dct3 ); + vec_st( temp2v, 16, dct3 ); + nz3 = vec_or( temp1v, temp2v ); + + return (vec_any_ne( nz0, zero_s16v ) << 0) | (vec_any_ne( nz1, zero_s16v ) << 1) | + (vec_any_ne( nz2, zero_s16v ) << 2) | (vec_any_ne( 
nz3, zero_s16v ) << 3); +} + // DC quant of a whole 4x4 block, unrolled 2x and "pre-scheduled" #define QUANT_16_U_DC( idx0, idx1 ) \ { \ @@ -180,6 +378,7 @@ vec_u32_t multEvenvA, multOddvA; vec_s16_t one = vec_splat_s16(1); vec_s16_t nz = zero_s16v; + static const vec_s16_t mask2 = CV(-1, -1, -1, -1, 0, 0, 0, 0); vec_s16_t temp1v, temp2v; @@ -190,7 +389,6 @@ i_qbitsv = vec_splats( (uint32_t) 16 ); biasv = vec_splats( (uint16_t)bias ); - static const vec_s16_t mask2 = CV(-1, -1, -1, -1, 0, 0, 0, 0); QUANT_4_U_DC(0); return vec_any_ne(vec_and(nz, mask2), zero_s16v); } @@ -215,9 +413,7 @@ vec_s16_t temp1v, temp2v, tmpv; - vec_u32_u qbits_u; - qbits_u.s[0]=16; - i_qbitsv = vec_splat(qbits_u.v, 0); + i_qbitsv = vec_splats( (uint32_t)16 ); for( int i = 0; i < 4; i++ ) QUANT_16_U( i*2*16, i*2*16+16 ); @@ -233,8 +429,6 @@ \ multEvenvA = vec_mule(dctv, mfv); \ multOddvA = vec_mulo(dctv, mfv); \ - dctv = (vec_s16_t) vec_packs(vec_mergeh(multEvenvA, multOddvA), \ - vec_mergel(multEvenvA, multOddvA)); \ dctv = (vec_s16_t) vec_packs( multEvenvA, multOddvA ); \ tmpv = xxpermdi( dctv, dctv, 2 ); \ dctv = vec_mergeh( dctv, tmpv ); \ diff -Nru x264-0.157.2935+git545de2f/common/ppc/quant.h x264-0.160.3011+gitcde9a93/common/ppc/quant.h --- x264-0.157.2935+git545de2f/common/ppc/quant.h 2018-09-25 12:35:00.000000000 +0000 +++ x264-0.160.3011+gitcde9a93/common/ppc/quant.h 2020-07-13 10:30:21.000000000 +0000 @@ -1,7 +1,7 @@ /***************************************************************************** * quant.h: ppc quantization ***************************************************************************** - * Copyright (C) 2007-2018 x264 project + * Copyright (C) 2007-2020 x264 project * * Authors: Guillaume Poirier * @@ -26,6 +26,8 @@ #ifndef X264_PPC_QUANT_H #define X264_PPC_QUANT_H +#define x264_quant_4x4x4_altivec x264_template(quant_4x4x4_altivec) +int x264_quant_4x4x4_altivec( int16_t dct[4][16], uint16_t mf[16], uint16_t bias[16] ); #define x264_quant_4x4_altivec 
x264_template(quant_4x4_altivec) int x264_quant_4x4_altivec( int16_t dct[16], uint16_t mf[16], uint16_t bias[16] ); #define x264_quant_8x8_altivec x264_template(quant_8x8_altivec) diff -Nru x264-0.157.2935+git545de2f/common/predict.c x264-0.160.3011+gitcde9a93/common/predict.c --- x264-0.157.2935+git545de2f/common/predict.c 2018-09-25 12:35:00.000000000 +0000 +++ x264-0.160.3011+gitcde9a93/common/predict.c 2020-07-13 10:30:21.000000000 +0000 @@ -1,7 +1,7 @@ /***************************************************************************** * predict.c: intra prediction ***************************************************************************** - * Copyright (C) 2003-2018 x264 project + * Copyright (C) 2003-2020 x264 project * * Authors: Laurent Aimar * Loren Merritt @@ -34,16 +34,16 @@ #if HAVE_MMX # include "x86/predict.h" #endif -#if ARCH_PPC +#if HAVE_ALTIVEC # include "ppc/predict.h" #endif -#if ARCH_ARM +#if HAVE_ARMV6 # include "arm/predict.h" #endif -#if ARCH_AARCH64 +#if HAVE_AARCH64 # include "aarch64/predict.h" #endif -#if ARCH_MIPS +#if HAVE_MSA # include "mips/predict.h" #endif @@ -883,7 +883,7 @@ /**************************************************************************** * Exported functions: ****************************************************************************/ -void x264_predict_16x16_init( int cpu, x264_predict_t pf[7] ) +void x264_predict_16x16_init( uint32_t cpu, x264_predict_t pf[7] ) { pf[I_PRED_16x16_V ] = x264_predict_16x16_v_c; pf[I_PRED_16x16_H ] = x264_predict_16x16_h_c; @@ -906,7 +906,7 @@ x264_predict_16x16_init_arm( cpu, pf ); #endif -#if ARCH_AARCH64 +#if HAVE_AARCH64 x264_predict_16x16_init_aarch64( cpu, pf ); #endif @@ -926,7 +926,7 @@ #endif } -void x264_predict_8x8c_init( int cpu, x264_predict_t pf[7] ) +void x264_predict_8x8c_init( uint32_t cpu, x264_predict_t pf[7] ) { pf[I_PRED_CHROMA_V ] = x264_predict_8x8c_v_c; pf[I_PRED_CHROMA_H ] = x264_predict_8x8c_h_c; @@ -949,7 +949,7 @@ x264_predict_8x8c_init_arm( cpu, pf ); #endif 
-#if ARCH_AARCH64 +#if HAVE_AARCH64 x264_predict_8x8c_init_aarch64( cpu, pf ); #endif @@ -963,7 +963,7 @@ #endif } -void x264_predict_8x16c_init( int cpu, x264_predict_t pf[7] ) +void x264_predict_8x16c_init( uint32_t cpu, x264_predict_t pf[7] ) { pf[I_PRED_CHROMA_V ] = x264_predict_8x16c_v_c; pf[I_PRED_CHROMA_H ] = x264_predict_8x16c_h_c; @@ -981,12 +981,12 @@ x264_predict_8x16c_init_arm( cpu, pf ); #endif -#if ARCH_AARCH64 +#if HAVE_AARCH64 x264_predict_8x16c_init_aarch64( cpu, pf ); #endif } -void x264_predict_8x8_init( int cpu, x264_predict8x8_t pf[12], x264_predict_8x8_filter_t *predict_filter ) +void x264_predict_8x8_init( uint32_t cpu, x264_predict8x8_t pf[12], x264_predict_8x8_filter_t *predict_filter ) { pf[I_PRED_8x8_V] = x264_predict_8x8_v_c; pf[I_PRED_8x8_H] = x264_predict_8x8_h_c; @@ -1010,7 +1010,7 @@ x264_predict_8x8_init_arm( cpu, pf, predict_filter ); #endif -#if ARCH_AARCH64 +#if HAVE_AARCH64 x264_predict_8x8_init_aarch64( cpu, pf, predict_filter ); #endif @@ -1024,7 +1024,7 @@ #endif } -void x264_predict_4x4_init( int cpu, x264_predict_t pf[12] ) +void x264_predict_4x4_init( uint32_t cpu, x264_predict_t pf[12] ) { pf[I_PRED_4x4_V] = x264_predict_4x4_v_c; pf[I_PRED_4x4_H] = x264_predict_4x4_h_c; @@ -1047,7 +1047,7 @@ x264_predict_4x4_init_arm( cpu, pf ); #endif -#if ARCH_AARCH64 +#if HAVE_AARCH64 x264_predict_4x4_init_aarch64( cpu, pf ); #endif } diff -Nru x264-0.157.2935+git545de2f/common/predict.h x264-0.160.3011+gitcde9a93/common/predict.h --- x264-0.157.2935+git545de2f/common/predict.h 2018-09-25 12:35:00.000000000 +0000 +++ x264-0.160.3011+gitcde9a93/common/predict.h 2020-07-13 10:30:21.000000000 +0000 @@ -1,7 +1,7 @@ /***************************************************************************** * predict.h: intra prediction ***************************************************************************** - * Copyright (C) 2003-2018 x264 project + * Copyright (C) 2003-2020 x264 project * * Authors: Loren Merritt * Laurent Aimar @@ -147,14 +147,14 
@@ void x264_predict_8x16c_p_c ( pixel *src ); #define x264_predict_16x16_init x264_template(predict_16x16_init) -void x264_predict_16x16_init ( int cpu, x264_predict_t pf[7] ); +void x264_predict_16x16_init ( uint32_t cpu, x264_predict_t pf[7] ); #define x264_predict_8x8c_init x264_template(predict_8x8c_init) -void x264_predict_8x8c_init ( int cpu, x264_predict_t pf[7] ); +void x264_predict_8x8c_init ( uint32_t cpu, x264_predict_t pf[7] ); #define x264_predict_8x16c_init x264_template(predict_8x16c_init) -void x264_predict_8x16c_init ( int cpu, x264_predict_t pf[7] ); +void x264_predict_8x16c_init ( uint32_t cpu, x264_predict_t pf[7] ); #define x264_predict_4x4_init x264_template(predict_4x4_init) -void x264_predict_4x4_init ( int cpu, x264_predict_t pf[12] ); +void x264_predict_4x4_init ( uint32_t cpu, x264_predict_t pf[12] ); #define x264_predict_8x8_init x264_template(predict_8x8_init) -void x264_predict_8x8_init ( int cpu, x264_predict8x8_t pf[12], x264_predict_8x8_filter_t *predict_filter ); +void x264_predict_8x8_init ( uint32_t cpu, x264_predict8x8_t pf[12], x264_predict_8x8_filter_t *predict_filter ); #endif diff -Nru x264-0.157.2935+git545de2f/common/quant.c x264-0.160.3011+gitcde9a93/common/quant.c --- x264-0.157.2935+git545de2f/common/quant.c 2018-09-25 12:35:00.000000000 +0000 +++ x264-0.160.3011+gitcde9a93/common/quant.c 2020-07-13 10:30:21.000000000 +0000 @@ -1,7 +1,7 @@ /***************************************************************************** * quant.c: quantization and level-run ***************************************************************************** - * Copyright (C) 2005-2018 x264 project + * Copyright (C) 2005-2020 x264 project * * Authors: Loren Merritt * Fiona Glaser @@ -31,16 +31,16 @@ #if HAVE_MMX #include "x86/quant.h" #endif -#if ARCH_PPC +#if HAVE_ALTIVEC # include "ppc/quant.h" #endif -#if ARCH_ARM +#if HAVE_ARMV6 # include "arm/quant.h" #endif -#if ARCH_AARCH64 +#if HAVE_AARCH64 # include "aarch64/quant.h" #endif -#if 
ARCH_MIPS +#if HAVE_MSA # include "mips/quant.h" #endif @@ -408,7 +408,7 @@ #define INIT_TRELLIS(...) #endif -void x264_quant_init( x264_t *h, int cpu, x264_quant_function_t *pf ) +void x264_quant_init( x264_t *h, uint32_t cpu, x264_quant_function_t *pf ) { pf->quant_8x8 = quant_8x8; pf->quant_4x4 = quant_4x4; @@ -741,6 +741,7 @@ pf->quant_2x2_dc = x264_quant_2x2_dc_altivec; pf->quant_4x4_dc = x264_quant_4x4_dc_altivec; pf->quant_4x4 = x264_quant_4x4_altivec; + pf->quant_4x4x4 = x264_quant_4x4x4_altivec; pf->quant_8x8 = x264_quant_8x8_altivec; pf->dequant_4x4 = x264_dequant_4x4_altivec; @@ -755,7 +756,7 @@ pf->coeff_last8 = x264_coeff_last8_arm; } #endif -#if HAVE_ARMV6 || ARCH_AARCH64 +#if HAVE_ARMV6 || HAVE_AARCH64 if( cpu&X264_CPU_NEON ) { pf->quant_2x2_dc = x264_quant_2x2_dc_neon; @@ -775,7 +776,7 @@ pf->decimate_score64 = x264_decimate_score64_neon; } #endif -#if ARCH_AARCH64 +#if HAVE_AARCH64 if( cpu&X264_CPU_ARMV8 ) { pf->coeff_last4 = x264_coeff_last4_aarch64; diff -Nru x264-0.157.2935+git545de2f/common/quant.h x264-0.160.3011+gitcde9a93/common/quant.h --- x264-0.157.2935+git545de2f/common/quant.h 2018-09-25 12:35:00.000000000 +0000 +++ x264-0.160.3011+gitcde9a93/common/quant.h 2020-07-13 10:30:21.000000000 +0000 @@ -1,7 +1,7 @@ /***************************************************************************** * quant.h: quantization and level-run ***************************************************************************** - * Copyright (C) 2005-2018 x264 project + * Copyright (C) 2005-2020 x264 project * * Authors: Loren Merritt * Fiona Glaser @@ -70,6 +70,6 @@ } x264_quant_function_t; #define x264_quant_init x264_template(quant_init) -void x264_quant_init( x264_t *h, int cpu, x264_quant_function_t *pf ); +void x264_quant_init( x264_t *h, uint32_t cpu, x264_quant_function_t *pf ); #endif diff -Nru x264-0.157.2935+git545de2f/common/rectangle.c x264-0.160.3011+gitcde9a93/common/rectangle.c --- x264-0.157.2935+git545de2f/common/rectangle.c 2018-09-25 
12:35:00.000000000 +0000 +++ x264-0.160.3011+gitcde9a93/common/rectangle.c 2020-07-13 10:30:21.000000000 +0000 @@ -1,7 +1,7 @@ /***************************************************************************** * rectangle.c: rectangle filling ***************************************************************************** - * Copyright (C) 2010-2018 x264 project + * Copyright (C) 2010-2020 x264 project * * Authors: Fiona Glaser * diff -Nru x264-0.157.2935+git545de2f/common/rectangle.h x264-0.160.3011+gitcde9a93/common/rectangle.h --- x264-0.157.2935+git545de2f/common/rectangle.h 2018-09-25 12:35:00.000000000 +0000 +++ x264-0.160.3011+gitcde9a93/common/rectangle.h 2020-07-13 10:30:21.000000000 +0000 @@ -1,7 +1,7 @@ /***************************************************************************** * rectangle.h: rectangle filling ***************************************************************************** - * Copyright (C) 2003-2018 x264 project + * Copyright (C) 2003-2020 x264 project * * Authors: Fiona Glaser * Loren Merritt diff -Nru x264-0.157.2935+git545de2f/common/set.c x264-0.160.3011+gitcde9a93/common/set.c --- x264-0.157.2935+git545de2f/common/set.c 2018-09-25 12:35:00.000000000 +0000 +++ x264-0.160.3011+gitcde9a93/common/set.c 2020-07-13 10:30:21.000000000 +0000 @@ -1,7 +1,7 @@ /***************************************************************************** * set.c: quantization init ***************************************************************************** - * Copyright (C) 2005-2018 x264 project + * Copyright (C) 2005-2020 x264 project * * Authors: Loren Merritt * diff -Nru x264-0.157.2935+git545de2f/common/set.h x264-0.160.3011+gitcde9a93/common/set.h --- x264-0.157.2935+git545de2f/common/set.h 2018-09-25 12:35:00.000000000 +0000 +++ x264-0.160.3011+gitcde9a93/common/set.h 2020-07-13 10:30:21.000000000 +0000 @@ -1,7 +1,7 @@ /***************************************************************************** * set.h: quantization init 
***************************************************************************** - * Copyright (C) 2003-2018 x264 project + * Copyright (C) 2003-2020 x264 project * * Authors: Loren Merritt * Laurent Aimar diff -Nru x264-0.157.2935+git545de2f/common/tables.c x264-0.160.3011+gitcde9a93/common/tables.c --- x264-0.157.2935+git545de2f/common/tables.c 2018-09-25 12:35:00.000000000 +0000 +++ x264-0.160.3011+gitcde9a93/common/tables.c 2020-07-13 10:30:21.000000000 +0000 @@ -1,7 +1,7 @@ /***************************************************************************** * tables.c: const tables ***************************************************************************** - * Copyright (C) 2003-2018 x264 project + * Copyright (C) 2003-2020 x264 project * * Authors: Laurent Aimar * Loren Merritt @@ -2535,5 +2535,5 @@ }, }; -/* psy_trellis_init() has the largest size requirement of 16*FDEC_STRIDE*sizeof(pixel) */ +/* psy_trellis_init() has the largest size requirement of 16*FDEC_STRIDE*SIZEOF_PIXEL */ ALIGNED_64( uint8_t x264_zero[1024] ) = { 0 }; diff -Nru x264-0.157.2935+git545de2f/common/tables.h x264-0.160.3011+gitcde9a93/common/tables.h --- x264-0.157.2935+git545de2f/common/tables.h 2018-09-25 12:35:00.000000000 +0000 +++ x264-0.160.3011+gitcde9a93/common/tables.h 2020-07-13 10:30:21.000000000 +0000 @@ -1,7 +1,7 @@ /***************************************************************************** * tables.h: const tables ***************************************************************************** - * Copyright (C) 2003-2018 x264 project + * Copyright (C) 2003-2020 x264 project * * Authors: Laurent Aimar * Loren Merritt @@ -33,7 +33,7 @@ uint8_t i_size; } vlc_t; -extern const x264_level_t x264_levels[]; +X264_API extern const x264_level_t x264_levels[]; extern const uint8_t x264_exp2_lut[64]; extern const float x264_log2_lut[128]; diff -Nru x264-0.157.2935+git545de2f/common/threadpool.c x264-0.160.3011+gitcde9a93/common/threadpool.c --- x264-0.157.2935+git545de2f/common/threadpool.c 
2018-09-25 12:35:00.000000000 +0000 +++ x264-0.160.3011+gitcde9a93/common/threadpool.c 2020-07-13 10:30:21.000000000 +0000 @@ -1,7 +1,7 @@ /***************************************************************************** * threadpool.c: thread pooling ***************************************************************************** - * Copyright (C) 2010-2018 x264 project + * Copyright (C) 2010-2020 x264 project * * Authors: Steven Walters * @@ -34,7 +34,7 @@ struct x264_threadpool_t { - int exit; + volatile int exit; int threads; x264_pthread_t *thread_handle; void (*init_func)(void *); @@ -47,7 +47,7 @@ x264_sync_frame_list_t done; /* list of jobs that have finished processing */ }; -static void *threadpool_thread_internal( x264_threadpool_t *pool ) +REALIGN_STACK static void *threadpool_thread( x264_threadpool_t *pool ) { if( pool->init_func ) pool->init_func( pool->init_arg ); @@ -72,11 +72,6 @@ return NULL; } -static void *threadpool_thread( x264_threadpool_t *pool ) -{ - return (void*)x264_stack_align( threadpool_thread_internal, pool ); -} - int x264_threadpool_init( x264_threadpool_t **p_pool, int threads, void (*init_func)(void *), void *init_arg ) { diff -Nru x264-0.157.2935+git545de2f/common/threadpool.h x264-0.160.3011+gitcde9a93/common/threadpool.h --- x264-0.157.2935+git545de2f/common/threadpool.h 2018-09-25 12:35:00.000000000 +0000 +++ x264-0.160.3011+gitcde9a93/common/threadpool.h 2020-07-13 10:30:21.000000000 +0000 @@ -1,7 +1,7 @@ /***************************************************************************** * threadpool.h: thread pooling ***************************************************************************** - * Copyright (C) 2010-2018 x264 project + * Copyright (C) 2010-2020 x264 project * * Authors: Steven Walters * @@ -30,14 +30,14 @@ #if HAVE_THREAD #define x264_threadpool_init x264_template(threadpool_init) -int x264_threadpool_init( x264_threadpool_t **p_pool, int threads, - void (*init_func)(void *), void *init_arg ); +X264_API int 
x264_threadpool_init( x264_threadpool_t **p_pool, int threads, + void (*init_func)(void *), void *init_arg ); #define x264_threadpool_run x264_template(threadpool_run) -void x264_threadpool_run( x264_threadpool_t *pool, void *(*func)(void *), void *arg ); +X264_API void x264_threadpool_run( x264_threadpool_t *pool, void *(*func)(void *), void *arg ); #define x264_threadpool_wait x264_template(threadpool_wait) -void *x264_threadpool_wait( x264_threadpool_t *pool, void *arg ); +X264_API void *x264_threadpool_wait( x264_threadpool_t *pool, void *arg ); #define x264_threadpool_delete x264_template(threadpool_delete) -void x264_threadpool_delete( x264_threadpool_t *pool ); +X264_API void x264_threadpool_delete( x264_threadpool_t *pool ); #else #define x264_threadpool_init(p,t,f,a) -1 #define x264_threadpool_run(p,f,a) diff -Nru x264-0.157.2935+git545de2f/common/vlc.c x264-0.160.3011+gitcde9a93/common/vlc.c --- x264-0.157.2935+git545de2f/common/vlc.c 2018-09-25 12:35:00.000000000 +0000 +++ x264-0.160.3011+gitcde9a93/common/vlc.c 2020-07-13 10:30:21.000000000 +0000 @@ -1,7 +1,7 @@ /***************************************************************************** * vlc.c : vlc tables ***************************************************************************** - * Copyright (C) 2003-2018 x264 project + * Copyright (C) 2003-2020 x264 project * * Authors: Laurent Aimar * Fiona Glaser diff -Nru x264-0.157.2935+git545de2f/common/win32thread.c x264-0.160.3011+gitcde9a93/common/win32thread.c --- x264-0.157.2935+git545de2f/common/win32thread.c 2018-09-25 12:35:00.000000000 +0000 +++ x264-0.160.3011+gitcde9a93/common/win32thread.c 2020-07-13 10:30:21.000000000 +0000 @@ -1,7 +1,7 @@ /***************************************************************************** * win32thread.c: windows threading ***************************************************************************** - * Copyright (C) 2010-2018 x264 project + * Copyright (C) 2010-2020 x264 project * * Authors: Steven Walters * 
Pegasys Inc. diff -Nru x264-0.157.2935+git545de2f/common/win32thread.h x264-0.160.3011+gitcde9a93/common/win32thread.h --- x264-0.157.2935+git545de2f/common/win32thread.h 2018-09-25 12:35:00.000000000 +0000 +++ x264-0.160.3011+gitcde9a93/common/win32thread.h 2020-07-13 10:30:21.000000000 +0000 @@ -1,7 +1,7 @@ /***************************************************************************** * win32thread.h: windows threading ***************************************************************************** - * Copyright (C) 2010-2018 x264 project + * Copyright (C) 2010-2020 x264 project * * Authors: Steven Walters * diff -Nru x264-0.157.2935+git545de2f/common/x86/bitstream-a.asm x264-0.160.3011+gitcde9a93/common/x86/bitstream-a.asm --- x264-0.157.2935+git545de2f/common/x86/bitstream-a.asm 2018-09-25 12:35:00.000000000 +0000 +++ x264-0.160.3011+gitcde9a93/common/x86/bitstream-a.asm 2020-07-13 10:30:21.000000000 +0000 @@ -1,7 +1,7 @@ ;***************************************************************************** ;* bitstream-a.asm: x86 bitstream functions ;***************************************************************************** -;* Copyright (C) 2010-2018 x264 project +;* Copyright (C) 2010-2020 x264 project ;* ;* Authors: Fiona Glaser ;* Henrik Gramner diff -Nru x264-0.157.2935+git545de2f/common/x86/bitstream.h x264-0.160.3011+gitcde9a93/common/x86/bitstream.h --- x264-0.157.2935+git545de2f/common/x86/bitstream.h 2018-09-25 12:35:00.000000000 +0000 +++ x264-0.160.3011+gitcde9a93/common/x86/bitstream.h 2020-07-13 10:30:21.000000000 +0000 @@ -1,7 +1,7 @@ /***************************************************************************** * bitstream.h: x86 bitstream functions ***************************************************************************** - * Copyright (C) 2017-2018 x264 project + * Copyright (C) 2017-2020 x264 project * * Authors: Anton Mitrofanov * diff -Nru x264-0.157.2935+git545de2f/common/x86/cabac-a.asm x264-0.160.3011+gitcde9a93/common/x86/cabac-a.asm --- 
x264-0.157.2935+git545de2f/common/x86/cabac-a.asm 2018-09-25 12:35:00.000000000 +0000 +++ x264-0.160.3011+gitcde9a93/common/x86/cabac-a.asm 2020-07-13 10:30:21.000000000 +0000 @@ -1,7 +1,7 @@ ;***************************************************************************** ;* cabac-a.asm: x86 cabac ;***************************************************************************** -;* Copyright (C) 2008-2018 x264 project +;* Copyright (C) 2008-2020 x264 project ;* ;* Authors: Loren Merritt ;* Fiona Glaser @@ -36,11 +36,7 @@ %xdefine %%funccpu2 %3 ; last64 %xdefine %%funccpu3 %4 ; last15/last16 coeff_last_%1: - %ifdef PIC - %xdefine %%base coeff_last_%1 ; offset relative to the start of the table - %else - %xdefine %%base 0 ; absolute address - %endif + %xdefine %%base coeff_last_%1 %rep 14 %ifidn %5, 4 dd mangle(private_prefix %+ _coeff_last%5_ %+ %%funccpu1) - %%base @@ -121,15 +117,13 @@ endstruc %macro LOAD_GLOBAL 3-5 0 ; dst, base, off1, off2, tmp -%ifdef PIC - %ifidn %4, 0 - movzx %1, byte [%2+%3+r7-$$] - %else - lea %5, [r7+%4] - movzx %1, byte [%2+%3+%5-$$] - %endif -%else +%if ARCH_X86_64 == 0 movzx %1, byte [%2+%3+%4] +%elifidn %4, 0 + movzx %1, byte [%2+%3+r7-$$] +%else + lea %5, [r7+%4] + movzx %1, byte [%2+%3+%5-$$] %endif %endmacro @@ -154,9 +148,9 @@ shr t5d, 6 movifnidn t2d, r2m %if WIN64 - PUSH r7 + PUSH r7 %endif -%ifdef PIC +%if ARCH_X86_64 lea r7, [$$] %endif LOAD_GLOBAL t5d, cabac_range_lps-4, t5, t4*2, t4 @@ -183,7 +177,7 @@ shl t6d, t3b %endif %if WIN64 - POP r7 + POP r7 %endif mov [t0+cb.range], t4d add t3d, [t0+cb.queue] @@ -278,6 +272,7 @@ CABAC asm CABAC bmi2 +%if ARCH_X86_64 ; %1 = label name ; %2 = node_ctx init? 
%macro COEFF_ABS_LEVEL_GT1 2 @@ -409,13 +404,9 @@ %endmacro %macro COEFF_LAST 2 ; table, ctx_block_cat -%ifdef PIC lea r1, [%1 GLOBAL] movsxd r6, [r1+4*%2] add r6, r1 -%else - movsxd r6, [%1+4*%2] -%endif call r6 %endmacro @@ -436,15 +427,9 @@ %define dct r4 %endif -%ifdef PIC - cglobal func, 4,13,6,-maxcoeffs*SIZEOF_DCTCOEF +cglobal func, 4,13,6,-maxcoeffs*SIZEOF_DCTCOEF lea r12, [$$] %define GLOBAL +r12-$$ -%else - cglobal func, 4,12,6,-maxcoeffs*SIZEOF_DCTCOEF - %define GLOBAL -%endif - shl r1d, 4 ; MB_INTERLACED*16 %if %1 lea r4, [significant_coeff_flag_offset_8x8+r1*4 GLOBAL] ; r12 = sig offset 8x8 @@ -554,7 +539,6 @@ RET %endmacro -%if ARCH_X86_64 INIT_XMM sse2 CABAC_RESIDUAL_RD 0, coeff_last_sse2 CABAC_RESIDUAL_RD 1, coeff_last_sse2 @@ -575,7 +559,6 @@ CABAC_RESIDUAL_RD 0, coeff_last_avx512 INIT_ZMM avx512 CABAC_RESIDUAL_RD 1, coeff_last_avx512 -%endif ;----------------------------------------------------------------------------- ; void x264_cabac_block_residual_internal_sse2 ( dctcoef *l, int b_interlaced, @@ -653,15 +636,10 @@ %macro CABAC_RESIDUAL 1 cglobal cabac_block_residual_internal, 4,15,0,-4*64 -%ifdef PIC ; if we use the same r7 as in cabac_encode_decision, we can cheat and save a register. 
lea r7, [$$] %define lastm [rsp+4*1] %define GLOBAL +r7-$$ -%else - %define lastm r7d - %define GLOBAL -%endif shl r1d, 4 %define sigoffq r8 @@ -696,7 +674,7 @@ xor r10d, r10d cmp countcatd, 63 je .sigmap_8x8 - SIGMAP_LOOP 0, r12d, countcatd, + SIGMAP_LOOP 0, r12d, countcatd .sigmap_8x8: SIGMAP_LOOP 1, r11d, 63, _8x8 .level_loop_start: @@ -779,7 +757,6 @@ RET %endmacro -%if ARCH_X86_64 INIT_XMM sse2 CABAC_RESIDUAL coeff_last_sse2 INIT_XMM lzcnt diff -Nru x264-0.157.2935+git545de2f/common/x86/const-a.asm x264-0.160.3011+gitcde9a93/common/x86/const-a.asm --- x264-0.157.2935+git545de2f/common/x86/const-a.asm 2018-09-25 12:35:00.000000000 +0000 +++ x264-0.160.3011+gitcde9a93/common/x86/const-a.asm 2020-07-13 10:30:21.000000000 +0000 @@ -1,7 +1,7 @@ ;***************************************************************************** ;* const-a.asm: x86 global constants ;***************************************************************************** -;* Copyright (C) 2010-2018 x264 project +;* Copyright (C) 2010-2020 x264 project ;* ;* Authors: Loren Merritt ;* Fiona Glaser @@ -56,7 +56,7 @@ const pw_8, times 8 dw 8 const pw_64, times 8 dw 64 const pw_256, times 8 dw 256 -const pw_32_0, times 4 dw 32, +const pw_32_0, times 4 dw 32 times 4 dw 0 const pw_8000, times 8 dw 0x8000 const pw_3fff, times 8 dw 0x3fff diff -Nru x264-0.157.2935+git545de2f/common/x86/cpu-a.asm x264-0.160.3011+gitcde9a93/common/x86/cpu-a.asm --- x264-0.157.2935+git545de2f/common/x86/cpu-a.asm 2018-09-25 12:35:00.000000000 +0000 +++ x264-0.160.3011+gitcde9a93/common/x86/cpu-a.asm 2020-07-13 10:30:21.000000000 +0000 @@ -1,7 +1,7 @@ ;***************************************************************************** ;* cpu-a.asm: x86 cpu utilities ;***************************************************************************** -;* Copyright (C) 2003-2018 x264 project +;* Copyright (C) 2003-2020 x264 project ;* ;* Authors: Laurent Aimar ;* Loren Merritt @@ -78,33 +78,7 @@ sfence ret -%if ARCH_X86_64 - 
-;----------------------------------------------------------------------------- -; intptr_t stack_align( void (*func)(void*), ... ); (up to 5 args) -;----------------------------------------------------------------------------- -cvisible stack_align - mov rax, r0mp - mov r0, r1mp - mov r1, r2mp - mov r2, r3mp - mov r3, r4mp - mov r4, r5mp - push rbp - mov rbp, rsp -%if WIN64 - sub rsp, 40 ; shadow space + r4 -%endif - and rsp, ~(STACK_ALIGNMENT-1) -%if WIN64 - mov [rsp+32], r4 -%endif - call rax - leave - ret - -%else - +%if ARCH_X86_64 == 0 ;----------------------------------------------------------------------------- ; int cpu_cpuid_test( void ) ; return 0 if unsupported @@ -130,24 +104,4 @@ pop ebx popfd ret - -cvisible stack_align - push ebp - mov ebp, esp - sub esp, 20 - and esp, ~(STACK_ALIGNMENT-1) - mov r0, [ebp+12] - mov r1, [ebp+16] - mov r2, [ebp+20] - mov [esp+ 0], r0 - mov [esp+ 4], r1 - mov [esp+ 8], r2 - mov r0, [ebp+24] - mov r1, [ebp+28] - mov [esp+12], r0 - mov [esp+16], r1 - call [ebp+ 8] - leave - ret - %endif diff -Nru x264-0.157.2935+git545de2f/common/x86/dct-32.asm x264-0.160.3011+gitcde9a93/common/x86/dct-32.asm --- x264-0.157.2935+git545de2f/common/x86/dct-32.asm 2018-09-25 12:35:00.000000000 +0000 +++ x264-0.160.3011+gitcde9a93/common/x86/dct-32.asm 2020-07-13 10:30:21.000000000 +0000 @@ -1,7 +1,7 @@ ;***************************************************************************** ;* dct-32.asm: x86_32 transform and zigzag ;***************************************************************************** -;* Copyright (C) 2003-2018 x264 project +;* Copyright (C) 2003-2020 x264 project ;* ;* Authors: Loren Merritt ;* Holger Lubitz diff -Nru x264-0.157.2935+git545de2f/common/x86/dct-64.asm x264-0.160.3011+gitcde9a93/common/x86/dct-64.asm --- x264-0.157.2935+git545de2f/common/x86/dct-64.asm 2018-09-25 12:35:00.000000000 +0000 +++ x264-0.160.3011+gitcde9a93/common/x86/dct-64.asm 2020-07-13 10:30:21.000000000 +0000 @@ -1,7 +1,7 @@ 
;***************************************************************************** ;* dct-64.asm: x86_64 transform and zigzag ;***************************************************************************** -;* Copyright (C) 2003-2018 x264 project +;* Copyright (C) 2003-2020 x264 project ;* ;* Authors: Loren Merritt ;* Holger Lubitz diff -Nru x264-0.157.2935+git545de2f/common/x86/dct-a.asm x264-0.160.3011+gitcde9a93/common/x86/dct-a.asm --- x264-0.157.2935+git545de2f/common/x86/dct-a.asm 2018-09-25 12:35:00.000000000 +0000 +++ x264-0.160.3011+gitcde9a93/common/x86/dct-a.asm 2020-07-13 10:30:21.000000000 +0000 @@ -1,7 +1,7 @@ ;***************************************************************************** ;* dct-a.asm: x86 transform and zigzag ;***************************************************************************** -;* Copyright (C) 2003-2018 x264 project +;* Copyright (C) 2003-2020 x264 project ;* ;* Authors: Holger Lubitz ;* Loren Merritt diff -Nru x264-0.157.2935+git545de2f/common/x86/dct.h x264-0.160.3011+gitcde9a93/common/x86/dct.h --- x264-0.157.2935+git545de2f/common/x86/dct.h 2018-09-25 12:35:00.000000000 +0000 +++ x264-0.160.3011+gitcde9a93/common/x86/dct.h 2020-07-13 10:30:21.000000000 +0000 @@ -1,7 +1,7 @@ /***************************************************************************** * dct.h: x86 transform and zigzag ***************************************************************************** - * Copyright (C) 2003-2018 x264 project + * Copyright (C) 2003-2020 x264 project * * Authors: Loren Merritt * Laurent Aimar diff -Nru x264-0.157.2935+git545de2f/common/x86/deblock-a.asm x264-0.160.3011+gitcde9a93/common/x86/deblock-a.asm --- x264-0.157.2935+git545de2f/common/x86/deblock-a.asm 2018-09-25 12:35:00.000000000 +0000 +++ x264-0.160.3011+gitcde9a93/common/x86/deblock-a.asm 2020-07-13 10:30:21.000000000 +0000 @@ -1,7 +1,7 @@ ;***************************************************************************** ;* deblock-a.asm: x86 deblocking 
;***************************************************************************** -;* Copyright (C) 2005-2018 x264 project +;* Copyright (C) 2005-2020 x264 project ;* ;* Authors: Loren Merritt ;* Fiona Glaser diff -Nru x264-0.157.2935+git545de2f/common/x86/deblock.h x264-0.160.3011+gitcde9a93/common/x86/deblock.h --- x264-0.157.2935+git545de2f/common/x86/deblock.h 2018-09-25 12:35:00.000000000 +0000 +++ x264-0.160.3011+gitcde9a93/common/x86/deblock.h 2020-07-13 10:30:21.000000000 +0000 @@ -1,7 +1,7 @@ /***************************************************************************** * deblock.h: x86 deblocking ***************************************************************************** - * Copyright (C) 2017-2018 x264 project + * Copyright (C) 2017-2020 x264 project * * Authors: Anton Mitrofanov * diff -Nru x264-0.157.2935+git545de2f/common/x86/mc-a2.asm x264-0.160.3011+gitcde9a93/common/x86/mc-a2.asm --- x264-0.157.2935+git545de2f/common/x86/mc-a2.asm 2018-09-25 12:35:00.000000000 +0000 +++ x264-0.160.3011+gitcde9a93/common/x86/mc-a2.asm 2020-07-13 10:30:21.000000000 +0000 @@ -1,7 +1,7 @@ ;***************************************************************************** ;* mc-a2.asm: x86 motion compensation ;***************************************************************************** -;* Copyright (C) 2005-2018 x264 project +;* Copyright (C) 2005-2020 x264 project ;* ;* Authors: Loren Merritt ;* Fiona Glaser diff -Nru x264-0.157.2935+git545de2f/common/x86/mc-a.asm x264-0.160.3011+gitcde9a93/common/x86/mc-a.asm --- x264-0.157.2935+git545de2f/common/x86/mc-a.asm 2018-09-25 12:35:00.000000000 +0000 +++ x264-0.160.3011+gitcde9a93/common/x86/mc-a.asm 2020-07-13 10:30:21.000000000 +0000 @@ -1,7 +1,7 @@ ;***************************************************************************** ;* mc-a.asm: x86 motion compensation ;***************************************************************************** -;* Copyright (C) 2003-2018 x264 project +;* Copyright (C) 2003-2020 x264 project ;* 
;* Authors: Loren Merritt ;* Fiona Glaser @@ -1331,7 +1331,7 @@ sub r4, r2 shl r6, 4 ;jump = (offset + align*2)*48 %define avg_w16_addr avg_w16_align1_1_ssse3-(avg_w16_align2_2_ssse3-avg_w16_align1_1_ssse3) -%ifdef PIC +%if ARCH_X86_64 lea r7, [avg_w16_addr] add r6, r7 %else @@ -2020,7 +2020,7 @@ %if cpuflag(cache64) mov t0d, r3d and t0d, 7 -%ifdef PIC +%if ARCH_X86_64 lea t1, [ch_shuf_adj] movddup xm5, [t1 + t0*4] %else diff -Nru x264-0.157.2935+git545de2f/common/x86/mc-c.c x264-0.160.3011+gitcde9a93/common/x86/mc-c.c --- x264-0.157.2935+git545de2f/common/x86/mc-c.c 2018-09-25 12:35:00.000000000 +0000 +++ x264-0.160.3011+gitcde9a93/common/x86/mc-c.c 2020-07-13 10:30:21.000000000 +0000 @@ -1,7 +1,7 @@ /***************************************************************************** * mc-c.c: x86 motion compensation ***************************************************************************** - * Copyright (C) 2003-2018 x264 project + * Copyright (C) 2003-2020 x264 project * * Authors: Laurent Aimar * Loren Merritt @@ -537,7 +537,7 @@ return; } w->weightfn = h->mc.weight; - den1 = 1 << (w->i_denom - 1) | w->i_offset << w->i_denom; + den1 = w->i_offset << w->i_denom | (w->i_denom ? 
1 << (w->i_denom - 1) : 0); for( i = 0; i < 8; i++ ) { w->cachea[i] = w->i_scale; @@ -739,28 +739,32 @@ #define MC_CLIP_ADD(s,x)\ do\ {\ - int temp;\ + int temp_s = s;\ + int temp_x = x;\ asm("movd %0, %%xmm0 \n"\ - "movd %2, %%xmm1 \n"\ + "movd %1, %%xmm1 \n"\ "paddsw %%xmm1, %%xmm0 \n"\ - "movd %%xmm0, %1 \n"\ - :"+m"(s), "=&r"(temp)\ - :"m"(x)\ + "movd %%xmm0, %0 \n"\ + :"+&r"(temp_s)\ + :"r"(temp_x)\ );\ - s = temp;\ + s = temp_s;\ } while( 0 ) #undef MC_CLIP_ADD2 #define MC_CLIP_ADD2(s,x)\ do\ {\ + x264_union32_t temp = { .w={ (s)[0], (s)[1] } };\ asm("movd %0, %%xmm0 \n"\ "movd %1, %%xmm1 \n"\ "paddsw %%xmm1, %%xmm0 \n"\ "movd %%xmm0, %0 \n"\ - :"+m"(M32(s))\ + :"+&r"(temp)\ :"m"(M32(x))\ );\ + (s)[0] = temp.w[0];\ + (s)[1] = temp.w[1];\ } while( 0 ) #endif @@ -787,7 +791,7 @@ } #endif -void x264_mc_init_mmx( int cpu, x264_mc_functions_t *pf ) +void x264_mc_init_mmx( uint32_t cpu, x264_mc_functions_t *pf ) { if( !(cpu&X264_CPU_MMX) ) return; diff -Nru x264-0.157.2935+git545de2f/common/x86/mc.h x264-0.160.3011+gitcde9a93/common/x86/mc.h --- x264-0.157.2935+git545de2f/common/x86/mc.h 2018-09-25 12:35:00.000000000 +0000 +++ x264-0.160.3011+gitcde9a93/common/x86/mc.h 2020-07-13 10:30:21.000000000 +0000 @@ -1,7 +1,7 @@ /***************************************************************************** * mc.h: x86 motion compensation ***************************************************************************** - * Copyright (C) 2003-2018 x264 project + * Copyright (C) 2003-2020 x264 project * * Authors: Loren Merritt * Laurent Aimar @@ -28,6 +28,6 @@ #define X264_X86_MC_H #define x264_mc_init_mmx x264_template(mc_init_mmx) -void x264_mc_init_mmx( int cpu, x264_mc_functions_t *pf ); +void x264_mc_init_mmx( uint32_t cpu, x264_mc_functions_t *pf ); #endif diff -Nru x264-0.157.2935+git545de2f/common/x86/pixel-32.asm x264-0.160.3011+gitcde9a93/common/x86/pixel-32.asm --- x264-0.157.2935+git545de2f/common/x86/pixel-32.asm 2018-09-25 12:35:00.000000000 +0000 +++ 
x264-0.160.3011+gitcde9a93/common/x86/pixel-32.asm 2020-07-13 10:30:21.000000000 +0000 @@ -1,7 +1,7 @@ ;***************************************************************************** ;* pixel-32.asm: x86_32 pixel metrics ;***************************************************************************** -;* Copyright (C) 2003-2018 x264 project +;* Copyright (C) 2003-2020 x264 project ;* ;* Authors: Loren Merritt ;* Laurent Aimar diff -Nru x264-0.157.2935+git545de2f/common/x86/pixel-a.asm x264-0.160.3011+gitcde9a93/common/x86/pixel-a.asm --- x264-0.157.2935+git545de2f/common/x86/pixel-a.asm 2018-09-25 12:35:00.000000000 +0000 +++ x264-0.160.3011+gitcde9a93/common/x86/pixel-a.asm 2020-07-13 10:30:21.000000000 +0000 @@ -1,7 +1,7 @@ ;***************************************************************************** ;* pixel.asm: x86 pixel metrics ;***************************************************************************** -;* Copyright (C) 2003-2018 x264 project +;* Copyright (C) 2003-2020 x264 project ;* ;* Authors: Loren Merritt ;* Holger Lubitz @@ -131,6 +131,7 @@ sw_f0: dq 0xfff0, 0 pd_f0: times 4 dd 0xffff0000 +pd_2: times 4 dd 2 pw_76543210: dw 0, 1, 2, 3, 4, 5, 6, 7 @@ -2223,7 +2224,7 @@ ;----------------------------------------------------------------------------- ; void intra_sa8d_x3_8x8( uint8_t *fenc, uint8_t edge[36], int *res ) ;----------------------------------------------------------------------------- -cglobal intra_sa8d_x3_8x8, 3,3,14 +cglobal intra_sa8d_x3_8x8, 3,3,13 ; 8x8 hadamard pxor m8, m8 movq m0, [r0+0*FENC_STRIDE] @@ -2245,77 +2246,80 @@ HADAMARD8_2D 0, 1, 2, 3, 4, 5, 6, 7, 8 - ABSW2 m8, m9, m2, m3, m2, m3 - ABSW2 m10, m11, m4, m5, m4, m5 - paddusw m8, m10 - paddusw m9, m11 - ABSW2 m10, m11, m6, m7, m6, m7 - ABSW m13, m1, m1 - paddusw m10, m11 - paddusw m8, m9 - paddusw m13, m10 - paddusw m13, m8 + ABSW2 m8, m9, m2, m3, m2, m3 + ABSW2 m10, m11, m4, m5, m4, m5 + paddw m8, m10 + paddw m9, m11 + ABSW2 m10, m11, m6, m7, m6, m7 + ABSW m12, m1, m1 + paddw 
m10, m11 + paddw m8, m9 + paddw m12, m10 + paddw m12, m8 ; 1D hadamard of edges - movq m8, [r1+7] - movq m9, [r1+16] - pxor m10, m10 - punpcklbw m8, m10 - punpcklbw m9, m10 + movq m8, [r1+7] + movq m9, [r1+16] + pxor m10, m10 + punpcklbw m8, m10 + punpcklbw m9, m10 HSUMSUB2 pmullw, m8, m9, m10, m11, m11, q1032, [pw_ppppmmmm] HSUMSUB2 pmullw, m8, m9, m10, m11, m11, q2301, [pw_ppmmppmm] - pshuflw m10, m8, q2301 - pshuflw m11, m9, q2301 - pshufhw m10, m10, q2301 - pshufhw m11, m11, q2301 - pmullw m8, [pw_pmpmpmpm] - pmullw m11, [pw_pmpmpmpm] - paddw m8, m10 - paddw m9, m11 + pshuflw m10, m8, q2301 + pshuflw m11, m9, q2301 + pshufhw m10, m10, q2301 + pshufhw m11, m11, q2301 + pmullw m8, [pw_pmpmpmpm] + pmullw m11, [pw_pmpmpmpm] + paddw m8, m10 + paddw m9, m11 ; differences - paddw m10, m8, m9 - paddw m10, [pw_8] - pand m10, [sw_f0] - psllw m10, 2 ; dc - - psllw m8, 3 ; left edge - psubw m8, m0 - psubw m10, m0 - ABSW2 m8, m10, m8, m10, m11, m12 ; 1x8 sum - paddusw m8, m13 - paddusw m13, m10 - punpcklwd m0, m1 - punpcklwd m2, m3 - punpcklwd m4, m5 - punpcklwd m6, m7 - punpckldq m0, m2 - punpckldq m4, m6 - punpcklqdq m0, m4 ; transpose - psllw m9, 3 ; top edge - psrldq m2, m13, 2 ; 8x7 sum - psubw m0, m9 ; 8x1 sum - ABSW m0, m0, m9 - paddusw m2, m0 + paddw m10, m8, m9 + paddw m10, [pw_8] + pand m10, [sw_f0] + psllw m8, 3 ; left edge + psllw m10, 2 ; dc + psubw m8, m0 + psubw m10, m0 + punpcklwd m0, m1 + punpcklwd m2, m3 + punpcklwd m4, m5 + punpcklwd m6, m7 + ABSW m10, m10, m1 + paddw m10, m12 + punpckldq m0, m2 + punpckldq m4, m6 + punpcklqdq m0, m4 ; transpose + psllw m9, 3 ; top edge + psrldq m2, m10, 2 ; 8x7 sum + psubw m0, m9 ; 8x1 sum + ABSW2 m8, m0, m8, m0, m1, m3 ; 1x8 sum + paddw m8, m12 + paddusw m2, m0 ; 3x HADDW - movdqa m7, [pw_1] - pmaddwd m2, m7 - pmaddwd m8, m7 - pmaddwd m13, m7 - punpckhdq m3, m2, m8 - punpckldq m2, m8 - pshufd m5, m13, q3311 - paddd m2, m3 - paddd m5, m13 - punpckhqdq m0, m2, m5 - punpcklqdq m2, m5 - pavgw m0, m2 - pxor m1, m1 - pavgw 
m0, m1 - movq [r2], m0 ; i8x8_v, i8x8_h - psrldq m0, 8 - movd [r2+8], m0 ; i8x8_dc + mova m7, [pd_f0] + pandn m0, m7, m10 + psrld m10, 16 + pandn m1, m7, m8 + psrld m8, 16 + pandn m7, m2 + psrld m2, 16 + paddd m0, m10 + paddd m1, m8 + paddd m2, m7 + pshufd m3, m0, q2301 + punpckhdq m4, m2, m1 + punpckldq m2, m1 + paddd m3, m0 + paddd m2, m4 + punpckhqdq m0, m2, m3 + punpcklqdq m2, m3 + paddd m0, [pd_2] + paddd m0, m2 + psrld m0, 2 + mova [r2], m0 RET %endif ; ARCH_X86_64 %endmacro ; INTRA_SA8D_SSE2 @@ -2862,7 +2866,7 @@ ; output the predicted samples mov r3d, eax shr r3d, 16 -%ifdef PIC +%if ARCH_X86_64 lea r2, [%2_lut] movzx r2d, byte [r2+r3] %else @@ -5099,7 +5103,7 @@ je .skip ; faster only if this is the common case; remove branch if we use ssim on a macroblock level neg r2 -%ifdef PIC +%if ARCH_X86_64 lea r3, [mask_ff + 16] %xdefine %%mask r3 %else @@ -5549,7 +5553,7 @@ add r5, r6 xor r0d, r0d ; nmv mov [r5], r0d -%ifdef PIC +%if ARCH_X86_64 lea r1, [$$] %define GLOBAL +r1-$$ %else diff -Nru x264-0.157.2935+git545de2f/common/x86/pixel.h x264-0.160.3011+gitcde9a93/common/x86/pixel.h --- x264-0.157.2935+git545de2f/common/x86/pixel.h 2018-09-25 12:35:00.000000000 +0000 +++ x264-0.160.3011+gitcde9a93/common/x86/pixel.h 2020-07-13 10:30:21.000000000 +0000 @@ -1,7 +1,7 @@ /***************************************************************************** * pixel.h: x86 pixel metrics ***************************************************************************** - * Copyright (C) 2003-2018 x264 project + * Copyright (C) 2003-2020 x264 project * * Authors: Laurent Aimar * Loren Merritt diff -Nru x264-0.157.2935+git545de2f/common/x86/predict-a.asm x264-0.160.3011+gitcde9a93/common/x86/predict-a.asm --- x264-0.157.2935+git545de2f/common/x86/predict-a.asm 2018-09-25 12:35:00.000000000 +0000 +++ x264-0.160.3011+gitcde9a93/common/x86/predict-a.asm 2020-07-13 10:30:21.000000000 +0000 @@ -1,7 +1,7 @@ ;***************************************************************************** ;* 
predict-a.asm: x86 intra prediction ;***************************************************************************** -;* Copyright (C) 2005-2018 x264 project +;* Copyright (C) 2005-2020 x264 project ;* ;* Authors: Loren Merritt ;* Holger Lubitz @@ -688,7 +688,7 @@ je .fix_lt_2 .do_top: and r2d, 4 -%ifdef PIC +%if ARCH_X86_64 lea r3, [shuf_fixtr] pshufb m3, [r3+r2*4] %else diff -Nru x264-0.157.2935+git545de2f/common/x86/predict-c.c x264-0.160.3011+gitcde9a93/common/x86/predict-c.c --- x264-0.157.2935+git545de2f/common/x86/predict-c.c 2018-09-25 12:35:00.000000000 +0000 +++ x264-0.160.3011+gitcde9a93/common/x86/predict-c.c 2020-07-13 10:30:21.000000000 +0000 @@ -1,7 +1,7 @@ /***************************************************************************** * predict-c.c: intra prediction ***************************************************************************** - * Copyright (C) 2003-2018 x264 project + * Copyright (C) 2003-2020 x264 project * * Authors: Laurent Aimar * Loren Merritt @@ -311,7 +311,7 @@ /**************************************************************************** * Exported functions: ****************************************************************************/ -void x264_predict_16x16_init_mmx( int cpu, x264_predict_t pf[7] ) +void x264_predict_16x16_init_mmx( uint32_t cpu, x264_predict_t pf[7] ) { if( !(cpu&X264_CPU_MMX2) ) return; @@ -370,7 +370,7 @@ } } -void x264_predict_8x8c_init_mmx( int cpu, x264_predict_t pf[7] ) +void x264_predict_8x8c_init_mmx( uint32_t cpu, x264_predict_t pf[7] ) { if( !(cpu&X264_CPU_MMX) ) return; @@ -428,7 +428,7 @@ } } -void x264_predict_8x16c_init_mmx( int cpu, x264_predict_t pf[7] ) +void x264_predict_8x16c_init_mmx( uint32_t cpu, x264_predict_t pf[7] ) { if( !(cpu&X264_CPU_MMX) ) return; @@ -479,7 +479,7 @@ } } -void x264_predict_8x8_init_mmx( int cpu, x264_predict8x8_t pf[12], x264_predict_8x8_filter_t *predict_8x8_filter ) +void x264_predict_8x8_init_mmx( uint32_t cpu, x264_predict8x8_t pf[12], 
x264_predict_8x8_filter_t *predict_8x8_filter ) { if( !(cpu&X264_CPU_MMX2) ) return; @@ -563,7 +563,7 @@ #endif // HIGH_BIT_DEPTH } -void x264_predict_4x4_init_mmx( int cpu, x264_predict_t pf[12] ) +void x264_predict_4x4_init_mmx( uint32_t cpu, x264_predict_t pf[12] ) { if( !(cpu&X264_CPU_MMX2) ) return; diff -Nru x264-0.157.2935+git545de2f/common/x86/predict.h x264-0.160.3011+gitcde9a93/common/x86/predict.h --- x264-0.157.2935+git545de2f/common/x86/predict.h 2018-09-25 12:35:00.000000000 +0000 +++ x264-0.160.3011+gitcde9a93/common/x86/predict.h 2020-07-13 10:30:21.000000000 +0000 @@ -1,7 +1,7 @@ /***************************************************************************** * predict.h: x86 intra prediction ***************************************************************************** - * Copyright (C) 2003-2018 x264 project + * Copyright (C) 2003-2020 x264 project * * Authors: Laurent Aimar * Loren Merritt @@ -28,15 +28,15 @@ #define X264_X86_PREDICT_H #define x264_predict_16x16_init_mmx x264_template(predict_16x16_init_mmx) -void x264_predict_16x16_init_mmx( int cpu, x264_predict_t pf[7] ); +void x264_predict_16x16_init_mmx( uint32_t cpu, x264_predict_t pf[7] ); #define x264_predict_8x16c_init_mmx x264_template(predict_8x16c_init_mmx) -void x264_predict_8x16c_init_mmx( int cpu, x264_predict_t pf[7] ); +void x264_predict_8x16c_init_mmx( uint32_t cpu, x264_predict_t pf[7] ); #define x264_predict_8x8c_init_mmx x264_template(predict_8x8c_init_mmx) -void x264_predict_8x8c_init_mmx ( int cpu, x264_predict_t pf[7] ); +void x264_predict_8x8c_init_mmx ( uint32_t cpu, x264_predict_t pf[7] ); #define x264_predict_4x4_init_mmx x264_template(predict_4x4_init_mmx) -void x264_predict_4x4_init_mmx ( int cpu, x264_predict_t pf[12] ); +void x264_predict_4x4_init_mmx ( uint32_t cpu, x264_predict_t pf[12] ); #define x264_predict_8x8_init_mmx x264_template(predict_8x8_init_mmx) -void x264_predict_8x8_init_mmx ( int cpu, x264_predict8x8_t pf[12], x264_predict_8x8_filter_t 
*predict_8x8_filter ); +void x264_predict_8x8_init_mmx ( uint32_t cpu, x264_predict8x8_t pf[12], x264_predict_8x8_filter_t *predict_8x8_filter ); #define x264_predict_16x16_v_mmx2 x264_template(predict_16x16_v_mmx2) void x264_predict_16x16_v_mmx2( pixel *src ); diff -Nru x264-0.157.2935+git545de2f/common/x86/quant-a.asm x264-0.160.3011+gitcde9a93/common/x86/quant-a.asm --- x264-0.157.2935+git545de2f/common/x86/quant-a.asm 2018-09-25 12:35:00.000000000 +0000 +++ x264-0.160.3011+gitcde9a93/common/x86/quant-a.asm 2020-07-13 10:30:21.000000000 +0000 @@ -1,7 +1,7 @@ ;***************************************************************************** ;* quant-a.asm: x86 quantization and level-run ;***************************************************************************** -;* Copyright (C) 2005-2018 x264 project +;* Copyright (C) 2005-2020 x264 project ;* ;* Authors: Loren Merritt ;* Fiona Glaser @@ -673,7 +673,7 @@ sub t2d, t0d sub t2d, t1d ; i_mf = i_qp % 6 shl t2d, %2 -%ifdef PIC +%if ARCH_X86_64 lea r1, [dequant%1_scale] add r1, t2 %else @@ -761,7 +761,7 @@ sub t2d, t1d ; i_mf = i_qp % 6 shl t2d, %1 %if %2 -%ifdef PIC +%if ARCH_X86_64 %define dmf r1+t2 lea r1, [dequant8_scale] %else @@ -1449,7 +1449,7 @@ shr edx, 1 %endif %endif -%ifdef PIC +%if ARCH_X86_64 lea r4, [decimate_mask_table4] %define mask_table r4 %else @@ -1580,16 +1580,11 @@ add eax, r3d jnz .ret9 %endif -%ifdef PIC - lea r4, [decimate_table8] - %define table r4 -%else - %define table decimate_table8 -%endif + lea r4, [decimate_table8] mov al, -6 .loop: tzcnt rcx, r1 - add al, byte [table + rcx] + add al, byte [r4 + rcx] jge .ret9 shr r1, 1 SHRX r1, rcx @@ -2165,7 +2160,7 @@ %macro COEFF_LEVELRUN_LUT 1 cglobal coeff_level_run%1,2,4+(%1/9) -%ifdef PIC +%if ARCH_X86_64 lea r5, [$$] %define GLOBAL +r5-$$ %else diff -Nru x264-0.157.2935+git545de2f/common/x86/quant.h x264-0.160.3011+gitcde9a93/common/x86/quant.h --- x264-0.157.2935+git545de2f/common/x86/quant.h 2018-09-25 12:35:00.000000000 +0000 +++ 
x264-0.160.3011+gitcde9a93/common/x86/quant.h 2020-07-13 10:30:21.000000000 +0000 @@ -1,7 +1,7 @@ /***************************************************************************** * quant.h: x86 quantization and level-run ***************************************************************************** - * Copyright (C) 2005-2018 x264 project + * Copyright (C) 2005-2020 x264 project * * Authors: Loren Merritt * Fiona Glaser diff -Nru x264-0.157.2935+git545de2f/common/x86/sad16-a.asm x264-0.160.3011+gitcde9a93/common/x86/sad16-a.asm --- x264-0.157.2935+git545de2f/common/x86/sad16-a.asm 2018-09-25 12:35:00.000000000 +0000 +++ x264-0.160.3011+gitcde9a93/common/x86/sad16-a.asm 2020-07-13 10:30:21.000000000 +0000 @@ -1,7 +1,7 @@ ;***************************************************************************** ;* sad16-a.asm: x86 high depth sad functions ;***************************************************************************** -;* Copyright (C) 2010-2018 x264 project +;* Copyright (C) 2010-2020 x264 project ;* ;* Authors: Oskar Arvidsson ;* Henrik Gramner diff -Nru x264-0.157.2935+git545de2f/common/x86/sad-a.asm x264-0.160.3011+gitcde9a93/common/x86/sad-a.asm --- x264-0.157.2935+git545de2f/common/x86/sad-a.asm 2018-09-25 12:35:00.000000000 +0000 +++ x264-0.160.3011+gitcde9a93/common/x86/sad-a.asm 2020-07-13 10:30:21.000000000 +0000 @@ -1,7 +1,7 @@ ;***************************************************************************** ;* sad-a.asm: x86 sad functions ;***************************************************************************** -;* Copyright (C) 2003-2018 x264 project +;* Copyright (C) 2003-2020 x264 project ;* ;* Authors: Loren Merritt ;* Fiona Glaser @@ -1920,7 +1920,7 @@ shl r4d, 4 ; code size = 80 %endif %define sad_w16_addr (sad_w16_align1_%1 + (sad_w16_align1_%1 - sad_w16_align2_%1)) -%ifdef PIC +%if ARCH_X86_64 lea r5, [sad_w16_addr] add r5, r4 %else diff -Nru x264-0.157.2935+git545de2f/common/x86/trellis-64.asm 
x264-0.160.3011+gitcde9a93/common/x86/trellis-64.asm --- x264-0.157.2935+git545de2f/common/x86/trellis-64.asm 2018-09-25 12:35:00.000000000 +0000 +++ x264-0.160.3011+gitcde9a93/common/x86/trellis-64.asm 2020-07-13 10:30:21.000000000 +0000 @@ -1,7 +1,7 @@ ;***************************************************************************** ;* trellis-64.asm: x86_64 trellis quantization ;***************************************************************************** -;* Copyright (C) 2012-2018 x264 project +;* Copyright (C) 2012-2020 x264 project ;* ;* Authors: Loren Merritt ;* @@ -202,7 +202,6 @@ paddd m6, m6 %define unquant_mf m6 %endif -%ifdef PIC %if dc == 0 mov unquant_mfm, unquant_mfq %endif @@ -212,9 +211,6 @@ ; (Any address in .text would work, this one was just convenient.) lea r0, [$$] %define GLOBAL +r0-$$ -%else - %define GLOBAL -%endif TRELLIS_LOOP 0 ; node_ctx 0..3 TRELLIS_LOOP 1 ; node_ctx 1..7 @@ -304,12 +300,8 @@ mov r10, cabac_state_sigm %if num_coefs == 64 mov r6d, b_interlacedm -%ifdef PIC add r6d, iid movzx r6d, byte [significant_coeff_flag_offset_8x8 + r6 GLOBAL] -%else - movzx r6d, byte [significant_coeff_flag_offset_8x8 + r6 + iiq] -%endif movzx r10, byte [r10 + r6] %elif num_coefs == 8 movzx r13, byte [coeff_flag_offset_chroma_422_dc + iiq GLOBAL] @@ -408,12 +400,8 @@ %if dc pmuludq m0, unquant_mf %else -%ifdef PIC mov r10, unquant_mfm LOAD_DUP m3, [r10 + zigzagiq*4] -%else - LOAD_DUP m3, [unquant_mfq + zigzagiq*4] -%endif pmuludq m0, m3 %endif paddd m0, [pq_128] diff -Nru x264-0.157.2935+git545de2f/common/x86/util.h x264-0.160.3011+gitcde9a93/common/x86/util.h --- x264-0.157.2935+git545de2f/common/x86/util.h 2018-09-25 12:35:00.000000000 +0000 +++ x264-0.160.3011+gitcde9a93/common/x86/util.h 2020-07-13 10:30:21.000000000 +0000 @@ -1,7 +1,7 @@ /***************************************************************************** * util.h: x86 inline asm ***************************************************************************** - * Copyright (C) 2008-2018 
x264 project + * Copyright (C) 2008-2020 x264 project * * Authors: Fiona Glaser * Loren Merritt @@ -33,7 +33,7 @@ #undef M128_ZERO #define M128_ZERO ((__m128){0,0,0,0}) #define x264_union128_t x264_union128_sse_t -typedef union { __m128 i; uint64_t a[2]; uint32_t b[4]; uint16_t c[8]; uint8_t d[16]; } MAY_ALIAS x264_union128_sse_t; +typedef union { __m128 i; uint64_t q[2]; uint32_t d[4]; uint16_t w[8]; uint8_t b[16]; } MAY_ALIAS x264_union128_sse_t; #if HAVE_VECTOREXT typedef uint32_t v4si __attribute__((vector_size (16))); #endif diff -Nru x264-0.157.2935+git545de2f/common/x86/x86inc.asm x264-0.160.3011+gitcde9a93/common/x86/x86inc.asm --- x264-0.157.2935+git545de2f/common/x86/x86inc.asm 2018-09-25 12:35:00.000000000 +0000 +++ x264-0.160.3011+gitcde9a93/common/x86/x86inc.asm 2020-07-13 10:30:21.000000000 +0000 @@ -1,7 +1,7 @@ ;***************************************************************************** ;* x86inc.asm: x264asm abstraction layer ;***************************************************************************** -;* Copyright (C) 2005-2018 x264 project +;* Copyright (C) 2005-2020 x264 project ;* ;* Authors: Loren Merritt ;* Henrik Gramner @@ -65,12 +65,19 @@ %endif %define FORMAT_ELF 0 +%define FORMAT_MACHO 0 %ifidn __OUTPUT_FORMAT__,elf %define FORMAT_ELF 1 %elifidn __OUTPUT_FORMAT__,elf32 %define FORMAT_ELF 1 %elifidn __OUTPUT_FORMAT__,elf64 %define FORMAT_ELF 1 +%elifidn __OUTPUT_FORMAT__,macho + %define FORMAT_MACHO 1 +%elifidn __OUTPUT_FORMAT__,macho32 + %define FORMAT_MACHO 1 +%elifidn __OUTPUT_FORMAT__,macho64 + %define FORMAT_MACHO 1 %endif %ifdef PREFIX @@ -89,20 +96,21 @@ %endif %endmacro -%if WIN64 - %define PIC -%elif ARCH_X86_64 == 0 -; x86_32 doesn't require PIC. -; Some distros prefer shared objects to be PIC, but nothing breaks if -; the code contains a few textrels, so we'll skip that complexity. 
- %undef PIC -%endif -%ifdef PIC +%if ARCH_X86_64 + %define PIC 1 ; always use PIC on x86-64 default rel +%elifidn __OUTPUT_FORMAT__,win32 + %define PIC 0 ; PIC isn't used on 32-bit Windows +%elifndef PIC + %define PIC 0 %endif +%define HAVE_PRIVATE_EXTERN 1 %ifdef __NASM_VER__ %use smartalign + %if __NASM_VERSION_ID__ < 0x020e0000 ; 2.14 + %define HAVE_PRIVATE_EXTERN 0 + %endif %endif ; Macros to eliminate most code duplication between x86_32 and x86_64: @@ -220,6 +228,18 @@ %define gprsize 4 %endif +%macro LEA 2 +%if ARCH_X86_64 + lea %1, [%2] +%elif PIC + call $+5 ; special-cased to not affect the RSB on most CPU:s + pop %1 + add %1, (%2)-$+1 +%else + mov %1, %2 +%endif +%endmacro + %macro PUSH 1 push %1 %ifidn rstk, rsp @@ -281,6 +301,10 @@ %endif %endmacro +%if ARCH_X86_64 == 0 + %define movsxd movifnidn +%endif + %macro movsxdifnidn 2 %ifnidn %1, %2 movsxd %1, %2 @@ -332,7 +356,7 @@ %define vzeroupper_required (mmsize > 16 && (ARCH_X86_64 == 0 || xmm_regs_used > 16 || notcpuflag(avx512))) %define high_mm_regs (16*cpuflag(avx512)) -%macro ALLOC_STACK 1-2 0 ; stack_size, n_xmm_regs (for win64 only) +%macro ALLOC_STACK 0-2 0, 0 ; stack_size, n_xmm_regs (for win64 only) %ifnum %1 %if %1 != 0 %assign %%pad 0 @@ -377,7 +401,7 @@ %endif %endmacro -%macro SETUP_STACK_POINTER 1 +%macro SETUP_STACK_POINTER 0-1 0 %ifnum %1 %if %1 != 0 && required_stack_alignment > STACK_ALIGNMENT %if %1 > 0 @@ -399,16 +423,6 @@ %endif %endmacro -%macro DEFINE_ARGS_INTERNAL 3+ - %ifnum %2 - DEFINE_ARGS %3 - %elif %1 == 4 - DEFINE_ARGS %2 - %elif %1 > 4 - DEFINE_ARGS %2, %3 - %endif -%endmacro - %if WIN64 ; Windows x64 ;================================================= DECLARE_REG 0, rcx @@ -427,7 +441,7 @@ DECLARE_REG 13, R12, 112 DECLARE_REG 14, R13, 120 -%macro PROLOGUE 2-5+ 0 ; #args, #regs, #xmm_regs, [stack_size,] arg_names... +%macro PROLOGUE 2-5+ 0, 0 ; #args, #regs, #xmm_regs, [stack_size,] arg_names... 
%assign num_args %1 %assign regs_used %2 ASSERT regs_used >= num_args @@ -439,7 +453,15 @@ WIN64_SPILL_XMM %3 %endif LOAD_IF_USED 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14 - DEFINE_ARGS_INTERNAL %0, %4, %5 + %if %0 > 4 + %ifnum %4 + DEFINE_ARGS %5 + %else + DEFINE_ARGS %4, %5 + %endif + %elifnnum %4 + DEFINE_ARGS %4 + %endif %endmacro %macro WIN64_PUSH_XMM 0 @@ -535,7 +557,7 @@ DECLARE_REG 13, R12, 64 DECLARE_REG 14, R13, 72 -%macro PROLOGUE 2-5+ 0 ; #args, #regs, #xmm_regs, [stack_size,] arg_names... +%macro PROLOGUE 2-5+ 0, 0 ; #args, #regs, #xmm_regs, [stack_size,] arg_names... %assign num_args %1 %assign regs_used %2 %assign xmm_regs_used %3 @@ -545,7 +567,15 @@ PUSH_IF_USED 9, 10, 11, 12, 13, 14 ALLOC_STACK %4 LOAD_IF_USED 6, 7, 8, 9, 10, 11, 12, 13, 14 - DEFINE_ARGS_INTERNAL %0, %4, %5 + %if %0 > 4 + %ifnum %4 + DEFINE_ARGS %5 + %else + DEFINE_ARGS %4, %5 + %endif + %elifnnum %4 + DEFINE_ARGS %4 + %endif %endmacro %define has_epilogue regs_used > 9 || stack_size > 0 || vzeroupper_required @@ -586,7 +616,7 @@ DECLARE_ARG 7, 8, 9, 10, 11, 12, 13, 14 -%macro PROLOGUE 2-5+ ; #args, #regs, #xmm_regs, [stack_size,] arg_names... +%macro PROLOGUE 2-5+ 0, 0 ; #args, #regs, #xmm_regs, [stack_size,] arg_names... 
%assign num_args %1 %assign regs_used %2 ASSERT regs_used >= num_args @@ -601,7 +631,15 @@ PUSH_IF_USED 3, 4, 5, 6 ALLOC_STACK %4 LOAD_IF_USED 0, 1, 2, 3, 4, 5, 6 - DEFINE_ARGS_INTERNAL %0, %4, %5 + %if %0 > 4 + %ifnum %4 + DEFINE_ARGS %5 + %else + DEFINE_ARGS %4, %5 + %endif + %elifnnum %4 + DEFINE_ARGS %4 + %endif %endmacro %define has_epilogue regs_used > 3 || stack_size > 0 || vzeroupper_required @@ -669,7 +707,7 @@ BRANCH_INSTR jz, je, jnz, jne, jl, jle, jnl, jnle, jg, jge, jng, jnge, ja, jae, jna, jnae, jb, jbe, jnb, jnbe, jc, jnc, js, jns, jo, jno, jp, jnp -%macro TAIL_CALL 2 ; callee, is_nonadjacent +%macro TAIL_CALL 1-2 1 ; callee, is_nonadjacent %if has_epilogue call %1 RET @@ -699,22 +737,25 @@ %endmacro %macro cglobal_internal 2-3+ annotate_function_size - %if %1 - %xdefine %%FUNCTION_PREFIX private_prefix - %xdefine %%VISIBILITY hidden - %else - %xdefine %%FUNCTION_PREFIX public_prefix - %xdefine %%VISIBILITY - %endif %ifndef cglobaled_%2 - %xdefine %2 mangle(%%FUNCTION_PREFIX %+ _ %+ %2) + %if %1 + %xdefine %2 mangle(private_prefix %+ _ %+ %2) + %else + %xdefine %2 mangle(public_prefix %+ _ %+ %2) + %endif %xdefine %2.skip_prologue %2 %+ .skip_prologue CAT_XDEFINE cglobaled_, %2, 1 %endif %xdefine current_function %2 %xdefine current_function_section __SECT__ %if FORMAT_ELF - global %2:function %%VISIBILITY + %if %1 + global %2:function hidden + %else + global %2:function + %endif + %elif FORMAT_MACHO && HAVE_PRIVATE_EXTERN && %1 + global %2:private_extern %else global %2 %endif @@ -735,6 +776,8 @@ %macro cglobal_label 1 %if FORMAT_ELF global current_function %+ %1:function hidden + %elif FORMAT_MACHO && HAVE_PRIVATE_EXTERN + global current_function %+ %1:private_extern %else global current_function %+ %1 %endif @@ -760,6 +803,8 @@ %xdefine %1 mangle(private_prefix %+ _ %+ %1) %if FORMAT_ELF global %1:data hidden + %elif FORMAT_MACHO && HAVE_PRIVATE_EXTERN + global %1:private_extern %else global %1 %endif @@ -804,19 +849,20 @@ %assign cpuflags_sse4 
(1<<10)| cpuflags_ssse3 %assign cpuflags_sse42 (1<<11)| cpuflags_sse4 %assign cpuflags_aesni (1<<12)| cpuflags_sse42 -%assign cpuflags_avx (1<<13)| cpuflags_sse42 -%assign cpuflags_xop (1<<14)| cpuflags_avx -%assign cpuflags_fma4 (1<<15)| cpuflags_avx -%assign cpuflags_fma3 (1<<16)| cpuflags_avx -%assign cpuflags_bmi1 (1<<17)| cpuflags_avx|cpuflags_lzcnt -%assign cpuflags_bmi2 (1<<18)| cpuflags_bmi1 -%assign cpuflags_avx2 (1<<19)| cpuflags_fma3|cpuflags_bmi2 -%assign cpuflags_avx512 (1<<20)| cpuflags_avx2 ; F, CD, BW, DQ, VL - -%assign cpuflags_cache32 (1<<21) -%assign cpuflags_cache64 (1<<22) -%assign cpuflags_aligned (1<<23) ; not a cpu feature, but a function variant -%assign cpuflags_atom (1<<24) +%assign cpuflags_gfni (1<<13)| cpuflags_sse42 +%assign cpuflags_avx (1<<14)| cpuflags_sse42 +%assign cpuflags_xop (1<<15)| cpuflags_avx +%assign cpuflags_fma4 (1<<16)| cpuflags_avx +%assign cpuflags_fma3 (1<<17)| cpuflags_avx +%assign cpuflags_bmi1 (1<<18)| cpuflags_avx|cpuflags_lzcnt +%assign cpuflags_bmi2 (1<<19)| cpuflags_bmi1 +%assign cpuflags_avx2 (1<<20)| cpuflags_fma3|cpuflags_bmi2 +%assign cpuflags_avx512 (1<<21)| cpuflags_avx2 ; F, CD, BW, DQ, VL + +%assign cpuflags_cache32 (1<<22) +%assign cpuflags_cache64 (1<<23) +%assign cpuflags_aligned (1<<24) ; not a cpu feature, but a function variant +%assign cpuflags_atom (1<<25) ; Returns a boolean value expressing whether or not the specified cpuflag is enabled. 
%define cpuflag(x) (((((cpuflags & (cpuflags_ %+ x)) ^ (cpuflags_ %+ x)) - 1) >> 31) & 1) @@ -1208,8 +1254,22 @@ %ifdef cpuname %if notcpuflag(%2) %error use of ``%1'' %2 instruction in cpuname function: current_function - %elif cpuflags_%2 < cpuflags_sse && notcpuflag(sse2) && __sizeofreg > 8 + %elif %3 == 0 && __sizeofreg == 16 && notcpuflag(sse2) %error use of ``%1'' sse2 instruction in cpuname function: current_function + %elif %3 == 0 && __sizeofreg == 32 && notcpuflag(avx2) + %error use of ``%1'' avx2 instruction in cpuname function: current_function + %elif __sizeofreg == 16 && notcpuflag(sse) + %error use of ``%1'' sse instruction in cpuname function: current_function + %elif __sizeofreg == 32 && notcpuflag(avx) + %error use of ``%1'' avx instruction in cpuname function: current_function + %elif __sizeofreg == 64 && notcpuflag(avx512) + %error use of ``%1'' avx512 instruction in cpuname function: current_function + %elifidn %1, pextrw ; special case because the base instruction is mmx2, + %ifnid %6 ; but sse4 is required for memory operands + %if notcpuflag(sse4) + %error use of ``%1'' sse4 instruction in cpuname function: current_function + %endif + %endif %endif %endif %endif @@ -1371,38 +1431,41 @@ AVX_INSTR cmpunordps, sse, 1, 0, 1 AVX_INSTR cmpunordsd, sse2, 1, 0, 0 AVX_INSTR cmpunordss, sse, 1, 0, 0 -AVX_INSTR comisd, sse2 -AVX_INSTR comiss, sse -AVX_INSTR cvtdq2pd, sse2 -AVX_INSTR cvtdq2ps, sse2 -AVX_INSTR cvtpd2dq, sse2 -AVX_INSTR cvtpd2ps, sse2 -AVX_INSTR cvtps2dq, sse2 -AVX_INSTR cvtps2pd, sse2 -AVX_INSTR cvtsd2si, sse2 +AVX_INSTR comisd, sse2, 1 +AVX_INSTR comiss, sse, 1 +AVX_INSTR cvtdq2pd, sse2, 1 +AVX_INSTR cvtdq2ps, sse2, 1 +AVX_INSTR cvtpd2dq, sse2, 1 +AVX_INSTR cvtpd2ps, sse2, 1 +AVX_INSTR cvtps2dq, sse2, 1 +AVX_INSTR cvtps2pd, sse2, 1 +AVX_INSTR cvtsd2si, sse2, 1 AVX_INSTR cvtsd2ss, sse2, 1, 0, 0 AVX_INSTR cvtsi2sd, sse2, 1, 0, 0 AVX_INSTR cvtsi2ss, sse, 1, 0, 0 AVX_INSTR cvtss2sd, sse2, 1, 0, 0 -AVX_INSTR cvtss2si, sse -AVX_INSTR 
cvttpd2dq, sse2 -AVX_INSTR cvttps2dq, sse2 -AVX_INSTR cvttsd2si, sse2 -AVX_INSTR cvttss2si, sse +AVX_INSTR cvtss2si, sse, 1 +AVX_INSTR cvttpd2dq, sse2, 1 +AVX_INSTR cvttps2dq, sse2, 1 +AVX_INSTR cvttsd2si, sse2, 1 +AVX_INSTR cvttss2si, sse, 1 AVX_INSTR divpd, sse2, 1, 0, 0 AVX_INSTR divps, sse, 1, 0, 0 AVX_INSTR divsd, sse2, 1, 0, 0 AVX_INSTR divss, sse, 1, 0, 0 AVX_INSTR dppd, sse4, 1, 1, 0 AVX_INSTR dpps, sse4, 1, 1, 0 -AVX_INSTR extractps, sse4 +AVX_INSTR extractps, sse4, 1 +AVX_INSTR gf2p8affineinvqb, gfni, 0, 1, 0 +AVX_INSTR gf2p8affineqb, gfni, 0, 1, 0 +AVX_INSTR gf2p8mulb, gfni, 0, 0, 0 AVX_INSTR haddpd, sse3, 1, 0, 0 AVX_INSTR haddps, sse3, 1, 0, 0 AVX_INSTR hsubpd, sse3, 1, 0, 0 AVX_INSTR hsubps, sse3, 1, 0, 0 AVX_INSTR insertps, sse4, 1, 1, 0 AVX_INSTR lddqu, sse3 -AVX_INSTR ldmxcsr, sse +AVX_INSTR ldmxcsr, sse, 1 AVX_INSTR maskmovdqu, sse2 AVX_INSTR maxpd, sse2, 1, 0, 1 AVX_INSTR maxps, sse, 1, 0, 1 @@ -1412,10 +1475,10 @@ AVX_INSTR minps, sse, 1, 0, 1 AVX_INSTR minsd, sse2, 1, 0, 0 AVX_INSTR minss, sse, 1, 0, 0 -AVX_INSTR movapd, sse2 -AVX_INSTR movaps, sse +AVX_INSTR movapd, sse2, 1 +AVX_INSTR movaps, sse, 1 AVX_INSTR movd, mmx -AVX_INSTR movddup, sse3 +AVX_INSTR movddup, sse3, 1 AVX_INSTR movdqa, sse2 AVX_INSTR movdqu, sse2 AVX_INSTR movhlps, sse, 1, 0, 0 @@ -1424,19 +1487,19 @@ AVX_INSTR movlhps, sse, 1, 0, 0 AVX_INSTR movlpd, sse2, 1, 0, 0 AVX_INSTR movlps, sse, 1, 0, 0 -AVX_INSTR movmskpd, sse2 -AVX_INSTR movmskps, sse +AVX_INSTR movmskpd, sse2, 1 +AVX_INSTR movmskps, sse, 1 AVX_INSTR movntdq, sse2 AVX_INSTR movntdqa, sse4 -AVX_INSTR movntpd, sse2 -AVX_INSTR movntps, sse +AVX_INSTR movntpd, sse2, 1 +AVX_INSTR movntps, sse, 1 AVX_INSTR movq, mmx AVX_INSTR movsd, sse2, 1, 0, 0 -AVX_INSTR movshdup, sse3 -AVX_INSTR movsldup, sse3 +AVX_INSTR movshdup, sse3, 1 +AVX_INSTR movsldup, sse3, 1 AVX_INSTR movss, sse, 1, 0, 0 -AVX_INSTR movupd, sse2 -AVX_INSTR movups, sse +AVX_INSTR movupd, sse2, 1 +AVX_INSTR movups, sse, 1 AVX_INSTR mpsadbw, sse4, 0, 1, 0 
AVX_INSTR mulpd, sse2, 1, 0, 1 AVX_INSTR mulps, sse, 1, 0, 1 @@ -1569,27 +1632,27 @@ AVX_INSTR punpckldq, mmx, 0, 0, 0 AVX_INSTR punpcklqdq, sse2, 0, 0, 0 AVX_INSTR pxor, mmx, 0, 0, 1 -AVX_INSTR rcpps, sse +AVX_INSTR rcpps, sse, 1 AVX_INSTR rcpss, sse, 1, 0, 0 -AVX_INSTR roundpd, sse4 -AVX_INSTR roundps, sse4 +AVX_INSTR roundpd, sse4, 1 +AVX_INSTR roundps, sse4, 1 AVX_INSTR roundsd, sse4, 1, 1, 0 AVX_INSTR roundss, sse4, 1, 1, 0 -AVX_INSTR rsqrtps, sse +AVX_INSTR rsqrtps, sse, 1 AVX_INSTR rsqrtss, sse, 1, 0, 0 AVX_INSTR shufpd, sse2, 1, 1, 0 AVX_INSTR shufps, sse, 1, 1, 0 -AVX_INSTR sqrtpd, sse2 -AVX_INSTR sqrtps, sse +AVX_INSTR sqrtpd, sse2, 1 +AVX_INSTR sqrtps, sse, 1 AVX_INSTR sqrtsd, sse2, 1, 0, 0 AVX_INSTR sqrtss, sse, 1, 0, 0 -AVX_INSTR stmxcsr, sse +AVX_INSTR stmxcsr, sse, 1 AVX_INSTR subpd, sse2, 1, 0, 0 AVX_INSTR subps, sse, 1, 0, 0 AVX_INSTR subsd, sse2, 1, 0, 0 AVX_INSTR subss, sse, 1, 0, 0 -AVX_INSTR ucomisd, sse2 -AVX_INSTR ucomiss, sse +AVX_INSTR ucomisd, sse2, 1 +AVX_INSTR ucomiss, sse, 1 AVX_INSTR unpckhpd, sse2, 1, 0, 0 AVX_INSTR unpckhps, sse, 1, 0, 0 AVX_INSTR unpcklpd, sse2, 1, 0, 0 @@ -1602,6 +1665,38 @@ AVX_INSTR pfsub, 3dnow, 1, 0, 0 AVX_INSTR pfmul, 3dnow, 1, 0, 1 +;%1 == instruction +;%2 == minimal instruction set +%macro GPR_INSTR 2 + %macro %1 2-5 fnord, %1, %2 + %ifdef cpuname + %if notcpuflag(%5) + %error use of ``%4'' %5 instruction in cpuname function: current_function + %endif + %endif + %ifidn %3, fnord + %4 %1, %2 + %else + %4 %1, %2, %3 + %endif + %endmacro +%endmacro + +GPR_INSTR andn, bmi1 +GPR_INSTR bextr, bmi1 +GPR_INSTR blsi, bmi1 +GPR_INSTR blsr, bmi1 +GPR_INSTR blsmsk, bmi1 +GPR_INSTR bzhi, bmi2 +GPR_INSTR mulx, bmi2 +GPR_INSTR pdep, bmi2 +GPR_INSTR pext, bmi2 +GPR_INSTR popcnt, sse42 +GPR_INSTR rorx, bmi2 +GPR_INSTR sarx, bmi2 +GPR_INSTR shlx, bmi2 +GPR_INSTR shrx, bmi2 + ; base-4 constants for shuffles %assign i 0 %rep 256 diff -Nru x264-0.157.2935+git545de2f/common/x86/x86util.asm 
x264-0.160.3011+gitcde9a93/common/x86/x86util.asm --- x264-0.157.2935+git545de2f/common/x86/x86util.asm 2018-09-25 12:35:00.000000000 +0000 +++ x264-0.160.3011+gitcde9a93/common/x86/x86util.asm 2020-07-13 10:30:21.000000000 +0000 @@ -1,7 +1,7 @@ ;***************************************************************************** ;* x86util.asm: x86 utility macros ;***************************************************************************** -;* Copyright (C) 2008-2018 x264 project +;* Copyright (C) 2008-2020 x264 project ;* ;* Authors: Holger Lubitz ;* Loren Merritt @@ -584,8 +584,10 @@ %elif %1==2 %if mmsize==8 SBUTTERFLY dq, %3, %4, %5 - %else + %elif %0==6 TRANS q, ORDER, %3, %4, %5, %6 + %else + TRANS q, ORDER, %3, %4, %5 %endif %elif %1==4 SBUTTERFLY qdq, %3, %4, %5 diff -Nru x264-0.157.2935+git545de2f/config.guess x264-0.160.3011+gitcde9a93/config.guess --- x264-0.157.2935+git545de2f/config.guess 2018-09-25 12:35:00.000000000 +0000 +++ x264-0.160.3011+gitcde9a93/config.guess 2020-07-13 10:30:22.000000000 +0000 @@ -1,14 +1,12 @@ #! /bin/sh # Attempt to guess a canonical system name. -# Copyright (C) 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999, -# 2000, 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, -# 2011, 2012 Free Software Foundation, Inc. +# Copyright 1992-2018 Free Software Foundation, Inc. -timestamp='2012-09-25' +timestamp='2018-02-24' # This file is free software; you can redistribute it and/or modify it # under the terms of the GNU General Public License as published by -# the Free Software Foundation; either version 2 of the License, or +# the Free Software Foundation; either version 3 of the License, or # (at your option) any later version. # # This program is distributed in the hope that it will be useful, but @@ -17,24 +15,22 @@ # General Public License for more details. # # You should have received a copy of the GNU General Public License -# along with this program; if not, see . +# along with this program; if not, see . 
# # As a special exception to the GNU General Public License, if you # distribute this file as part of a program that contains a # configuration script generated by Autoconf, you may include it under -# the same distribution terms that you use for the rest of that program. - - -# Originally written by Per Bothner. Please send patches (context -# diff format) to and include a ChangeLog -# entry. +# the same distribution terms that you use for the rest of that +# program. This Exception is an additional permission under section 7 +# of the GNU General Public License, version 3 ("GPLv3"). # -# This script attempts to guess a canonical system name similar to -# config.sub. If it succeeds, it prints the system name on stdout, and -# exits with 0. Otherwise, it exits with 1. +# Originally written by Per Bothner; maintained since 2000 by Ben Elliston. # # You can get the latest version of this script from: -# http://git.savannah.gnu.org/gitweb/?p=config.git;a=blob_plain;f=config.guess;hb=HEAD +# https://git.savannah.gnu.org/gitweb/?p=config.git;a=blob_plain;f=config.guess +# +# Please send patches to . + me=`echo "$0" | sed -e 's,.*/,,'` @@ -43,7 +39,7 @@ Output the configuration name of the system \`$me' is run on. -Operation modes: +Options: -h, --help print this help, then exit -t, --time-stamp print date of last modification, then exit -v, --version print version number, then exit @@ -54,9 +50,7 @@ GNU config.guess ($timestamp) Originally written by Per Bothner. -Copyright (C) 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000, -2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011, 2012 -Free Software Foundation, Inc. +Copyright 1992-2018 Free Software Foundation, Inc. This is free software; see the source for copying conditions. There is NO warranty; not even for MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE." 
@@ -113,9 +107,9 @@ dummy=$tmp/dummy ; tmpfiles="$dummy.c $dummy.o $dummy.rel $dummy" ; case $CC_FOR_BUILD,$HOST_CC,$CC in - ,,) echo "int x;" > $dummy.c ; + ,,) echo "int x;" > "$dummy.c" ; for c in cc gcc c89 c99 ; do - if ($c -c -o $dummy.o $dummy.c) >/dev/null 2>&1 ; then + if ($c -c -o "$dummy.o" "$dummy.c") >/dev/null 2>&1 ; then CC_FOR_BUILD="$c"; break ; fi ; done ; @@ -138,9 +132,37 @@ UNAME_SYSTEM=`(uname -s) 2>/dev/null` || UNAME_SYSTEM=unknown UNAME_VERSION=`(uname -v) 2>/dev/null` || UNAME_VERSION=unknown +case "$UNAME_SYSTEM" in +Linux|GNU|GNU/*) + # If the system lacks a compiler, then just pick glibc. + # We could probably try harder. + LIBC=gnu + + eval "$set_cc_for_build" + cat <<-EOF > "$dummy.c" + #include + #if defined(__UCLIBC__) + LIBC=uclibc + #elif defined(__dietlibc__) + LIBC=dietlibc + #else + LIBC=gnu + #endif + EOF + eval "`$CC_FOR_BUILD -E "$dummy.c" 2>/dev/null | grep '^LIBC' | sed 's, ,,g'`" + + # If ldd exists, use it to detect musl libc. + if command -v ldd >/dev/null && \ + ldd --version 2>&1 | grep -q ^musl + then + LIBC=musl + fi + ;; +esac + # Note: order is significant - the case branches are not exclusive. -case "${UNAME_MACHINE}:${UNAME_SYSTEM}:${UNAME_RELEASE}:${UNAME_VERSION}" in +case "$UNAME_MACHINE:$UNAME_SYSTEM:$UNAME_RELEASE:$UNAME_VERSION" in *:NetBSD:*:*) # NetBSD (nbsd) targets should (where applicable) match one or # more of the tuples: *-*-netbsdelf*, *-*-netbsdaout*, @@ -153,21 +175,31 @@ # Note: NetBSD doesn't particularly care about the vendor # portion of the name. We always set it to "unknown". 
sysctl="sysctl -n hw.machine_arch" - UNAME_MACHINE_ARCH=`(/sbin/$sysctl 2>/dev/null || \ - /usr/sbin/$sysctl 2>/dev/null || echo unknown)` - case "${UNAME_MACHINE_ARCH}" in + UNAME_MACHINE_ARCH=`(uname -p 2>/dev/null || \ + "/sbin/$sysctl" 2>/dev/null || \ + "/usr/sbin/$sysctl" 2>/dev/null || \ + echo unknown)` + case "$UNAME_MACHINE_ARCH" in armeb) machine=armeb-unknown ;; arm*) machine=arm-unknown ;; sh3el) machine=shl-unknown ;; sh3eb) machine=sh-unknown ;; sh5el) machine=sh5le-unknown ;; - *) machine=${UNAME_MACHINE_ARCH}-unknown ;; + earmv*) + arch=`echo "$UNAME_MACHINE_ARCH" | sed -e 's,^e\(armv[0-9]\).*$,\1,'` + endian=`echo "$UNAME_MACHINE_ARCH" | sed -ne 's,^.*\(eb\)$,\1,p'` + machine="${arch}${endian}"-unknown + ;; + *) machine="$UNAME_MACHINE_ARCH"-unknown ;; esac # The Operating System including object format, if it has switched - # to ELF recently, or will in the future. - case "${UNAME_MACHINE_ARCH}" in + # to ELF recently (or will in the future) and ABI. + case "$UNAME_MACHINE_ARCH" in + earm*) + os=netbsdelf + ;; arm*|i386|m68k|ns32k|sh3*|sparc|vax) - eval $set_cc_for_build + eval "$set_cc_for_build" if echo __ELF__ | $CC_FOR_BUILD -E - 2>/dev/null \ | grep -q __ELF__ then @@ -182,44 +214,67 @@ os=netbsd ;; esac + # Determine ABI tags. + case "$UNAME_MACHINE_ARCH" in + earm*) + expr='s/^earmv[0-9]/-eabi/;s/eb$//' + abi=`echo "$UNAME_MACHINE_ARCH" | sed -e "$expr"` + ;; + esac # The OS release # Debian GNU/NetBSD machines have a different userland, and # thus, need a distinct triplet. However, they do not need # kernel version information, so it can be replaced with a # suitable tag, in the style of linux-gnu. - case "${UNAME_VERSION}" in + case "$UNAME_VERSION" in Debian*) release='-gnu' ;; *) - release=`echo ${UNAME_RELEASE}|sed -e 's/[-_].*/\./'` + release=`echo "$UNAME_RELEASE" | sed -e 's/[-_].*//' | cut -d. 
-f1,2` ;; esac # Since CPU_TYPE-MANUFACTURER-KERNEL-OPERATING_SYSTEM: # contains redundant information, the shorter form: # CPU_TYPE-MANUFACTURER-OPERATING_SYSTEM is used. - echo "${machine}-${os}${release}" + echo "$machine-${os}${release}${abi}" exit ;; *:Bitrig:*:*) UNAME_MACHINE_ARCH=`arch | sed 's/Bitrig.//'` - echo ${UNAME_MACHINE_ARCH}-unknown-bitrig${UNAME_RELEASE} + echo "$UNAME_MACHINE_ARCH"-unknown-bitrig"$UNAME_RELEASE" exit ;; *:OpenBSD:*:*) UNAME_MACHINE_ARCH=`arch | sed 's/OpenBSD.//'` - echo ${UNAME_MACHINE_ARCH}-unknown-openbsd${UNAME_RELEASE} + echo "$UNAME_MACHINE_ARCH"-unknown-openbsd"$UNAME_RELEASE" + exit ;; + *:LibertyBSD:*:*) + UNAME_MACHINE_ARCH=`arch | sed 's/^.*BSD\.//'` + echo "$UNAME_MACHINE_ARCH"-unknown-libertybsd"$UNAME_RELEASE" + exit ;; + *:MidnightBSD:*:*) + echo "$UNAME_MACHINE"-unknown-midnightbsd"$UNAME_RELEASE" exit ;; *:ekkoBSD:*:*) - echo ${UNAME_MACHINE}-unknown-ekkobsd${UNAME_RELEASE} + echo "$UNAME_MACHINE"-unknown-ekkobsd"$UNAME_RELEASE" exit ;; *:SolidBSD:*:*) - echo ${UNAME_MACHINE}-unknown-solidbsd${UNAME_RELEASE} + echo "$UNAME_MACHINE"-unknown-solidbsd"$UNAME_RELEASE" exit ;; macppc:MirBSD:*:*) - echo powerpc-unknown-mirbsd${UNAME_RELEASE} + echo powerpc-unknown-mirbsd"$UNAME_RELEASE" exit ;; *:MirBSD:*:*) - echo ${UNAME_MACHINE}-unknown-mirbsd${UNAME_RELEASE} + echo "$UNAME_MACHINE"-unknown-mirbsd"$UNAME_RELEASE" + exit ;; + *:Sortix:*:*) + echo "$UNAME_MACHINE"-unknown-sortix exit ;; + *:Redox:*:*) + echo "$UNAME_MACHINE"-unknown-redox + exit ;; + mips:OSF1:*.*) + echo mips-dec-osf1 + exit ;; alpha:OSF1:*:*) case $UNAME_RELEASE in *4.0) @@ -236,63 +291,54 @@ ALPHA_CPU_TYPE=`/usr/sbin/psrinfo -v | sed -n -e 's/^ The alpha \(.*\) processor.*$/\1/p' | head -n 1` case "$ALPHA_CPU_TYPE" in "EV4 (21064)") - UNAME_MACHINE="alpha" ;; + UNAME_MACHINE=alpha ;; "EV4.5 (21064)") - UNAME_MACHINE="alpha" ;; + UNAME_MACHINE=alpha ;; "LCA4 (21066/21068)") - UNAME_MACHINE="alpha" ;; + UNAME_MACHINE=alpha ;; "EV5 (21164)") - 
UNAME_MACHINE="alphaev5" ;; + UNAME_MACHINE=alphaev5 ;; "EV5.6 (21164A)") - UNAME_MACHINE="alphaev56" ;; + UNAME_MACHINE=alphaev56 ;; "EV5.6 (21164PC)") - UNAME_MACHINE="alphapca56" ;; + UNAME_MACHINE=alphapca56 ;; "EV5.7 (21164PC)") - UNAME_MACHINE="alphapca57" ;; + UNAME_MACHINE=alphapca57 ;; "EV6 (21264)") - UNAME_MACHINE="alphaev6" ;; + UNAME_MACHINE=alphaev6 ;; "EV6.7 (21264A)") - UNAME_MACHINE="alphaev67" ;; + UNAME_MACHINE=alphaev67 ;; "EV6.8CB (21264C)") - UNAME_MACHINE="alphaev68" ;; + UNAME_MACHINE=alphaev68 ;; "EV6.8AL (21264B)") - UNAME_MACHINE="alphaev68" ;; + UNAME_MACHINE=alphaev68 ;; "EV6.8CX (21264D)") - UNAME_MACHINE="alphaev68" ;; + UNAME_MACHINE=alphaev68 ;; "EV6.9A (21264/EV69A)") - UNAME_MACHINE="alphaev69" ;; + UNAME_MACHINE=alphaev69 ;; "EV7 (21364)") - UNAME_MACHINE="alphaev7" ;; + UNAME_MACHINE=alphaev7 ;; "EV7.9 (21364A)") - UNAME_MACHINE="alphaev79" ;; + UNAME_MACHINE=alphaev79 ;; esac # A Pn.n version is a patched version. # A Vn.n version is a released version. # A Tn.n version is a released field test version. # A Xn.n version is an unreleased experimental baselevel. # 1.2 uses "1.2" for uname -r. - echo ${UNAME_MACHINE}-dec-osf`echo ${UNAME_RELEASE} | sed -e 's/^[PVTX]//' | tr 'ABCDEFGHIJKLMNOPQRSTUVWXYZ' 'abcdefghijklmnopqrstuvwxyz'` + echo "$UNAME_MACHINE"-dec-osf"`echo "$UNAME_RELEASE" | sed -e 's/^[PVTX]//' | tr ABCDEFGHIJKLMNOPQRSTUVWXYZ abcdefghijklmnopqrstuvwxyz`" # Reset EXIT trap before exiting to avoid spurious non-zero exit code. exitcode=$? trap '' 0 exit $exitcode ;; - Alpha\ *:Windows_NT*:*) - # How do we know it's Interix rather than the generic POSIX subsystem? - # Should we change UNAME_MACHINE based on the output of uname instead - # of the specific Alpha model? 
- echo alpha-pc-interix - exit ;; - 21064:Windows_NT:50:3) - echo alpha-dec-winnt3.5 - exit ;; Amiga*:UNIX_System_V:4.0:*) echo m68k-unknown-sysv4 exit ;; *:[Aa]miga[Oo][Ss]:*:*) - echo ${UNAME_MACHINE}-unknown-amigaos + echo "$UNAME_MACHINE"-unknown-amigaos exit ;; *:[Mm]orph[Oo][Ss]:*:*) - echo ${UNAME_MACHINE}-unknown-morphos + echo "$UNAME_MACHINE"-unknown-morphos exit ;; *:OS/390:*:*) echo i370-ibm-openedition @@ -304,7 +350,7 @@ echo powerpc-ibm-os400 exit ;; arm:RISC*:1.[012]*:*|arm:riscix:1.[012]*:*) - echo arm-acorn-riscix${UNAME_RELEASE} + echo arm-acorn-riscix"$UNAME_RELEASE" exit ;; arm*:riscos:*:*|arm*:RISCOS:*:*) echo arm-unknown-riscos @@ -331,38 +377,38 @@ sparc) echo sparc-icl-nx7; exit ;; esac ;; s390x:SunOS:*:*) - echo ${UNAME_MACHINE}-ibm-solaris2`echo ${UNAME_RELEASE}|sed -e 's/[^.]*//'` + echo "$UNAME_MACHINE"-ibm-solaris2"`echo "$UNAME_RELEASE" | sed -e 's/[^.]*//'`" exit ;; sun4H:SunOS:5.*:*) - echo sparc-hal-solaris2`echo ${UNAME_RELEASE}|sed -e 's/[^.]*//'` + echo sparc-hal-solaris2"`echo "$UNAME_RELEASE"|sed -e 's/[^.]*//'`" exit ;; sun4*:SunOS:5.*:* | tadpole*:SunOS:5.*:*) - echo sparc-sun-solaris2`echo ${UNAME_RELEASE}|sed -e 's/[^.]*//'` + echo sparc-sun-solaris2"`echo "$UNAME_RELEASE" | sed -e 's/[^.]*//'`" exit ;; i86pc:AuroraUX:5.*:* | i86xen:AuroraUX:5.*:*) - echo i386-pc-auroraux${UNAME_RELEASE} + echo i386-pc-auroraux"$UNAME_RELEASE" exit ;; i86pc:SunOS:5.*:* | i86xen:SunOS:5.*:*) - eval $set_cc_for_build - SUN_ARCH="i386" + eval "$set_cc_for_build" + SUN_ARCH=i386 # If there is a compiler, see if it is configured for 64-bit objects. # Note that the Sun cc does not turn __LP64__ into 1 like gcc does. # This test works for both compilers. 
- if [ "$CC_FOR_BUILD" != 'no_compiler_found' ]; then + if [ "$CC_FOR_BUILD" != no_compiler_found ]; then if (echo '#ifdef __amd64'; echo IS_64BIT_ARCH; echo '#endif') | \ - (CCOPTS= $CC_FOR_BUILD -E - 2>/dev/null) | \ + (CCOPTS="" $CC_FOR_BUILD -E - 2>/dev/null) | \ grep IS_64BIT_ARCH >/dev/null then - SUN_ARCH="x86_64" + SUN_ARCH=x86_64 fi fi - echo ${SUN_ARCH}-pc-solaris2`echo ${UNAME_RELEASE}|sed -e 's/[^.]*//'` + echo "$SUN_ARCH"-pc-solaris2"`echo "$UNAME_RELEASE"|sed -e 's/[^.]*//'`" exit ;; sun4*:SunOS:6*:*) # According to config.sub, this is the proper way to canonicalize # SunOS6. Hard to guess exactly what SunOS6 will be like, but # it's likely to be more like Solaris than SunOS4. - echo sparc-sun-solaris3`echo ${UNAME_RELEASE}|sed -e 's/[^.]*//'` + echo sparc-sun-solaris3"`echo "$UNAME_RELEASE"|sed -e 's/[^.]*//'`" exit ;; sun4*:SunOS:*:*) case "`/usr/bin/arch -k`" in @@ -371,25 +417,25 @@ ;; esac # Japanese Language versions have a version number like `4.1.3-JL'. - echo sparc-sun-sunos`echo ${UNAME_RELEASE}|sed -e 's/-/_/'` + echo sparc-sun-sunos"`echo "$UNAME_RELEASE"|sed -e 's/-/_/'`" exit ;; sun3*:SunOS:*:*) - echo m68k-sun-sunos${UNAME_RELEASE} + echo m68k-sun-sunos"$UNAME_RELEASE" exit ;; sun*:*:4.2BSD:*) UNAME_RELEASE=`(sed 1q /etc/motd | awk '{print substr($5,1,3)}') 2>/dev/null` - test "x${UNAME_RELEASE}" = "x" && UNAME_RELEASE=3 + test "x$UNAME_RELEASE" = x && UNAME_RELEASE=3 case "`/bin/arch`" in sun3) - echo m68k-sun-sunos${UNAME_RELEASE} + echo m68k-sun-sunos"$UNAME_RELEASE" ;; sun4) - echo sparc-sun-sunos${UNAME_RELEASE} + echo sparc-sun-sunos"$UNAME_RELEASE" ;; esac exit ;; aushp:SunOS:*:*) - echo sparc-auspex-sunos${UNAME_RELEASE} + echo sparc-auspex-sunos"$UNAME_RELEASE" exit ;; # The situation for MiNT is a little confusing. The machine name # can be virtually everything (everything which is not @@ -400,44 +446,44 @@ # MiNT. But MiNT is downward compatible to TOS, so this should # be no problem. 
atarist[e]:*MiNT:*:* | atarist[e]:*mint:*:* | atarist[e]:*TOS:*:*) - echo m68k-atari-mint${UNAME_RELEASE} + echo m68k-atari-mint"$UNAME_RELEASE" exit ;; atari*:*MiNT:*:* | atari*:*mint:*:* | atarist[e]:*TOS:*:*) - echo m68k-atari-mint${UNAME_RELEASE} + echo m68k-atari-mint"$UNAME_RELEASE" exit ;; *falcon*:*MiNT:*:* | *falcon*:*mint:*:* | *falcon*:*TOS:*:*) - echo m68k-atari-mint${UNAME_RELEASE} + echo m68k-atari-mint"$UNAME_RELEASE" exit ;; milan*:*MiNT:*:* | milan*:*mint:*:* | *milan*:*TOS:*:*) - echo m68k-milan-mint${UNAME_RELEASE} + echo m68k-milan-mint"$UNAME_RELEASE" exit ;; hades*:*MiNT:*:* | hades*:*mint:*:* | *hades*:*TOS:*:*) - echo m68k-hades-mint${UNAME_RELEASE} + echo m68k-hades-mint"$UNAME_RELEASE" exit ;; *:*MiNT:*:* | *:*mint:*:* | *:*TOS:*:*) - echo m68k-unknown-mint${UNAME_RELEASE} + echo m68k-unknown-mint"$UNAME_RELEASE" exit ;; m68k:machten:*:*) - echo m68k-apple-machten${UNAME_RELEASE} + echo m68k-apple-machten"$UNAME_RELEASE" exit ;; powerpc:machten:*:*) - echo powerpc-apple-machten${UNAME_RELEASE} + echo powerpc-apple-machten"$UNAME_RELEASE" exit ;; RISC*:Mach:*:*) echo mips-dec-mach_bsd4.3 exit ;; RISC*:ULTRIX:*:*) - echo mips-dec-ultrix${UNAME_RELEASE} + echo mips-dec-ultrix"$UNAME_RELEASE" exit ;; VAX*:ULTRIX*:*:*) - echo vax-dec-ultrix${UNAME_RELEASE} + echo vax-dec-ultrix"$UNAME_RELEASE" exit ;; 2020:CLIX:*:* | 2430:CLIX:*:*) - echo clipper-intergraph-clix${UNAME_RELEASE} + echo clipper-intergraph-clix"$UNAME_RELEASE" exit ;; mips:*:*:UMIPS | mips:*:*:RISCos) - eval $set_cc_for_build - sed 's/^ //' << EOF >$dummy.c + eval "$set_cc_for_build" + sed 's/^ //' << EOF > "$dummy.c" #ifdef __cplusplus #include /* for printf() prototype */ int main (int argc, char *argv[]) { @@ -446,23 +492,23 @@ #endif #if defined (host_mips) && defined (MIPSEB) #if defined (SYSTYPE_SYSV) - printf ("mips-mips-riscos%ssysv\n", argv[1]); exit (0); + printf ("mips-mips-riscos%ssysv\\n", argv[1]); exit (0); #endif #if defined (SYSTYPE_SVR4) - printf 
("mips-mips-riscos%ssvr4\n", argv[1]); exit (0); + printf ("mips-mips-riscos%ssvr4\\n", argv[1]); exit (0); #endif #if defined (SYSTYPE_BSD43) || defined(SYSTYPE_BSD) - printf ("mips-mips-riscos%sbsd\n", argv[1]); exit (0); + printf ("mips-mips-riscos%sbsd\\n", argv[1]); exit (0); #endif #endif exit (-1); } EOF - $CC_FOR_BUILD -o $dummy $dummy.c && - dummyarg=`echo "${UNAME_RELEASE}" | sed -n 's/\([0-9]*\).*/\1/p'` && - SYSTEM_NAME=`$dummy $dummyarg` && + $CC_FOR_BUILD -o "$dummy" "$dummy.c" && + dummyarg=`echo "$UNAME_RELEASE" | sed -n 's/\([0-9]*\).*/\1/p'` && + SYSTEM_NAME=`"$dummy" "$dummyarg"` && { echo "$SYSTEM_NAME"; exit; } - echo mips-mips-riscos${UNAME_RELEASE} + echo mips-mips-riscos"$UNAME_RELEASE" exit ;; Motorola:PowerMAX_OS:*:*) echo powerpc-motorola-powermax @@ -488,17 +534,17 @@ AViiON:dgux:*:*) # DG/UX returns AViiON for all architectures UNAME_PROCESSOR=`/usr/bin/uname -p` - if [ $UNAME_PROCESSOR = mc88100 ] || [ $UNAME_PROCESSOR = mc88110 ] + if [ "$UNAME_PROCESSOR" = mc88100 ] || [ "$UNAME_PROCESSOR" = mc88110 ] then - if [ ${TARGET_BINARY_INTERFACE}x = m88kdguxelfx ] || \ - [ ${TARGET_BINARY_INTERFACE}x = x ] + if [ "$TARGET_BINARY_INTERFACE"x = m88kdguxelfx ] || \ + [ "$TARGET_BINARY_INTERFACE"x = x ] then - echo m88k-dg-dgux${UNAME_RELEASE} + echo m88k-dg-dgux"$UNAME_RELEASE" else - echo m88k-dg-dguxbcs${UNAME_RELEASE} + echo m88k-dg-dguxbcs"$UNAME_RELEASE" fi else - echo i586-dg-dgux${UNAME_RELEASE} + echo i586-dg-dgux"$UNAME_RELEASE" fi exit ;; M88*:DolphinOS:*:*) # DolphinOS (SVR3) @@ -515,7 +561,7 @@ echo m68k-tektronix-bsd exit ;; *:IRIX*:*:*) - echo mips-sgi-irix`echo ${UNAME_RELEASE}|sed -e 's/-/_/g'` + echo mips-sgi-irix"`echo "$UNAME_RELEASE"|sed -e 's/-/_/g'`" exit ;; ????????:AIX?:[12].1:2) # AIX 2.2.1 or AIX 2.1.1 is RT/PC AIX. 
echo romp-ibm-aix # uname -m gives an 8 hex-code CPU id @@ -527,14 +573,14 @@ if [ -x /usr/bin/oslevel ] ; then IBM_REV=`/usr/bin/oslevel` else - IBM_REV=${UNAME_VERSION}.${UNAME_RELEASE} + IBM_REV="$UNAME_VERSION.$UNAME_RELEASE" fi - echo ${UNAME_MACHINE}-ibm-aix${IBM_REV} + echo "$UNAME_MACHINE"-ibm-aix"$IBM_REV" exit ;; *:AIX:2:3) if grep bos325 /usr/include/stdio.h >/dev/null 2>&1; then - eval $set_cc_for_build - sed 's/^ //' << EOF >$dummy.c + eval "$set_cc_for_build" + sed 's/^ //' << EOF > "$dummy.c" #include main() @@ -545,7 +591,7 @@ exit(0); } EOF - if $CC_FOR_BUILD -o $dummy $dummy.c && SYSTEM_NAME=`$dummy` + if $CC_FOR_BUILD -o "$dummy" "$dummy.c" && SYSTEM_NAME=`"$dummy"` then echo "$SYSTEM_NAME" else @@ -559,26 +605,27 @@ exit ;; *:AIX:*:[4567]) IBM_CPU_ID=`/usr/sbin/lsdev -C -c processor -S available | sed 1q | awk '{ print $1 }'` - if /usr/sbin/lsattr -El ${IBM_CPU_ID} | grep ' POWER' >/dev/null 2>&1; then + if /usr/sbin/lsattr -El "$IBM_CPU_ID" | grep ' POWER' >/dev/null 2>&1; then IBM_ARCH=rs6000 else IBM_ARCH=powerpc fi - if [ -x /usr/bin/oslevel ] ; then - IBM_REV=`/usr/bin/oslevel` + if [ -x /usr/bin/lslpp ] ; then + IBM_REV=`/usr/bin/lslpp -Lqc bos.rte.libc | + awk -F: '{ print $3 }' | sed s/[0-9]*$/0/` else - IBM_REV=${UNAME_VERSION}.${UNAME_RELEASE} + IBM_REV="$UNAME_VERSION.$UNAME_RELEASE" fi - echo ${IBM_ARCH}-ibm-aix${IBM_REV} + echo "$IBM_ARCH"-ibm-aix"$IBM_REV" exit ;; *:AIX:*:*) echo rs6000-ibm-aix exit ;; - ibmrt:4.4BSD:*|romp-ibm:BSD:*) + ibmrt:4.4BSD:*|romp-ibm:4.4BSD:*) echo romp-ibm-bsd4.4 exit ;; ibmrt:*BSD:*|romp-ibm:BSD:*) # covers RT/PC BSD and - echo romp-ibm-bsd${UNAME_RELEASE} # 4.3 with uname added to + echo romp-ibm-bsd"$UNAME_RELEASE" # 4.3 with uname added to exit ;; # report: romp-ibm BSD 4.3 *:BOSX:*:*) echo rs6000-bull-bosx @@ -593,28 +640,28 @@ echo m68k-hp-bsd4.4 exit ;; 9000/[34678]??:HP-UX:*:*) - HPUX_REV=`echo ${UNAME_RELEASE}|sed -e 's/[^.]*.[0B]*//'` - case "${UNAME_MACHINE}" in - 9000/31? 
) HP_ARCH=m68000 ;; - 9000/[34]?? ) HP_ARCH=m68k ;; + HPUX_REV=`echo "$UNAME_RELEASE"|sed -e 's/[^.]*.[0B]*//'` + case "$UNAME_MACHINE" in + 9000/31?) HP_ARCH=m68000 ;; + 9000/[34]??) HP_ARCH=m68k ;; 9000/[678][0-9][0-9]) if [ -x /usr/bin/getconf ]; then sc_cpu_version=`/usr/bin/getconf SC_CPU_VERSION 2>/dev/null` sc_kernel_bits=`/usr/bin/getconf SC_KERNEL_BITS 2>/dev/null` - case "${sc_cpu_version}" in - 523) HP_ARCH="hppa1.0" ;; # CPU_PA_RISC1_0 - 528) HP_ARCH="hppa1.1" ;; # CPU_PA_RISC1_1 + case "$sc_cpu_version" in + 523) HP_ARCH=hppa1.0 ;; # CPU_PA_RISC1_0 + 528) HP_ARCH=hppa1.1 ;; # CPU_PA_RISC1_1 532) # CPU_PA_RISC2_0 - case "${sc_kernel_bits}" in - 32) HP_ARCH="hppa2.0n" ;; - 64) HP_ARCH="hppa2.0w" ;; - '') HP_ARCH="hppa2.0" ;; # HP-UX 10.20 + case "$sc_kernel_bits" in + 32) HP_ARCH=hppa2.0n ;; + 64) HP_ARCH=hppa2.0w ;; + '') HP_ARCH=hppa2.0 ;; # HP-UX 10.20 esac ;; esac fi - if [ "${HP_ARCH}" = "" ]; then - eval $set_cc_for_build - sed 's/^ //' << EOF >$dummy.c + if [ "$HP_ARCH" = "" ]; then + eval "$set_cc_for_build" + sed 's/^ //' << EOF > "$dummy.c" #define _HPUX_SOURCE #include @@ -647,13 +694,13 @@ exit (0); } EOF - (CCOPTS= $CC_FOR_BUILD -o $dummy $dummy.c 2>/dev/null) && HP_ARCH=`$dummy` + (CCOPTS="" $CC_FOR_BUILD -o "$dummy" "$dummy.c" 2>/dev/null) && HP_ARCH=`"$dummy"` test -z "$HP_ARCH" && HP_ARCH=hppa fi ;; esac - if [ ${HP_ARCH} = "hppa2.0w" ] + if [ "$HP_ARCH" = hppa2.0w ] then - eval $set_cc_for_build + eval "$set_cc_for_build" # hppa2.0w-hp-hpux* has a 64-bit kernel and a compiler generating # 32-bit code. 
hppa64-hp-hpux* has the same kernel and a compiler @@ -664,23 +711,23 @@ # $ CC_FOR_BUILD="cc +DA2.0w" ./config.guess # => hppa64-hp-hpux11.23 - if echo __LP64__ | (CCOPTS= $CC_FOR_BUILD -E - 2>/dev/null) | + if echo __LP64__ | (CCOPTS="" $CC_FOR_BUILD -E - 2>/dev/null) | grep -q __LP64__ then - HP_ARCH="hppa2.0w" + HP_ARCH=hppa2.0w else - HP_ARCH="hppa64" + HP_ARCH=hppa64 fi fi - echo ${HP_ARCH}-hp-hpux${HPUX_REV} + echo "$HP_ARCH"-hp-hpux"$HPUX_REV" exit ;; ia64:HP-UX:*:*) - HPUX_REV=`echo ${UNAME_RELEASE}|sed -e 's/[^.]*.[0B]*//'` - echo ia64-hp-hpux${HPUX_REV} + HPUX_REV=`echo "$UNAME_RELEASE"|sed -e 's/[^.]*.[0B]*//'` + echo ia64-hp-hpux"$HPUX_REV" exit ;; 3050*:HI-UX:*:*) - eval $set_cc_for_build - sed 's/^ //' << EOF >$dummy.c + eval "$set_cc_for_build" + sed 's/^ //' << EOF > "$dummy.c" #include int main () @@ -705,11 +752,11 @@ exit (0); } EOF - $CC_FOR_BUILD -o $dummy $dummy.c && SYSTEM_NAME=`$dummy` && + $CC_FOR_BUILD -o "$dummy" "$dummy.c" && SYSTEM_NAME=`"$dummy"` && { echo "$SYSTEM_NAME"; exit; } echo unknown-hitachi-hiuxwe2 exit ;; - 9000/7??:4.3bsd:*:* | 9000/8?[79]:4.3bsd:*:* ) + 9000/7??:4.3bsd:*:* | 9000/8?[79]:4.3bsd:*:*) echo hppa1.1-hp-bsd exit ;; 9000/8??:4.3bsd:*:*) @@ -718,7 +765,7 @@ *9??*:MPE/iX:*:* | *3000*:MPE/iX:*:*) echo hppa1.0-hp-mpeix exit ;; - hp7??:OSF1:*:* | hp8?[79]:OSF1:*:* ) + hp7??:OSF1:*:* | hp8?[79]:OSF1:*:*) echo hppa1.1-hp-osf exit ;; hp8??:OSF1:*:*) @@ -726,9 +773,9 @@ exit ;; i*86:OSF1:*:*) if [ -x /usr/sbin/sysversion ] ; then - echo ${UNAME_MACHINE}-unknown-osf1mk + echo "$UNAME_MACHINE"-unknown-osf1mk else - echo ${UNAME_MACHINE}-unknown-osf1 + echo "$UNAME_MACHINE"-unknown-osf1 fi exit ;; parisc*:Lites*:*:*) @@ -753,127 +800,109 @@ echo c4-convex-bsd exit ;; CRAY*Y-MP:*:*:*) - echo ymp-cray-unicos${UNAME_RELEASE} | sed -e 's/\.[^.]*$/.X/' + echo ymp-cray-unicos"$UNAME_RELEASE" | sed -e 's/\.[^.]*$/.X/' exit ;; CRAY*[A-Z]90:*:*:*) - echo ${UNAME_MACHINE}-cray-unicos${UNAME_RELEASE} \ + echo 
"$UNAME_MACHINE"-cray-unicos"$UNAME_RELEASE" \ | sed -e 's/CRAY.*\([A-Z]90\)/\1/' \ -e y/ABCDEFGHIJKLMNOPQRSTUVWXYZ/abcdefghijklmnopqrstuvwxyz/ \ -e 's/\.[^.]*$/.X/' exit ;; CRAY*TS:*:*:*) - echo t90-cray-unicos${UNAME_RELEASE} | sed -e 's/\.[^.]*$/.X/' + echo t90-cray-unicos"$UNAME_RELEASE" | sed -e 's/\.[^.]*$/.X/' exit ;; CRAY*T3E:*:*:*) - echo alphaev5-cray-unicosmk${UNAME_RELEASE} | sed -e 's/\.[^.]*$/.X/' + echo alphaev5-cray-unicosmk"$UNAME_RELEASE" | sed -e 's/\.[^.]*$/.X/' exit ;; CRAY*SV1:*:*:*) - echo sv1-cray-unicos${UNAME_RELEASE} | sed -e 's/\.[^.]*$/.X/' + echo sv1-cray-unicos"$UNAME_RELEASE" | sed -e 's/\.[^.]*$/.X/' exit ;; *:UNICOS/mp:*:*) - echo craynv-cray-unicosmp${UNAME_RELEASE} | sed -e 's/\.[^.]*$/.X/' + echo craynv-cray-unicosmp"$UNAME_RELEASE" | sed -e 's/\.[^.]*$/.X/' exit ;; F30[01]:UNIX_System_V:*:* | F700:UNIX_System_V:*:*) - FUJITSU_PROC=`uname -m | tr 'ABCDEFGHIJKLMNOPQRSTUVWXYZ' 'abcdefghijklmnopqrstuvwxyz'` - FUJITSU_SYS=`uname -p | tr 'ABCDEFGHIJKLMNOPQRSTUVWXYZ' 'abcdefghijklmnopqrstuvwxyz' | sed -e 's/\///'` - FUJITSU_REL=`echo ${UNAME_RELEASE} | sed -e 's/ /_/'` + FUJITSU_PROC=`uname -m | tr ABCDEFGHIJKLMNOPQRSTUVWXYZ abcdefghijklmnopqrstuvwxyz` + FUJITSU_SYS=`uname -p | tr ABCDEFGHIJKLMNOPQRSTUVWXYZ abcdefghijklmnopqrstuvwxyz | sed -e 's/\///'` + FUJITSU_REL=`echo "$UNAME_RELEASE" | sed -e 's/ /_/'` echo "${FUJITSU_PROC}-fujitsu-${FUJITSU_SYS}${FUJITSU_REL}" exit ;; 5000:UNIX_System_V:4.*:*) - FUJITSU_SYS=`uname -p | tr 'ABCDEFGHIJKLMNOPQRSTUVWXYZ' 'abcdefghijklmnopqrstuvwxyz' | sed -e 's/\///'` - FUJITSU_REL=`echo ${UNAME_RELEASE} | tr 'ABCDEFGHIJKLMNOPQRSTUVWXYZ' 'abcdefghijklmnopqrstuvwxyz' | sed -e 's/ /_/'` + FUJITSU_SYS=`uname -p | tr ABCDEFGHIJKLMNOPQRSTUVWXYZ abcdefghijklmnopqrstuvwxyz | sed -e 's/\///'` + FUJITSU_REL=`echo "$UNAME_RELEASE" | tr ABCDEFGHIJKLMNOPQRSTUVWXYZ abcdefghijklmnopqrstuvwxyz | sed -e 's/ /_/'` echo "sparc-fujitsu-${FUJITSU_SYS}${FUJITSU_REL}" exit ;; i*86:BSD/386:*:* | i*86:BSD/OS:*:* | 
*:Ascend\ Embedded/OS:*:*) - echo ${UNAME_MACHINE}-pc-bsdi${UNAME_RELEASE} + echo "$UNAME_MACHINE"-pc-bsdi"$UNAME_RELEASE" exit ;; sparc*:BSD/OS:*:*) - echo sparc-unknown-bsdi${UNAME_RELEASE} + echo sparc-unknown-bsdi"$UNAME_RELEASE" exit ;; *:BSD/OS:*:*) - echo ${UNAME_MACHINE}-unknown-bsdi${UNAME_RELEASE} + echo "$UNAME_MACHINE"-unknown-bsdi"$UNAME_RELEASE" exit ;; *:FreeBSD:*:*) UNAME_PROCESSOR=`/usr/bin/uname -p` - case ${UNAME_PROCESSOR} in + case "$UNAME_PROCESSOR" in amd64) - echo x86_64-unknown-freebsd`echo ${UNAME_RELEASE}|sed -e 's/[-(].*//'` ;; - *) - echo ${UNAME_PROCESSOR}-unknown-freebsd`echo ${UNAME_RELEASE}|sed -e 's/[-(].*//'` ;; + UNAME_PROCESSOR=x86_64 ;; + i386) + UNAME_PROCESSOR=i586 ;; esac + echo "$UNAME_PROCESSOR"-unknown-freebsd"`echo "$UNAME_RELEASE"|sed -e 's/[-(].*//'`" exit ;; i*:CYGWIN*:*) - echo ${UNAME_MACHINE}-pc-cygwin + echo "$UNAME_MACHINE"-pc-cygwin exit ;; *:MINGW64*:*) - echo ${UNAME_MACHINE}-pc-mingw64 + echo "$UNAME_MACHINE"-pc-mingw64 exit ;; *:MINGW*:*) - echo ${UNAME_MACHINE}-pc-mingw32 + echo "$UNAME_MACHINE"-pc-mingw32 exit ;; - i*:MSYS*:*) - echo ${UNAME_MACHINE}-pc-msys - exit ;; - i*:windows32*:*) - # uname -m includes "-pc" on this system. 
- echo ${UNAME_MACHINE}-mingw32 + *:MSYS*:*) + echo "$UNAME_MACHINE"-pc-msys exit ;; i*:PW*:*) - echo ${UNAME_MACHINE}-pc-pw32 + echo "$UNAME_MACHINE"-pc-pw32 exit ;; *:Interix*:*) - case ${UNAME_MACHINE} in + case "$UNAME_MACHINE" in x86) - echo i586-pc-interix${UNAME_RELEASE} + echo i586-pc-interix"$UNAME_RELEASE" exit ;; authenticamd | genuineintel | EM64T) - echo x86_64-unknown-interix${UNAME_RELEASE} + echo x86_64-unknown-interix"$UNAME_RELEASE" exit ;; IA64) - echo ia64-unknown-interix${UNAME_RELEASE} + echo ia64-unknown-interix"$UNAME_RELEASE" exit ;; esac ;; - [345]86:Windows_95:* | [345]86:Windows_98:* | [345]86:Windows_NT:*) - echo i${UNAME_MACHINE}-pc-mks - exit ;; - 8664:Windows_NT:*) - echo x86_64-pc-mks - exit ;; - i*:Windows_NT*:* | Pentium*:Windows_NT*:*) - # How do we know it's Interix rather than the generic POSIX subsystem? - # It also conflicts with pre-2.0 versions of AT&T UWIN. Should we - # UNAME_MACHINE based on the output of uname instead of i386? - echo i586-pc-interix - exit ;; i*:UWIN*:*) - echo ${UNAME_MACHINE}-pc-uwin + echo "$UNAME_MACHINE"-pc-uwin exit ;; amd64:CYGWIN*:*:* | x86_64:CYGWIN*:*:*) echo x86_64-unknown-cygwin exit ;; - p*:CYGWIN*:*) - echo powerpcle-unknown-cygwin - exit ;; prep*:SunOS:5.*:*) - echo powerpcle-unknown-solaris2`echo ${UNAME_RELEASE}|sed -e 's/[^.]*//'` + echo powerpcle-unknown-solaris2"`echo "$UNAME_RELEASE"|sed -e 's/[^.]*//'`" exit ;; *:GNU:*:*) # the GNU system - echo `echo ${UNAME_MACHINE}|sed -e 's,[-/].*$,,'`-unknown-gnu`echo ${UNAME_RELEASE}|sed -e 's,/.*$,,'` + echo "`echo "$UNAME_MACHINE"|sed -e 's,[-/].*$,,'`-unknown-$LIBC`echo "$UNAME_RELEASE"|sed -e 's,/.*$,,'`" exit ;; *:GNU/*:*:*) # other systems with GNU libc and userland - echo ${UNAME_MACHINE}-unknown-`echo ${UNAME_SYSTEM} | sed 's,^[^/]*/,,' | tr '[A-Z]' '[a-z]'``echo ${UNAME_RELEASE}|sed -e 's/[-(].*//'`-gnu + echo "$UNAME_MACHINE-unknown-`echo "$UNAME_SYSTEM" | sed 's,^[^/]*/,,' | tr "[:upper:]" "[:lower:]"``echo "$UNAME_RELEASE"|sed -e 
's/[-(].*//'`-$LIBC" exit ;; i*86:Minix:*:*) - echo ${UNAME_MACHINE}-pc-minix + echo "$UNAME_MACHINE"-pc-minix exit ;; aarch64:Linux:*:*) - echo ${UNAME_MACHINE}-unknown-linux-gnu + echo "$UNAME_MACHINE"-unknown-linux-"$LIBC" exit ;; aarch64_be:Linux:*:*) UNAME_MACHINE=aarch64_be - echo ${UNAME_MACHINE}-unknown-linux-gnu + echo "$UNAME_MACHINE"-unknown-linux-"$LIBC" exit ;; alpha:Linux:*:*) case `sed -n '/^cpu model/s/^.*: \(.*\)/\1/p' < /proc/cpuinfo` in @@ -886,63 +915,64 @@ EV68*) UNAME_MACHINE=alphaev68 ;; esac objdump --private-headers /bin/sh | grep -q ld.so.1 - if test "$?" = 0 ; then LIBC="libc1" ; else LIBC="" ; fi - echo ${UNAME_MACHINE}-unknown-linux-gnu${LIBC} + if test "$?" = 0 ; then LIBC=gnulibc1 ; fi + echo "$UNAME_MACHINE"-unknown-linux-"$LIBC" + exit ;; + arc:Linux:*:* | arceb:Linux:*:*) + echo "$UNAME_MACHINE"-unknown-linux-"$LIBC" exit ;; arm*:Linux:*:*) - eval $set_cc_for_build + eval "$set_cc_for_build" if echo __ARM_EABI__ | $CC_FOR_BUILD -E - 2>/dev/null \ | grep -q __ARM_EABI__ then - echo ${UNAME_MACHINE}-unknown-linux-gnu + echo "$UNAME_MACHINE"-unknown-linux-"$LIBC" else if echo __ARM_PCS_VFP | $CC_FOR_BUILD -E - 2>/dev/null \ | grep -q __ARM_PCS_VFP then - echo ${UNAME_MACHINE}-unknown-linux-gnueabi + echo "$UNAME_MACHINE"-unknown-linux-"$LIBC"eabi else - echo ${UNAME_MACHINE}-unknown-linux-gnueabihf + echo "$UNAME_MACHINE"-unknown-linux-"$LIBC"eabihf fi fi exit ;; avr32*:Linux:*:*) - echo ${UNAME_MACHINE}-unknown-linux-gnu + echo "$UNAME_MACHINE"-unknown-linux-"$LIBC" exit ;; cris:Linux:*:*) - echo ${UNAME_MACHINE}-axis-linux-gnu + echo "$UNAME_MACHINE"-axis-linux-"$LIBC" exit ;; crisv32:Linux:*:*) - echo ${UNAME_MACHINE}-axis-linux-gnu + echo "$UNAME_MACHINE"-axis-linux-"$LIBC" + exit ;; + e2k:Linux:*:*) + echo "$UNAME_MACHINE"-unknown-linux-"$LIBC" exit ;; frv:Linux:*:*) - echo ${UNAME_MACHINE}-unknown-linux-gnu + echo "$UNAME_MACHINE"-unknown-linux-"$LIBC" exit ;; hexagon:Linux:*:*) - echo ${UNAME_MACHINE}-unknown-linux-gnu + echo 
"$UNAME_MACHINE"-unknown-linux-"$LIBC" exit ;; i*86:Linux:*:*) - LIBC=gnu - eval $set_cc_for_build - sed 's/^ //' << EOF >$dummy.c - #ifdef __dietlibc__ - LIBC=dietlibc - #endif -EOF - eval `$CC_FOR_BUILD -E $dummy.c 2>/dev/null | grep '^LIBC'` - echo "${UNAME_MACHINE}-pc-linux-${LIBC}" + echo "$UNAME_MACHINE"-pc-linux-"$LIBC" exit ;; ia64:Linux:*:*) - echo ${UNAME_MACHINE}-unknown-linux-gnu + echo "$UNAME_MACHINE"-unknown-linux-"$LIBC" + exit ;; + k1om:Linux:*:*) + echo "$UNAME_MACHINE"-unknown-linux-"$LIBC" exit ;; m32r*:Linux:*:*) - echo ${UNAME_MACHINE}-unknown-linux-gnu + echo "$UNAME_MACHINE"-unknown-linux-"$LIBC" exit ;; m68*:Linux:*:*) - echo ${UNAME_MACHINE}-unknown-linux-gnu + echo "$UNAME_MACHINE"-unknown-linux-"$LIBC" exit ;; mips:Linux:*:* | mips64:Linux:*:*) - eval $set_cc_for_build - sed 's/^ //' << EOF >$dummy.c + eval "$set_cc_for_build" + sed 's/^ //' << EOF > "$dummy.c" #undef CPU #undef ${UNAME_MACHINE} #undef ${UNAME_MACHINE}el @@ -956,58 +986,74 @@ #endif #endif EOF - eval `$CC_FOR_BUILD -E $dummy.c 2>/dev/null | grep '^CPU'` - test x"${CPU}" != x && { echo "${CPU}-unknown-linux-gnu"; exit; } + eval "`$CC_FOR_BUILD -E "$dummy.c" 2>/dev/null | grep '^CPU'`" + test "x$CPU" != x && { echo "$CPU-unknown-linux-$LIBC"; exit; } ;; - or32:Linux:*:*) - echo ${UNAME_MACHINE}-unknown-linux-gnu + mips64el:Linux:*:*) + echo "$UNAME_MACHINE"-unknown-linux-"$LIBC" + exit ;; + openrisc*:Linux:*:*) + echo or1k-unknown-linux-"$LIBC" + exit ;; + or32:Linux:*:* | or1k*:Linux:*:*) + echo "$UNAME_MACHINE"-unknown-linux-"$LIBC" exit ;; padre:Linux:*:*) - echo sparc-unknown-linux-gnu + echo sparc-unknown-linux-"$LIBC" exit ;; parisc64:Linux:*:* | hppa64:Linux:*:*) - echo hppa64-unknown-linux-gnu + echo hppa64-unknown-linux-"$LIBC" exit ;; parisc:Linux:*:* | hppa:Linux:*:*) # Look for CPU level case `grep '^cpu[^a-z]*:' /proc/cpuinfo 2>/dev/null | cut -d' ' -f2` in - PA7*) echo hppa1.1-unknown-linux-gnu ;; - PA8*) echo hppa2.0-unknown-linux-gnu ;; - *) echo 
hppa-unknown-linux-gnu ;; + PA7*) echo hppa1.1-unknown-linux-"$LIBC" ;; + PA8*) echo hppa2.0-unknown-linux-"$LIBC" ;; + *) echo hppa-unknown-linux-"$LIBC" ;; esac exit ;; ppc64:Linux:*:*) - echo powerpc64-unknown-linux-gnu + echo powerpc64-unknown-linux-"$LIBC" + exit ;; + ppc:Linux:*:*) + echo powerpc-unknown-linux-"$LIBC" exit ;; ppc64le:Linux:*:*) - echo powerpc64le-unknown-linux-gnu + echo powerpc64le-unknown-linux-"$LIBC" exit ;; - ppc:Linux:*:*) - echo powerpc-unknown-linux-gnu + ppcle:Linux:*:*) + echo powerpcle-unknown-linux-"$LIBC" + exit ;; + riscv32:Linux:*:* | riscv64:Linux:*:*) + echo "$UNAME_MACHINE"-unknown-linux-"$LIBC" exit ;; s390:Linux:*:* | s390x:Linux:*:*) - echo ${UNAME_MACHINE}-ibm-linux + echo "$UNAME_MACHINE"-ibm-linux-"$LIBC" exit ;; sh64*:Linux:*:*) - echo ${UNAME_MACHINE}-unknown-linux-gnu + echo "$UNAME_MACHINE"-unknown-linux-"$LIBC" exit ;; sh*:Linux:*:*) - echo ${UNAME_MACHINE}-unknown-linux-gnu + echo "$UNAME_MACHINE"-unknown-linux-"$LIBC" exit ;; sparc:Linux:*:* | sparc64:Linux:*:*) - echo ${UNAME_MACHINE}-unknown-linux-gnu + echo "$UNAME_MACHINE"-unknown-linux-"$LIBC" exit ;; tile*:Linux:*:*) - echo ${UNAME_MACHINE}-unknown-linux-gnu + echo "$UNAME_MACHINE"-unknown-linux-"$LIBC" exit ;; vax:Linux:*:*) - echo ${UNAME_MACHINE}-dec-linux-gnu + echo "$UNAME_MACHINE"-dec-linux-"$LIBC" exit ;; x86_64:Linux:*:*) - echo ${UNAME_MACHINE}-unknown-linux-gnu + if objdump -f /bin/sh | grep -q elf32-x86-64; then + echo "$UNAME_MACHINE"-pc-linux-"$LIBC"x32 + else + echo "$UNAME_MACHINE"-pc-linux-"$LIBC" + fi exit ;; xtensa*:Linux:*:*) - echo ${UNAME_MACHINE}-unknown-linux-gnu + echo "$UNAME_MACHINE"-unknown-linux-"$LIBC" exit ;; i*86:DYNIX/ptx:4*:*) # ptx 4.0 does uname -s correctly, with DYNIX/ptx in there. @@ -1021,34 +1067,34 @@ # I am not positive that other SVR4 systems won't match this, # I just have to hope. -- rms. # Use sysv4.2uw... so that sysv4* matches it. 
- echo ${UNAME_MACHINE}-pc-sysv4.2uw${UNAME_VERSION} + echo "$UNAME_MACHINE"-pc-sysv4.2uw"$UNAME_VERSION" exit ;; i*86:OS/2:*:*) # If we were able to find `uname', then EMX Unix compatibility # is probably installed. - echo ${UNAME_MACHINE}-pc-os2-emx + echo "$UNAME_MACHINE"-pc-os2-emx exit ;; i*86:XTS-300:*:STOP) - echo ${UNAME_MACHINE}-unknown-stop + echo "$UNAME_MACHINE"-unknown-stop exit ;; i*86:atheos:*:*) - echo ${UNAME_MACHINE}-unknown-atheos + echo "$UNAME_MACHINE"-unknown-atheos exit ;; i*86:syllable:*:*) - echo ${UNAME_MACHINE}-pc-syllable + echo "$UNAME_MACHINE"-pc-syllable exit ;; i*86:LynxOS:2.*:* | i*86:LynxOS:3.[01]*:* | i*86:LynxOS:4.[02]*:*) - echo i386-unknown-lynxos${UNAME_RELEASE} + echo i386-unknown-lynxos"$UNAME_RELEASE" exit ;; i*86:*DOS:*:*) - echo ${UNAME_MACHINE}-pc-msdosdjgpp + echo "$UNAME_MACHINE"-pc-msdosdjgpp exit ;; - i*86:*:4.*:* | i*86:SYSTEM_V:4.*:*) - UNAME_REL=`echo ${UNAME_RELEASE} | sed 's/\/MP$//'` + i*86:*:4.*:*) + UNAME_REL=`echo "$UNAME_RELEASE" | sed 's/\/MP$//'` if grep Novell /usr/include/link.h >/dev/null 2>/dev/null; then - echo ${UNAME_MACHINE}-univel-sysv${UNAME_REL} + echo "$UNAME_MACHINE"-univel-sysv"$UNAME_REL" else - echo ${UNAME_MACHINE}-pc-sysv${UNAME_REL} + echo "$UNAME_MACHINE"-pc-sysv"$UNAME_REL" fi exit ;; i*86:*:5:[678]*) @@ -1058,12 +1104,12 @@ *Pentium) UNAME_MACHINE=i586 ;; *Pent*|*Celeron) UNAME_MACHINE=i686 ;; esac - echo ${UNAME_MACHINE}-unknown-sysv${UNAME_RELEASE}${UNAME_SYSTEM}${UNAME_VERSION} + echo "$UNAME_MACHINE-unknown-sysv${UNAME_RELEASE}${UNAME_SYSTEM}{$UNAME_VERSION}" exit ;; i*86:*:3.2:*) if test -f /usr/options/cb.name; then UNAME_REL=`sed -n 's/.*Version //p' /dev/null >/dev/null ; then UNAME_REL=`(/bin/uname -X|grep Release|sed -e 's/.*= //')` (/bin/uname -X|grep i80486 >/dev/null) && UNAME_MACHINE=i486 @@ -1073,9 +1119,9 @@ && UNAME_MACHINE=i686 (/bin/uname -X|grep '^Machine.*Pentium Pro' >/dev/null) \ && UNAME_MACHINE=i686 - echo ${UNAME_MACHINE}-pc-sco$UNAME_REL + echo 
"$UNAME_MACHINE"-pc-sco"$UNAME_REL" else - echo ${UNAME_MACHINE}-pc-sysv32 + echo "$UNAME_MACHINE"-pc-sysv32 fi exit ;; pc:*:*:*) @@ -1083,7 +1129,7 @@ # uname -m prints for DJGPP always 'pc', but it prints nothing about # the processor, so we play safe by assuming i586. # Note: whatever this is, it MUST be the same as what config.sub - # prints for the "djgpp" host, or else GDB configury will decide that + # prints for the "djgpp" host, or else GDB configure will decide that # this is a cross-build. echo i586-pc-msdosdjgpp exit ;; @@ -1095,9 +1141,9 @@ exit ;; i860:*:4.*:*) # i860-SVR4 if grep Stardent /usr/include/sys/uadmin.h >/dev/null 2>&1 ; then - echo i860-stardent-sysv${UNAME_RELEASE} # Stardent Vistra i860-SVR4 + echo i860-stardent-sysv"$UNAME_RELEASE" # Stardent Vistra i860-SVR4 else # Add other i860-SVR4 vendors below as they are discovered. - echo i860-unknown-sysv${UNAME_RELEASE} # Unknown i860-SVR4 + echo i860-unknown-sysv"$UNAME_RELEASE" # Unknown i860-SVR4 fi exit ;; mini*:CTIX:SYS*5:*) @@ -1117,9 +1163,9 @@ test -r /etc/.relid \ && OS_REL=.`sed -n 's/[^ ]* [^ ]* \([0-9][0-9]\).*/\1/p' < /etc/.relid` /bin/uname -p 2>/dev/null | grep 86 >/dev/null \ - && { echo i486-ncr-sysv4.3${OS_REL}; exit; } + && { echo i486-ncr-sysv4.3"$OS_REL"; exit; } /bin/uname -p 2>/dev/null | /bin/grep entium >/dev/null \ - && { echo i586-ncr-sysv4.3${OS_REL}; exit; } ;; + && { echo i586-ncr-sysv4.3"$OS_REL"; exit; } ;; 3[34]??:*:4.0:* | 3[34]??,*:*:4.0:*) /bin/uname -p 2>/dev/null | grep 86 >/dev/null \ && { echo i486-ncr-sysv4; exit; } ;; @@ -1128,28 +1174,28 @@ test -r /etc/.relid \ && OS_REL=.`sed -n 's/[^ ]* [^ ]* \([0-9][0-9]\).*/\1/p' < /etc/.relid` /bin/uname -p 2>/dev/null | grep 86 >/dev/null \ - && { echo i486-ncr-sysv4.3${OS_REL}; exit; } + && { echo i486-ncr-sysv4.3"$OS_REL"; exit; } /bin/uname -p 2>/dev/null | /bin/grep entium >/dev/null \ - && { echo i586-ncr-sysv4.3${OS_REL}; exit; } + && { echo i586-ncr-sysv4.3"$OS_REL"; exit; } /bin/uname -p 2>/dev/null | 
/bin/grep pteron >/dev/null \ - && { echo i586-ncr-sysv4.3${OS_REL}; exit; } ;; + && { echo i586-ncr-sysv4.3"$OS_REL"; exit; } ;; m68*:LynxOS:2.*:* | m68*:LynxOS:3.0*:*) - echo m68k-unknown-lynxos${UNAME_RELEASE} + echo m68k-unknown-lynxos"$UNAME_RELEASE" exit ;; mc68030:UNIX_System_V:4.*:*) echo m68k-atari-sysv4 exit ;; TSUNAMI:LynxOS:2.*:*) - echo sparc-unknown-lynxos${UNAME_RELEASE} + echo sparc-unknown-lynxos"$UNAME_RELEASE" exit ;; rs6000:LynxOS:2.*:*) - echo rs6000-unknown-lynxos${UNAME_RELEASE} + echo rs6000-unknown-lynxos"$UNAME_RELEASE" exit ;; PowerPC:LynxOS:2.*:* | PowerPC:LynxOS:3.[01]*:* | PowerPC:LynxOS:4.[02]*:*) - echo powerpc-unknown-lynxos${UNAME_RELEASE} + echo powerpc-unknown-lynxos"$UNAME_RELEASE" exit ;; SM[BE]S:UNIX_SV:*:*) - echo mips-dde-sysv${UNAME_RELEASE} + echo mips-dde-sysv"$UNAME_RELEASE" exit ;; RM*:ReliantUNIX-*:*:*) echo mips-sni-sysv4 @@ -1160,7 +1206,7 @@ *:SINIX-*:*:*) if uname -p 2>/dev/null >/dev/null ; then UNAME_MACHINE=`(uname -p) 2>/dev/null` - echo ${UNAME_MACHINE}-sni-sysv4 + echo "$UNAME_MACHINE"-sni-sysv4 else echo ns32k-sni-sysv fi @@ -1180,23 +1226,23 @@ exit ;; i*86:VOS:*:*) # From Paul.Green@stratus.com. - echo ${UNAME_MACHINE}-stratus-vos + echo "$UNAME_MACHINE"-stratus-vos exit ;; *:VOS:*:*) # From Paul.Green@stratus.com. echo hppa1.1-stratus-vos exit ;; mc68*:A/UX:*:*) - echo m68k-apple-aux${UNAME_RELEASE} + echo m68k-apple-aux"$UNAME_RELEASE" exit ;; news*:NEWS-OS:6*:*) echo mips-sony-newsos6 exit ;; R[34]000:*System_V*:*:* | R4000:UNIX_SYSV:*:* | R*000:UNIX_SV:*:*) if [ -d /usr/nec ]; then - echo mips-nec-sysv${UNAME_RELEASE} + echo mips-nec-sysv"$UNAME_RELEASE" else - echo mips-unknown-sysv${UNAME_RELEASE} + echo mips-unknown-sysv"$UNAME_RELEASE" fi exit ;; BeBox:BeOS:*:*) # BeOS running on hardware made by Be, PPC only. 
@@ -1215,65 +1261,93 @@ echo x86_64-unknown-haiku exit ;; SX-4:SUPER-UX:*:*) - echo sx4-nec-superux${UNAME_RELEASE} + echo sx4-nec-superux"$UNAME_RELEASE" exit ;; SX-5:SUPER-UX:*:*) - echo sx5-nec-superux${UNAME_RELEASE} + echo sx5-nec-superux"$UNAME_RELEASE" exit ;; SX-6:SUPER-UX:*:*) - echo sx6-nec-superux${UNAME_RELEASE} + echo sx6-nec-superux"$UNAME_RELEASE" exit ;; SX-7:SUPER-UX:*:*) - echo sx7-nec-superux${UNAME_RELEASE} + echo sx7-nec-superux"$UNAME_RELEASE" exit ;; SX-8:SUPER-UX:*:*) - echo sx8-nec-superux${UNAME_RELEASE} + echo sx8-nec-superux"$UNAME_RELEASE" exit ;; SX-8R:SUPER-UX:*:*) - echo sx8r-nec-superux${UNAME_RELEASE} + echo sx8r-nec-superux"$UNAME_RELEASE" + exit ;; + SX-ACE:SUPER-UX:*:*) + echo sxace-nec-superux"$UNAME_RELEASE" exit ;; Power*:Rhapsody:*:*) - echo powerpc-apple-rhapsody${UNAME_RELEASE} + echo powerpc-apple-rhapsody"$UNAME_RELEASE" exit ;; *:Rhapsody:*:*) - echo ${UNAME_MACHINE}-apple-rhapsody${UNAME_RELEASE} + echo "$UNAME_MACHINE"-apple-rhapsody"$UNAME_RELEASE" exit ;; *:Darwin:*:*) UNAME_PROCESSOR=`uname -p` || UNAME_PROCESSOR=unknown - case $UNAME_PROCESSOR in - i386) - eval $set_cc_for_build - if [ "$CC_FOR_BUILD" != 'no_compiler_found' ]; then - if (echo '#ifdef __LP64__'; echo IS_64BIT_ARCH; echo '#endif') | \ - (CCOPTS= $CC_FOR_BUILD -E - 2>/dev/null) | \ - grep IS_64BIT_ARCH >/dev/null - then - UNAME_PROCESSOR="x86_64" - fi - fi ;; - unknown) UNAME_PROCESSOR=powerpc ;; - esac - echo ${UNAME_PROCESSOR}-apple-darwin${UNAME_RELEASE} + eval "$set_cc_for_build" + if test "$UNAME_PROCESSOR" = unknown ; then + UNAME_PROCESSOR=powerpc + fi + if test "`echo "$UNAME_RELEASE" | sed -e 's/\..*//'`" -le 10 ; then + if [ "$CC_FOR_BUILD" != no_compiler_found ]; then + if (echo '#ifdef __LP64__'; echo IS_64BIT_ARCH; echo '#endif') | \ + (CCOPTS="" $CC_FOR_BUILD -E - 2>/dev/null) | \ + grep IS_64BIT_ARCH >/dev/null + then + case $UNAME_PROCESSOR in + i386) UNAME_PROCESSOR=x86_64 ;; + powerpc) UNAME_PROCESSOR=powerpc64 ;; + esac + fi + # On 
10.4-10.6 one might compile for PowerPC via gcc -arch ppc + if (echo '#ifdef __POWERPC__'; echo IS_PPC; echo '#endif') | \ + (CCOPTS="" $CC_FOR_BUILD -E - 2>/dev/null) | \ + grep IS_PPC >/dev/null + then + UNAME_PROCESSOR=powerpc + fi + fi + elif test "$UNAME_PROCESSOR" = i386 ; then + # Avoid executing cc on OS X 10.9, as it ships with a stub + # that puts up a graphical alert prompting to install + # developer tools. Any system running Mac OS X 10.7 or + # later (Darwin 11 and later) is required to have a 64-bit + # processor. This is not true of the ARM version of Darwin + # that Apple uses in portable devices. + UNAME_PROCESSOR=x86_64 + fi + echo "$UNAME_PROCESSOR"-apple-darwin"$UNAME_RELEASE" exit ;; *:procnto*:*:* | *:QNX:[0123456789]*:*) UNAME_PROCESSOR=`uname -p` - if test "$UNAME_PROCESSOR" = "x86"; then + if test "$UNAME_PROCESSOR" = x86; then UNAME_PROCESSOR=i386 UNAME_MACHINE=pc fi - echo ${UNAME_PROCESSOR}-${UNAME_MACHINE}-nto-qnx${UNAME_RELEASE} + echo "$UNAME_PROCESSOR"-"$UNAME_MACHINE"-nto-qnx"$UNAME_RELEASE" exit ;; *:QNX:*:4*) echo i386-pc-qnx exit ;; - NEO-?:NONSTOP_KERNEL:*:*) - echo neo-tandem-nsk${UNAME_RELEASE} + NEO-*:NONSTOP_KERNEL:*:*) + echo neo-tandem-nsk"$UNAME_RELEASE" exit ;; NSE-*:NONSTOP_KERNEL:*:*) - echo nse-tandem-nsk${UNAME_RELEASE} + echo nse-tandem-nsk"$UNAME_RELEASE" + exit ;; + NSR-*:NONSTOP_KERNEL:*:*) + echo nsr-tandem-nsk"$UNAME_RELEASE" + exit ;; + NSV-*:NONSTOP_KERNEL:*:*) + echo nsv-tandem-nsk"$UNAME_RELEASE" exit ;; - NSR-?:NONSTOP_KERNEL:*:*) - echo nsr-tandem-nsk${UNAME_RELEASE} + NSX-*:NONSTOP_KERNEL:*:*) + echo nsx-tandem-nsk"$UNAME_RELEASE" exit ;; *:NonStop-UX:*:*) echo mips-compaq-nonstopux @@ -1282,18 +1356,18 @@ echo bs2000-siemens-sysv exit ;; DS/*:UNIX_System_V:*:*) - echo ${UNAME_MACHINE}-${UNAME_SYSTEM}-${UNAME_RELEASE} + echo "$UNAME_MACHINE"-"$UNAME_SYSTEM"-"$UNAME_RELEASE" exit ;; *:Plan9:*:*) # "uname -m" is not consistent, so use $cputype instead. 
386 # is converted to i386 for consistency with other x86 # operating systems. - if test "$cputype" = "386"; then + if test "$cputype" = 386; then UNAME_MACHINE=i386 else UNAME_MACHINE="$cputype" fi - echo ${UNAME_MACHINE}-unknown-plan9 + echo "$UNAME_MACHINE"-unknown-plan9 exit ;; *:TOPS-10:*:*) echo pdp10-unknown-tops10 @@ -1314,14 +1388,14 @@ echo pdp10-unknown-its exit ;; SEI:*:*:SEIUX) - echo mips-sei-seiux${UNAME_RELEASE} + echo mips-sei-seiux"$UNAME_RELEASE" exit ;; *:DragonFly:*:*) - echo ${UNAME_MACHINE}-unknown-dragonfly`echo ${UNAME_RELEASE}|sed -e 's/[-(].*//'` + echo "$UNAME_MACHINE"-unknown-dragonfly"`echo "$UNAME_RELEASE"|sed -e 's/[-(].*//'`" exit ;; *:*VMS:*:*) UNAME_MACHINE=`(uname -p) 2>/dev/null` - case "${UNAME_MACHINE}" in + case "$UNAME_MACHINE" in A*) echo alpha-dec-vms ; exit ;; I*) echo ia64-dec-vms ; exit ;; V*) echo vax-dec-vms ; exit ;; @@ -1330,182 +1404,48 @@ echo i386-pc-xenix exit ;; i*86:skyos:*:*) - echo ${UNAME_MACHINE}-pc-skyos`echo ${UNAME_RELEASE}` | sed -e 's/ .*$//' + echo "$UNAME_MACHINE"-pc-skyos"`echo "$UNAME_RELEASE" | sed -e 's/ .*$//'`" exit ;; i*86:rdos:*:*) - echo ${UNAME_MACHINE}-pc-rdos + echo "$UNAME_MACHINE"-pc-rdos exit ;; i*86:AROS:*:*) - echo ${UNAME_MACHINE}-pc-aros + echo "$UNAME_MACHINE"-pc-aros exit ;; x86_64:VMkernel:*:*) - echo ${UNAME_MACHINE}-unknown-esx + echo "$UNAME_MACHINE"-unknown-esx + exit ;; + amd64:Isilon\ OneFS:*:*) + echo x86_64-unknown-onefs exit ;; esac -eval $set_cc_for_build -cat >$dummy.c < -# include -#endif -main () -{ -#if defined (sony) -#if defined (MIPSEB) - /* BFD wants "bsd" instead of "newsos". Perhaps BFD should be changed, - I don't know.... 
*/ - printf ("mips-sony-bsd\n"); exit (0); -#else -#include - printf ("m68k-sony-newsos%s\n", -#ifdef NEWSOS4 - "4" -#else - "" -#endif - ); exit (0); -#endif -#endif - -#if defined (__arm) && defined (__acorn) && defined (__unix) - printf ("arm-acorn-riscix\n"); exit (0); -#endif - -#if defined (hp300) && !defined (hpux) - printf ("m68k-hp-bsd\n"); exit (0); -#endif - -#if defined (NeXT) -#if !defined (__ARCHITECTURE__) -#define __ARCHITECTURE__ "m68k" -#endif - int version; - version=`(hostinfo | sed -n 's/.*NeXT Mach \([0-9]*\).*/\1/p') 2>/dev/null`; - if (version < 4) - printf ("%s-next-nextstep%d\n", __ARCHITECTURE__, version); - else - printf ("%s-next-openstep%d\n", __ARCHITECTURE__, version); - exit (0); -#endif - -#if defined (MULTIMAX) || defined (n16) -#if defined (UMAXV) - printf ("ns32k-encore-sysv\n"); exit (0); -#else -#if defined (CMU) - printf ("ns32k-encore-mach\n"); exit (0); -#else - printf ("ns32k-encore-bsd\n"); exit (0); -#endif -#endif -#endif - -#if defined (__386BSD__) - printf ("i386-pc-bsd\n"); exit (0); -#endif - -#if defined (sequent) -#if defined (i386) - printf ("i386-sequent-dynix\n"); exit (0); -#endif -#if defined (ns32000) - printf ("ns32k-sequent-dynix\n"); exit (0); -#endif -#endif +echo "$0: unable to guess system type" >&2 -#if defined (_SEQUENT_) - struct utsname un; +case "$UNAME_MACHINE:$UNAME_SYSTEM" in + mips:Linux | mips64:Linux) + # If we got here on MIPS GNU/Linux, output extra information. 
+ cat >&2 < -# if defined (BSD) -# if BSD == 43 - printf ("vax-dec-bsd4.3\n"); exit (0); -# else -# if BSD == 199006 - printf ("vax-dec-bsd4.3reno\n"); exit (0); -# else - printf ("vax-dec-bsd\n"); exit (0); -# endif -# endif -# else - printf ("vax-dec-bsd\n"); exit (0); -# endif -# else - printf ("vax-dec-ultrix\n"); exit (0); -# endif -#endif - -#if defined (alliant) && defined (i860) - printf ("i860-alliant-bsd\n"); exit (0); -#endif - - exit (1); -} +NOTE: MIPS GNU/Linux systems require a C compiler to fully recognize +the system type. Please install a C compiler and try again. EOF - -$CC_FOR_BUILD -o $dummy $dummy.c 2>/dev/null && SYSTEM_NAME=`$dummy` && - { echo "$SYSTEM_NAME"; exit; } - -# Apollos put the system type in the environment. - -test -d /usr/apollo && { echo ${ISP}-apollo-${SYSTYPE}; exit; } - -# Convex versions that predate uname can use getsysinfo(1) - -if [ -x /usr/convex/getsysinfo ] -then - case `getsysinfo -f cpu_type` in - c1*) - echo c1-convex-bsd - exit ;; - c2*) - if getsysinfo -f scalar_acc - then echo c32-convex-bsd - else echo c2-convex-bsd - fi - exit ;; - c34*) - echo c34-convex-bsd - exit ;; - c38*) - echo c38-convex-bsd - exit ;; - c4*) - echo c4-convex-bsd - exit ;; - esac -fi + ;; +esac cat >&2 < in order to provide the needed -information to handle your system. +If $0 has already been updated, send the following data and any +information you think might be pertinent to config-patches@gnu.org to +provide the necessary information to handle your system. 
config.guess timestamp = $timestamp @@ -1524,16 +1464,16 @@ /usr/bin/oslevel = `(/usr/bin/oslevel) 2>/dev/null` /usr/convex/getsysinfo = `(/usr/convex/getsysinfo) 2>/dev/null` -UNAME_MACHINE = ${UNAME_MACHINE} -UNAME_RELEASE = ${UNAME_RELEASE} -UNAME_SYSTEM = ${UNAME_SYSTEM} -UNAME_VERSION = ${UNAME_VERSION} +UNAME_MACHINE = "$UNAME_MACHINE" +UNAME_RELEASE = "$UNAME_RELEASE" +UNAME_SYSTEM = "$UNAME_SYSTEM" +UNAME_VERSION = "$UNAME_VERSION" EOF exit 1 # Local variables: -# eval: (add-hook 'write-file-hooks 'time-stamp) +# eval: (add-hook 'write-file-functions 'time-stamp) # time-stamp-start: "timestamp='" # time-stamp-format: "%:y-%02m-%02d" # time-stamp-end: "'" diff -Nru x264-0.157.2935+git545de2f/config.sub x264-0.160.3011+gitcde9a93/config.sub --- x264-0.157.2935+git545de2f/config.sub 2018-09-25 12:35:00.000000000 +0000 +++ x264-0.160.3011+gitcde9a93/config.sub 2020-07-13 10:30:22.000000000 +0000 @@ -1,36 +1,31 @@ #! /bin/sh # Configuration validation subroutine script. -# Copyright (C) 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999, -# 2000, 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, -# 2011, 2012 Free Software Foundation, Inc. - -timestamp='2012-12-06' - -# This file is (in principle) common to ALL GNU software. -# The presence of a machine in this file suggests that SOME GNU software -# can handle that machine. It does not imply ALL GNU software can. -# -# This file is free software; you can redistribute it and/or modify -# it under the terms of the GNU General Public License as published by -# the Free Software Foundation; either version 2 of the License, or +# Copyright 1992-2018 Free Software Foundation, Inc. + +timestamp='2018-02-22' + +# This file is free software; you can redistribute it and/or modify it +# under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 3 of the License, or # (at your option) any later version. 
# -# This program is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -# GNU General Public License for more details. +# This program is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +# General Public License for more details. # # You should have received a copy of the GNU General Public License -# along with this program; if not, see . +# along with this program; if not, see . # # As a special exception to the GNU General Public License, if you # distribute this file as part of a program that contains a # configuration script generated by Autoconf, you may include it under -# the same distribution terms that you use for the rest of that program. +# the same distribution terms that you use for the rest of that +# program. This Exception is an additional permission under section 7 +# of the GNU General Public License, version 3 ("GPLv3"). -# Please send patches to . Submit a context -# diff and a properly formatted GNU ChangeLog entry. +# Please send patches to . # # Configuration subroutine to validate and canonicalize a configuration type. # Supply the specified configuration type as an argument. @@ -38,7 +33,7 @@ # Otherwise, we print the canonical config type on stdout and succeed. 
# You can get the latest version of this script from: -# http://git.savannah.gnu.org/gitweb/?p=config.git;a=blob_plain;f=config.sub;hb=HEAD +# https://git.savannah.gnu.org/gitweb/?p=config.git;a=blob_plain;f=config.sub # This file is supposed to be the same for all GNU packages # and recognize all the CPU types, system types and aliases @@ -58,12 +53,11 @@ me=`echo "$0" | sed -e 's,.*/,,'` usage="\ -Usage: $0 [OPTION] CPU-MFR-OPSYS - $0 [OPTION] ALIAS +Usage: $0 [OPTION] CPU-MFR-OPSYS or ALIAS Canonicalize a configuration name. -Operation modes: +Options: -h, --help print this help, then exit -t, --time-stamp print date of last modification, then exit -v, --version print version number, then exit @@ -73,9 +67,7 @@ version="\ GNU config.sub ($timestamp) -Copyright (C) 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000, -2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011, 2012 -Free Software Foundation, Inc. +Copyright 1992-2018 Free Software Foundation, Inc. This is free software; see the source for copying conditions. There is NO warranty; not even for MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE." @@ -102,7 +94,7 @@ *local*) # First pass through any local machine types. - echo $1 + echo "$1" exit ;; * ) @@ -120,24 +112,24 @@ # Separate what the user gave into CPU-COMPANY and OS or KERNEL-OS (if any). # Here we must recognize all the valid KERNEL-OS combinations. 
-maybe_os=`echo $1 | sed 's/^\(.*\)-\([^-]*-[^-]*\)$/\2/'` +maybe_os=`echo "$1" | sed 's/^\(.*\)-\([^-]*-[^-]*\)$/\2/'` case $maybe_os in nto-qnx* | linux-gnu* | linux-android* | linux-dietlibc | linux-newlib* | \ linux-musl* | linux-uclibc* | uclinux-uclibc* | uclinux-gnu* | kfreebsd*-gnu* | \ - knetbsd*-gnu* | netbsd*-gnu* | \ - kopensolaris*-gnu* | \ + knetbsd*-gnu* | netbsd*-gnu* | netbsd*-eabi* | \ + kopensolaris*-gnu* | cloudabi*-eabi* | \ storm-chaos* | os2-emx* | rtmk-nova*) os=-$maybe_os - basic_machine=`echo $1 | sed 's/^\(.*\)-\([^-]*-[^-]*\)$/\1/'` + basic_machine=`echo "$1" | sed 's/^\(.*\)-\([^-]*-[^-]*\)$/\1/'` ;; android-linux) os=-linux-android - basic_machine=`echo $1 | sed 's/^\(.*\)-\([^-]*-[^-]*\)$/\1/'`-unknown + basic_machine=`echo "$1" | sed 's/^\(.*\)-\([^-]*-[^-]*\)$/\1/'`-unknown ;; *) - basic_machine=`echo $1 | sed 's/-[^-]*$//'` - if [ $basic_machine != $1 ] - then os=`echo $1 | sed 's/.*-/-/'` + basic_machine=`echo "$1" | sed 's/-[^-]*$//'` + if [ "$basic_machine" != "$1" ] + then os=`echo "$1" | sed 's/.*-/-/'` else os=; fi ;; esac @@ -186,44 +178,44 @@ ;; -sco6) os=-sco5v6 - basic_machine=`echo $1 | sed -e 's/86-.*/86-pc/'` + basic_machine=`echo "$1" | sed -e 's/86-.*/86-pc/'` ;; -sco5) os=-sco3.2v5 - basic_machine=`echo $1 | sed -e 's/86-.*/86-pc/'` + basic_machine=`echo "$1" | sed -e 's/86-.*/86-pc/'` ;; -sco4) os=-sco3.2v4 - basic_machine=`echo $1 | sed -e 's/86-.*/86-pc/'` + basic_machine=`echo "$1" | sed -e 's/86-.*/86-pc/'` ;; -sco3.2.[4-9]*) os=`echo $os | sed -e 's/sco3.2./sco3.2v/'` - basic_machine=`echo $1 | sed -e 's/86-.*/86-pc/'` + basic_machine=`echo "$1" | sed -e 's/86-.*/86-pc/'` ;; -sco3.2v[4-9]*) # Don't forget version if it is 3.2v4 or newer. - basic_machine=`echo $1 | sed -e 's/86-.*/86-pc/'` + basic_machine=`echo "$1" | sed -e 's/86-.*/86-pc/'` ;; -sco5v6*) # Don't forget version if it is 3.2v4 or newer. 
- basic_machine=`echo $1 | sed -e 's/86-.*/86-pc/'` + basic_machine=`echo "$1" | sed -e 's/86-.*/86-pc/'` ;; -sco*) os=-sco3.2v2 - basic_machine=`echo $1 | sed -e 's/86-.*/86-pc/'` + basic_machine=`echo "$1" | sed -e 's/86-.*/86-pc/'` ;; -udk*) - basic_machine=`echo $1 | sed -e 's/86-.*/86-pc/'` + basic_machine=`echo "$1" | sed -e 's/86-.*/86-pc/'` ;; -isc) os=-isc2.2 - basic_machine=`echo $1 | sed -e 's/86-.*/86-pc/'` + basic_machine=`echo "$1" | sed -e 's/86-.*/86-pc/'` ;; -clix*) basic_machine=clipper-intergraph ;; -isc*) - basic_machine=`echo $1 | sed -e 's/86-.*/86-pc/'` + basic_machine=`echo "$1" | sed -e 's/86-.*/86-pc/'` ;; -lynx*178) os=-lynxos178 @@ -235,10 +227,7 @@ os=-lynxos ;; -ptx*) - basic_machine=`echo $1 | sed -e 's/86-.*/86-sequent/'` - ;; - -windowsnt*) - os=`echo $os | sed -e 's/windowsnt/winnt/'` + basic_machine=`echo "$1" | sed -e 's/86-.*/86-sequent/'` ;; -psos*) os=-psos @@ -259,19 +248,21 @@ | alpha | alphaev[4-8] | alphaev56 | alphaev6[78] | alphapca5[67] \ | alpha64 | alpha64ev[4-8] | alpha64ev56 | alpha64ev6[78] | alpha64pca5[67] \ | am33_2.0 \ - | arc \ + | arc | arceb \ | arm | arm[bl]e | arme[lb] | armv[2-8] | armv[3-8][lb] | armv7[arm] \ | avr | avr32 \ + | ba \ | be32 | be64 \ | bfin \ - | c4x | clipper \ + | c4x | c8051 | clipper \ | d10v | d30v | dlx | dsp16xx \ - | epiphany \ - | fido | fr30 | frv \ + | e2k | epiphany \ + | fido | fr30 | frv | ft32 \ | h8300 | h8500 | hppa | hppa1.[01] | hppa2.0 | hppa2.0[nw] | hppa64 \ | hexagon \ - | i370 | i860 | i960 | ia64 \ + | i370 | i860 | i960 | ia16 | ia64 \ | ip2k | iq2000 \ + | k1om \ | le32 | le64 \ | lm32 \ | m32c | m32r | m32rle | m68000 | m68k | m88k \ @@ -289,26 +280,30 @@ | mips64vr5900 | mips64vr5900el \ | mipsisa32 | mipsisa32el \ | mipsisa32r2 | mipsisa32r2el \ + | mipsisa32r6 | mipsisa32r6el \ | mipsisa64 | mipsisa64el \ | mipsisa64r2 | mipsisa64r2el \ + | mipsisa64r6 | mipsisa64r6el \ | mipsisa64sb1 | mipsisa64sb1el \ | mipsisa64sr71k | mipsisa64sr71kel \ + | mipsr5900 | 
mipsr5900el \ | mipstx39 | mipstx39el \ | mn10200 | mn10300 \ | moxie \ | mt \ | msp430 \ | nds32 | nds32le | nds32be \ - | nios | nios2 \ + | nios | nios2 | nios2eb | nios2el \ | ns16k | ns32k \ - | open8 \ - | or32 \ - | pdp10 | pdp11 | pj | pjl \ + | open8 | or1k | or1knd | or32 \ + | pdp10 | pj | pjl \ | powerpc | powerpc64 | powerpc64le | powerpcle \ + | pru \ | pyramid \ + | riscv32 | riscv64 \ | rl78 | rx \ | score \ - | sh | sh[1234] | sh[24]a | sh[24]aeb | sh[23]e | sh[34]eb | sheb | shbe | shle | sh[1234]le | sh3ele \ + | sh | sh[1234] | sh[24]a | sh[24]aeb | sh[23]e | sh[234]eb | sheb | shbe | shle | sh[1234]le | sh3ele \ | sh64 | sh64le \ | sparc | sparc64 | sparc64b | sparc64v | sparc86x | sparclet | sparclite \ | sparcv8 | sparcv9 | sparcv9b | sparcv9v \ @@ -316,7 +311,8 @@ | tahoe | tic4x | tic54x | tic55x | tic6x | tic80 | tron \ | ubicom32 \ | v850 | v850e | v850e1 | v850e2 | v850es | v850e2v3 \ - | we32k \ + | visium \ + | wasm32 \ | x86 | xc16x | xstormy16 | xtensa \ | z8k | z80) basic_machine=$basic_machine-unknown @@ -330,11 +326,14 @@ c6x) basic_machine=tic6x-unknown ;; - m6811 | m68hc11 | m6812 | m68hc12 | m68hcs12x | picochip) + leon|leon[3-9]) + basic_machine=sparc-$basic_machine + ;; + m6811 | m68hc11 | m6812 | m68hc12 | m68hcs12x | nvptx | picochip) basic_machine=$basic_machine-unknown os=-none ;; - m88110 | m680[12346]0 | m683?2 | m68360 | m5200 | v70 | w65 | z8k) + m88110 | m680[12346]0 | m683?2 | m68360 | m5200 | v70 | w65) ;; ms1) basic_machine=mt-unknown @@ -363,7 +362,7 @@ ;; # Object if more than one company name word. *-*-*) - echo Invalid configuration \`$1\': machine \`$basic_machine\' not recognized 1>&2 + echo Invalid configuration \`"$1"\': machine \`"$basic_machine"\' not recognized 1>&2 exit 1 ;; # Recognize the basic CPU types with company name. 
@@ -372,21 +371,23 @@ | aarch64-* | aarch64_be-* \ | alpha-* | alphaev[4-8]-* | alphaev56-* | alphaev6[78]-* \ | alpha64-* | alpha64ev[4-8]-* | alpha64ev56-* | alpha64ev6[78]-* \ - | alphapca5[67]-* | alpha64pca5[67]-* | arc-* \ + | alphapca5[67]-* | alpha64pca5[67]-* | arc-* | arceb-* \ | arm-* | armbe-* | armle-* | armeb-* | armv*-* \ | avr-* | avr32-* \ + | ba-* \ | be32-* | be64-* \ | bfin-* | bs2000-* \ | c[123]* | c30-* | [cjt]90-* | c4x-* \ - | clipper-* | craynv-* | cydra-* \ + | c8051-* | clipper-* | craynv-* | cydra-* \ | d10v-* | d30v-* | dlx-* \ - | elxsi-* \ + | e2k-* | elxsi-* \ | f30[01]-* | f700-* | fido-* | fr30-* | frv-* | fx80-* \ | h8300-* | h8500-* \ | hppa-* | hppa1.[01]-* | hppa2.0-* | hppa2.0[nw]-* | hppa64-* \ | hexagon-* \ - | i*86-* | i860-* | i960-* | ia64-* \ + | i*86-* | i860-* | i960-* | ia16-* | ia64-* \ | ip2k-* | iq2000-* \ + | k1om-* \ | le32-* | le64-* \ | lm32-* \ | m32c-* | m32r-* | m32rle-* \ @@ -406,28 +407,34 @@ | mips64vr5900-* | mips64vr5900el-* \ | mipsisa32-* | mipsisa32el-* \ | mipsisa32r2-* | mipsisa32r2el-* \ + | mipsisa32r6-* | mipsisa32r6el-* \ | mipsisa64-* | mipsisa64el-* \ | mipsisa64r2-* | mipsisa64r2el-* \ + | mipsisa64r6-* | mipsisa64r6el-* \ | mipsisa64sb1-* | mipsisa64sb1el-* \ | mipsisa64sr71k-* | mipsisa64sr71kel-* \ + | mipsr5900-* | mipsr5900el-* \ | mipstx39-* | mipstx39el-* \ | mmix-* \ | mt-* \ | msp430-* \ | nds32-* | nds32le-* | nds32be-* \ - | nios-* | nios2-* \ + | nios-* | nios2-* | nios2eb-* | nios2el-* \ | none-* | np1-* | ns16k-* | ns32k-* \ | open8-* \ + | or1k*-* \ | orion-* \ | pdp10-* | pdp11-* | pj-* | pjl-* | pn-* | power-* \ | powerpc-* | powerpc64-* | powerpc64le-* | powerpcle-* \ + | pru-* \ | pyramid-* \ + | riscv32-* | riscv64-* \ | rl78-* | romp-* | rs6000-* | rx-* \ | sh-* | sh[1234]-* | sh[24]a-* | sh[24]aeb-* | sh[23]e-* | sh[34]eb-* | sheb-* | shbe-* \ | shle-* | sh[1234]le-* | sh3ele-* | sh64-* | sh64le-* \ | sparc-* | sparc64-* | sparc64b-* | sparc64v-* | sparc86x-* | 
sparclet-* \ | sparclite-* \ - | sparcv8-* | sparcv9-* | sparcv9b-* | sparcv9v-* | sv1-* | sx?-* \ + | sparcv8-* | sparcv9-* | sparcv9b-* | sparcv9v-* | sv1-* | sx*-* \ | tahoe-* \ | tic30-* | tic4x-* | tic54x-* | tic55x-* | tic6x-* | tic80-* \ | tile*-* \ @@ -435,6 +442,8 @@ | ubicom32-* \ | v850-* | v850e-* | v850e1-* | v850es-* | v850e2-* | v850e2v3-* \ | vax-* \ + | visium-* \ + | wasm32-* \ | we32k-* \ | x86-* | x86_64-* | xc16x-* | xps100-* \ | xstormy16-* | xtensa*-* \ @@ -448,7 +457,7 @@ # Recognize the various machine names and aliases which stand # for a CPU type and a company and sometimes even an OS. 386bsd) - basic_machine=i386-unknown + basic_machine=i386-pc os=-bsd ;; 3b1 | 7300 | 7300-att | att-7300 | pc7300 | safari | unixpc) @@ -482,7 +491,7 @@ basic_machine=x86_64-pc ;; amd64-*) - basic_machine=x86_64-`echo $basic_machine | sed 's/^[^-]*-//'` + basic_machine=x86_64-`echo "$basic_machine" | sed 's/^[^-]*-//'` ;; amdahl) basic_machine=580-amdahl @@ -511,6 +520,9 @@ basic_machine=i386-pc os=-aros ;; + asmjs) + basic_machine=asmjs-unknown + ;; aux) basic_machine=m68k-apple os=-aux @@ -524,7 +536,7 @@ os=-linux ;; blackfin-*) - basic_machine=bfin-`echo $basic_machine | sed 's/^[^-]*-//'` + basic_machine=bfin-`echo "$basic_machine" | sed 's/^[^-]*-//'` os=-linux ;; bluegene*) @@ -532,13 +544,13 @@ os=-cnk ;; c54x-*) - basic_machine=tic54x-`echo $basic_machine | sed 's/^[^-]*-//'` + basic_machine=tic54x-`echo "$basic_machine" | sed 's/^[^-]*-//'` ;; c55x-*) - basic_machine=tic55x-`echo $basic_machine | sed 's/^[^-]*-//'` + basic_machine=tic55x-`echo "$basic_machine" | sed 's/^[^-]*-//'` ;; c6x-*) - basic_machine=tic6x-`echo $basic_machine | sed 's/^[^-]*-//'` + basic_machine=tic6x-`echo "$basic_machine" | sed 's/^[^-]*-//'` ;; c90) basic_machine=c90-cray @@ -627,10 +639,18 @@ basic_machine=rs6000-bull os=-bosx ;; - dpx2* | dpx2*-bull) + dpx2*) basic_machine=m68k-bull os=-sysv3 ;; + e500v[12]) + basic_machine=powerpc-unknown + os=$os"spe" + ;; + 
e500v[12]-*) + basic_machine=powerpc-`echo "$basic_machine" | sed 's/^[^-]*-//'` + os=$os"spe" + ;; ebmon29k) basic_machine=a29k-amd os=-ebmon @@ -720,9 +740,6 @@ hp9k8[0-9][0-9] | hp8[0-9][0-9]) basic_machine=hppa1.0-hp ;; - hppa-next) - os=-nextstep3 - ;; hppaosf) basic_machine=hppa1.1-hp os=-osf @@ -735,26 +752,26 @@ basic_machine=i370-ibm ;; i*86v32) - basic_machine=`echo $1 | sed -e 's/86.*/86-pc/'` + basic_machine=`echo "$1" | sed -e 's/86.*/86-pc/'` os=-sysv32 ;; i*86v4*) - basic_machine=`echo $1 | sed -e 's/86.*/86-pc/'` + basic_machine=`echo "$1" | sed -e 's/86.*/86-pc/'` os=-sysv4 ;; i*86v) - basic_machine=`echo $1 | sed -e 's/86.*/86-pc/'` + basic_machine=`echo "$1" | sed -e 's/86.*/86-pc/'` os=-sysv ;; i*86sol2) - basic_machine=`echo $1 | sed -e 's/86.*/86-pc/'` + basic_machine=`echo "$1" | sed -e 's/86.*/86-pc/'` os=-solaris2 ;; i386mach) basic_machine=i386-mach os=-mach ;; - i386-vsta | vsta) + vsta) basic_machine=i386-unknown os=-vsta ;; @@ -772,17 +789,17 @@ basic_machine=m68k-isi os=-sysv ;; + leon-*|leon[3-9]-*) + basic_machine=sparc-`echo "$basic_machine" | sed 's/-.*//'` + ;; m68knommu) basic_machine=m68k-unknown os=-linux ;; m68knommu-*) - basic_machine=m68k-`echo $basic_machine | sed 's/^[^-]*-//'` + basic_machine=m68k-`echo "$basic_machine" | sed 's/^[^-]*-//'` os=-linux ;; - m88k-omron*) - basic_machine=m88k-omron - ;; magnum | m3230) basic_machine=mips-mips os=-sysv @@ -799,7 +816,7 @@ os=-mingw64 ;; mingw32) - basic_machine=i386-pc + basic_machine=i686-pc os=-mingw32 ;; mingw32ce) @@ -814,10 +831,10 @@ os=-mint ;; mips3*-*) - basic_machine=`echo $basic_machine | sed -e 's/mips3/mips64/'` + basic_machine=`echo "$basic_machine" | sed -e 's/mips3/mips64/'` ;; mips3*) - basic_machine=`echo $basic_machine | sed -e 's/mips3/mips64/'`-unknown + basic_machine=`echo "$basic_machine" | sed -e 's/mips3/mips64/'`-unknown ;; monitor) basic_machine=m68k-rom68k @@ -827,15 +844,19 @@ basic_machine=powerpc-unknown os=-morphos ;; + moxiebox) + 
basic_machine=moxie-unknown + os=-moxiebox + ;; msdos) basic_machine=i386-pc os=-msdos ;; ms1-*) - basic_machine=`echo $basic_machine | sed -e 's/ms1-/mt-/'` + basic_machine=`echo "$basic_machine" | sed -e 's/ms1-/mt-/'` ;; msys) - basic_machine=i386-pc + basic_machine=i686-pc os=-msys ;; mvs) @@ -874,7 +895,7 @@ basic_machine=v70-nec os=-sysv ;; - next | m*-next ) + next | m*-next) basic_machine=m68k-next case $os in -nextstep* ) @@ -919,6 +940,12 @@ nsr-tandem) basic_machine=nsr-tandem ;; + nsv-tandem) + basic_machine=nsv-tandem + ;; + nsx-tandem) + basic_machine=nsx-tandem + ;; op50n-* | op60c-*) basic_machine=hppa1.1-oki os=-proelf @@ -951,7 +978,7 @@ os=-linux ;; parisc-*) - basic_machine=hppa-`echo $basic_machine | sed 's/^[^-]*-//'` + basic_machine=hppa-`echo "$basic_machine" | sed 's/^[^-]*-//'` os=-linux ;; pbd) @@ -967,7 +994,7 @@ basic_machine=i386-pc ;; pc98-*) - basic_machine=i386-`echo $basic_machine | sed 's/^[^-]*-//'` + basic_machine=i386-`echo "$basic_machine" | sed 's/^[^-]*-//'` ;; pentium | p5 | k5 | k6 | nexgen | viac3) basic_machine=i586-pc @@ -982,16 +1009,16 @@ basic_machine=i786-pc ;; pentium-* | p5-* | k5-* | k6-* | nexgen-* | viac3-*) - basic_machine=i586-`echo $basic_machine | sed 's/^[^-]*-//'` + basic_machine=i586-`echo "$basic_machine" | sed 's/^[^-]*-//'` ;; pentiumpro-* | p6-* | 6x86-* | athlon-*) - basic_machine=i686-`echo $basic_machine | sed 's/^[^-]*-//'` + basic_machine=i686-`echo "$basic_machine" | sed 's/^[^-]*-//'` ;; pentiumii-* | pentium2-* | pentiumiii-* | pentium3-*) - basic_machine=i686-`echo $basic_machine | sed 's/^[^-]*-//'` + basic_machine=i686-`echo "$basic_machine" | sed 's/^[^-]*-//'` ;; pentium4-*) - basic_machine=i786-`echo $basic_machine | sed 's/^[^-]*-//'` + basic_machine=i786-`echo "$basic_machine" | sed 's/^[^-]*-//'` ;; pn) basic_machine=pn-gould @@ -1001,23 +1028,23 @@ ppc | ppcbe) basic_machine=powerpc-unknown ;; ppc-* | ppcbe-*) - basic_machine=powerpc-`echo $basic_machine | sed 's/^[^-]*-//'` + 
basic_machine=powerpc-`echo "$basic_machine" | sed 's/^[^-]*-//'` ;; - ppcle | powerpclittle | ppc-le | powerpc-little) + ppcle | powerpclittle) basic_machine=powerpcle-unknown ;; ppcle-* | powerpclittle-*) - basic_machine=powerpcle-`echo $basic_machine | sed 's/^[^-]*-//'` + basic_machine=powerpcle-`echo "$basic_machine" | sed 's/^[^-]*-//'` ;; ppc64) basic_machine=powerpc64-unknown ;; - ppc64-*) basic_machine=powerpc64-`echo $basic_machine | sed 's/^[^-]*-//'` + ppc64-*) basic_machine=powerpc64-`echo "$basic_machine" | sed 's/^[^-]*-//'` ;; - ppc64le | powerpc64little | ppc64-le | powerpc64-little) + ppc64le | powerpc64little) basic_machine=powerpc64le-unknown ;; ppc64le-* | powerpc64little-*) - basic_machine=powerpc64le-`echo $basic_machine | sed 's/^[^-]*-//'` + basic_machine=powerpc64le-`echo "$basic_machine" | sed 's/^[^-]*-//'` ;; ps2) basic_machine=i386-ibm @@ -1071,17 +1098,10 @@ sequent) basic_machine=i386-sequent ;; - sh) - basic_machine=sh-hitachi - os=-hms - ;; sh5el) basic_machine=sh5le-unknown ;; - sh64) - basic_machine=sh64-unknown - ;; - sparclite-wrs | simso-wrs) + simso-wrs) basic_machine=sparclite-wrs os=-vxworks ;; @@ -1100,7 +1120,7 @@ os=-sysv4 ;; strongarm-* | thumb-*) - basic_machine=arm-`echo $basic_machine | sed 's/^[^-]*-//'` + basic_machine=arm-`echo "$basic_machine" | sed 's/^[^-]*-//'` ;; sun2) basic_machine=m68000-sun @@ -1222,6 +1242,9 @@ basic_machine=hppa1.1-winbond os=-proelf ;; + x64) + basic_machine=x86_64-pc + ;; xbox) basic_machine=i686-pc os=-mingw32 @@ -1230,20 +1253,12 @@ basic_machine=xps100-honeywell ;; xscale-* | xscalee[bl]-*) - basic_machine=`echo $basic_machine | sed 's/^xscale/arm/'` + basic_machine=`echo "$basic_machine" | sed 's/^xscale/arm/'` ;; ymp) basic_machine=ymp-cray os=-unicos ;; - z8k-*-coff) - basic_machine=z8k-unknown - os=-sim - ;; - z80-*-coff) - basic_machine=z80-unknown - os=-sim - ;; none) basic_machine=none-none os=-none @@ -1272,10 +1287,6 @@ vax) basic_machine=vax-dec ;; - pdp10) - # there are 
many clones, so DEC is not a safe bet - basic_machine=pdp10-unknown - ;; pdp11) basic_machine=pdp11-dec ;; @@ -1285,9 +1296,6 @@ sh[1234] | sh[24]a | sh[24]aeb | sh[34]eb | sh[1234]le | sh[23]ele) basic_machine=sh-unknown ;; - sparc | sparcv8 | sparcv9 | sparcv9b | sparcv9v) - basic_machine=sparc-sun - ;; cydra) basic_machine=cydra-cydrome ;; @@ -1307,7 +1315,7 @@ # Make sure to match an already-canonicalized machine name. ;; *) - echo Invalid configuration \`$1\': machine \`$basic_machine\' not recognized 1>&2 + echo Invalid configuration \`"$1"\': machine \`"$basic_machine"\' not recognized 1>&2 exit 1 ;; esac @@ -1315,10 +1323,10 @@ # Here we canonicalize certain aliases for manufacturers. case $basic_machine in *-digital*) - basic_machine=`echo $basic_machine | sed 's/digital.*/dec/'` + basic_machine=`echo "$basic_machine" | sed 's/digital.*/dec/'` ;; *-commodore*) - basic_machine=`echo $basic_machine | sed 's/commodore.*/cbm/'` + basic_machine=`echo "$basic_machine" | sed 's/commodore.*/cbm/'` ;; *) ;; @@ -1329,8 +1337,8 @@ if [ x"$os" != x"" ] then case $os in - # First match some system type aliases - # that might get confused with valid system types. + # First match some system type aliases that might get confused + # with valid system types. # -solaris* is a basic system type, with this one exception. -auroraux) os=-auroraux @@ -1341,45 +1349,48 @@ -solaris) os=-solaris2 ;; - -svr4*) - os=-sysv4 - ;; -unixware*) os=-sysv4.2uw ;; -gnu/linux*) os=`echo $os | sed -e 's|gnu/linux|linux-gnu|'` ;; - # First accept the basic system types. + # es1800 is here to avoid being matched by es* (a different OS) + -es1800*) + os=-ose + ;; + # Now accept the basic system types. # The portable systems comes first. - # Each alternative MUST END IN A *, to match a version number. + # Each alternative MUST end in a * to match a version number. # -sysv* is not here because it comes later, after sysvr4. 
-gnu* | -bsd* | -mach* | -minix* | -genix* | -ultrix* | -irix* \ | -*vms* | -sco* | -esix* | -isc* | -aix* | -cnk* | -sunos | -sunos[34]*\ | -hpux* | -unos* | -osf* | -luna* | -dgux* | -auroraux* | -solaris* \ - | -sym* | -kopensolaris* \ + | -sym* | -kopensolaris* | -plan9* \ | -amigaos* | -amigados* | -msdos* | -newsos* | -unicos* | -aof* \ - | -aos* | -aros* \ + | -aos* | -aros* | -cloudabi* | -sortix* \ | -nindy* | -vxsim* | -vxworks* | -ebmon* | -hms* | -mvs* \ | -clix* | -riscos* | -uniplus* | -iris* | -rtu* | -xenix* \ - | -hiux* | -386bsd* | -knetbsd* | -mirbsd* | -netbsd* \ - | -bitrig* | -openbsd* | -solidbsd* \ + | -hiux* | -knetbsd* | -mirbsd* | -netbsd* \ + | -bitrig* | -openbsd* | -solidbsd* | -libertybsd* \ | -ekkobsd* | -kfreebsd* | -freebsd* | -riscix* | -lynxos* \ | -bosx* | -nextstep* | -cxux* | -aout* | -elf* | -oabi* \ | -ptx* | -coff* | -ecoff* | -winnt* | -domain* | -vsta* \ | -udi* | -eabi* | -lites* | -ieee* | -go32* | -aux* \ - | -chorusos* | -chorusrdb* | -cegcc* \ + | -chorusos* | -chorusrdb* | -cegcc* | -glidix* \ | -cygwin* | -msys* | -pe* | -psos* | -moss* | -proelf* | -rtems* \ - | -mingw32* | -mingw64* | -linux-gnu* | -linux-android* \ + | -midipix* | -mingw32* | -mingw64* | -linux-gnu* | -linux-android* \ | -linux-newlib* | -linux-musl* | -linux-uclibc* \ - | -uxpv* | -beos* | -mpeix* | -udk* \ - | -interix* | -uwin* | -mks* | -rhapsody* | -darwin* | -opened* \ + | -uxpv* | -beos* | -mpeix* | -udk* | -moxiebox* \ + | -interix* | -uwin* | -mks* | -rhapsody* | -darwin* \ | -openstep* | -oskit* | -conix* | -pw32* | -nonstopux* \ | -storm-chaos* | -tops10* | -tenex* | -tops20* | -its* \ | -os2* | -vos* | -palmos* | -uclinux* | -nucleus* \ - | -morphos* | -superux* | -rtmk* | -rtmk-nova* | -windiss* \ + | -morphos* | -superux* | -rtmk* | -windiss* \ | -powermax* | -dnix* | -nx6 | -nx7 | -sei* | -dragonfly* \ - | -skyos* | -haiku* | -rdos* | -toppers* | -drops* | -es*) + | -skyos* | -haiku* | -rdos* | -toppers* | -drops* | -es* \ + | 
-onefs* | -tirtos* | -phoenix* | -fuchsia* | -redox* | -bme* \ + | -midnightbsd*) # Remember, each alternative MUST END IN *, to match a version number. ;; -qnx*) @@ -1396,12 +1407,12 @@ -nto*) os=`echo $os | sed -e 's|nto|nto-qnx|'` ;; - -sim | -es1800* | -hms* | -xray | -os68k* | -none* | -v88r* \ - | -windows* | -osx | -abug | -netware* | -os9* | -beos* | -haiku* \ + -sim | -xray | -os68k* | -v88r* \ + | -windows* | -osx | -abug | -netware* | -os9* \ | -macos* | -mpw* | -magic* | -mmixware* | -mon960* | -lnews*) ;; -mac*) - os=`echo $os | sed -e 's|mac|macos|'` + os=`echo "$os" | sed -e 's|mac|macos|'` ;; -linux-dietlibc) os=-linux-dietlibc @@ -1410,10 +1421,10 @@ os=`echo $os | sed -e 's|linux|linux-gnu|'` ;; -sunos5*) - os=`echo $os | sed -e 's|sunos5|solaris2|'` + os=`echo "$os" | sed -e 's|sunos5|solaris2|'` ;; -sunos6*) - os=`echo $os | sed -e 's|sunos6|solaris3|'` + os=`echo "$os" | sed -e 's|sunos6|solaris3|'` ;; -opened*) os=-openedition @@ -1424,12 +1435,6 @@ -wince*) os=-wince ;; - -osfrose*) - os=-osfrose - ;; - -osf*) - os=-osf - ;; -utek*) os=-bsd ;; @@ -1454,7 +1459,7 @@ -nova*) os=-rtmk-nova ;; - -ns2 ) + -ns2) os=-nextstep2 ;; -nsk*) @@ -1476,7 +1481,7 @@ -oss*) os=-sysv3 ;; - -svr4) + -svr4*) os=-sysv4 ;; -svr3) @@ -1491,35 +1496,38 @@ -ose*) os=-ose ;; - -es1800*) - os=-ose - ;; - -xenix) - os=-xenix - ;; -*mint | -mint[0-9]* | -*MiNT | -MiNT[0-9]*) os=-mint ;; - -aros*) - os=-aros - ;; - -kaos*) - os=-kaos - ;; -zvmoe) os=-zvmoe ;; -dicos*) os=-dicos ;; + -pikeos*) + # Until real need of OS specific support for + # particular features comes up, bare metal + # configurations are quite functional. + case $basic_machine in + arm*) + os=-eabi + ;; + *) + os=-elf + ;; + esac + ;; -nacl*) ;; + -ios) + ;; -none) ;; *) # Get rid of the `-' at the beginning of $os. 
os=`echo $os | sed 's/[^-]*-//'` - echo Invalid configuration \`$1\': system \`$os\' not recognized 1>&2 + echo Invalid configuration \`"$1"\': system \`"$os"\' not recognized 1>&2 exit 1 ;; esac @@ -1554,6 +1562,9 @@ c4x-* | tic4x-*) os=-coff ;; + c8051-*) + os=-elf + ;; hexagon-*) os=-elf ;; @@ -1606,12 +1617,12 @@ sparc-* | *-sun) os=-sunos4.1.1 ;; + pru-*) + os=-elf + ;; *-be) os=-beos ;; - *-haiku) - os=-haiku - ;; *-ibm) os=-aix ;; @@ -1651,7 +1662,7 @@ m88k-omron*) os=-luna ;; - *-next ) + *-next) os=-nextstep ;; *-sequent) @@ -1666,9 +1677,6 @@ i370-*) os=-mvs ;; - *-next) - os=-nextstep3 - ;; *-gould) os=-sysv ;; @@ -1778,15 +1786,15 @@ vendor=stratus ;; esac - basic_machine=`echo $basic_machine | sed "s/unknown/$vendor/"` + basic_machine=`echo "$basic_machine" | sed "s/unknown/$vendor/"` ;; esac -echo $basic_machine$os +echo "$basic_machine$os" exit # Local variables: -# eval: (add-hook 'write-file-hooks 'time-stamp) +# eval: (add-hook 'write-file-functions 'time-stamp) # time-stamp-start: "timestamp='" # time-stamp-format: "%:y-%02m-%02d" # time-stamp-end: "'" diff -Nru x264-0.157.2935+git545de2f/configure x264-0.160.3011+gitcde9a93/configure --- x264-0.157.2935+git545de2f/configure 2018-09-25 12:35:00.000000000 +0000 +++ x264-0.160.3011+gitcde9a93/configure 2020-07-13 10:30:21.000000000 +0000 @@ -128,7 +128,7 @@ } cc_check() { - if [ -z "$3" ]; then + if [ -z "$3$4" ]; then if [ -z "$1$2" ]; then log_check "whether $CC works" elif [ -z "$1" ]; then @@ -138,7 +138,11 @@ fi elif [ -z "$1" ]; then if [ -z "$2" ]; then - log_check "whether $CC supports $3" + if [ -z "$3" ]; then + log_check "whether $CC supports $4" + else + log_check "whether $CC supports $3" + fi else log_check "whether $CC supports $3 with $2" fi @@ -149,11 +153,14 @@ for arg in $1; do echo "#include <$arg>" >> conftest.c done + if [ -n "$4" ]; then + echo "$4" >> conftest.c + fi echo "int main (void) { $3 return 0; }" >> conftest.c if [ $compiler_style = MS ]; then - cc_cmd="$CC 
conftest.c $(cc_cflags $CFLAGS $CHECK_CFLAGS $2) -link $(cl_ldflags $2 $LDFLAGSCLI $LDFLAGS)" + cc_cmd="$CC conftest.c $(cc_cflags $CFLAGS $CFLAGSCLI $CHECK_CFLAGS $2) -link $(cl_ldflags $2 $LDFLAGSCLI $LDFLAGS)" else - cc_cmd="$CC conftest.c $CFLAGS $CHECK_CFLAGS $2 $LDFLAGSCLI $LDFLAGS -o conftest" + cc_cmd="$CC conftest.c $CFLAGS $CFLAGSCLI $CHECK_CFLAGS $2 $LDFLAGSCLI $LDFLAGS -o conftest" fi if $cc_cmd >conftest.log 2>&1; then res=$? @@ -380,6 +387,8 @@ vsx="auto" CFLAGS="$CFLAGS -Wall -I. -I\$(SRCPATH)" +CFLAGSSO="$CFLAGSSO" +CFLAGSCLI="$CFLAGSCLI" LDFLAGS="$LDFLAGS" LDFLAGSCLI="$LDFLAGSCLI" ASFLAGS="$ASFLAGS -I. -I\$(SRCPATH)" @@ -394,7 +403,7 @@ " # list of all preprocessor HAVE values we can define -CONFIG_HAVE="MALLOC_H ALTIVEC ALTIVEC_H MMX ARMV6 ARMV6T2 NEON BEOSTHREAD POSIXTHREAD WIN32THREAD THREAD LOG2F SWSCALE \ +CONFIG_HAVE="MALLOC_H ALTIVEC ALTIVEC_H MMX ARMV6 ARMV6T2 NEON AARCH64 BEOSTHREAD POSIXTHREAD WIN32THREAD THREAD LOG2F SWSCALE \ LAVF FFMS GPAC AVS GPL VECTOREXT INTERLACED CPU_COUNT OPENCL THP LSMASH X86_INLINE_ASM AS_FUNC INTEL_DISPATCHER \ MSA MMAP WINRT VSX ARM_INLINE_ASM STRTOK_R CLOCK_GETTIME BITDEPTH8 BITDEPTH10" @@ -532,6 +541,7 @@ CC="${CC-${cross_prefix}gcc}" STRIP="${STRIP-${cross_prefix}strip}" +STRINGS="${STRINGS-${cross_prefix}strings}" INSTALL="${INSTALL-install}" PKGCONFIG="${PKGCONFIG-${cross_prefix}pkg-config}" @@ -564,7 +574,7 @@ # test for use of compilers that require specific handling cc_base="$(basename "$CC")" QPRE="-" -if [[ $host_os = mingw* || $host_os = cygwin* ]]; then +if [[ $host_os = mingw* || $host_os = msys* || $host_os = cygwin* ]]; then if [[ "$cc_base" = icl || "$cc_base" = icl[\ .]* ]]; then # Windows Intel Compiler creates dependency generation with absolute Windows paths, Cygwin's make does not support Windows paths. 
[[ $host_os = cygwin* ]] && die "Windows Intel Compiler support requires MSYS" @@ -611,6 +621,15 @@ if cc_check '' -Werror=unknown-warning-option ; then CHECK_CFLAGS="$CHECK_CFLAGS -Werror=unknown-warning-option" fi + if cc_check '' -Werror=unknown-attributes ; then + CHECK_CFLAGS="$CHECK_CFLAGS -Werror=unknown-attributes" + fi + if cc_check '' -Werror=attributes ; then + CHECK_CFLAGS="$CHECK_CFLAGS -Werror=attributes" + fi + if cc_check '' -Werror=ignored-attributes ; then + CHECK_CFLAGS="$CHECK_CFLAGS -Werror=ignored-attributes" + fi fi libm="" @@ -738,7 +757,7 @@ stack_alignment=16 [ $compiler = GNU ] && CFLAGS="-m64 $CFLAGS" && LDFLAGS="-m64 $LDFLAGS" if [ "$SYS" = MACOSX ]; then - ASFLAGS="$ASFLAGS -f macho64 -DPIC -DPREFIX" + ASFLAGS="$ASFLAGS -f macho64 -DPREFIX" if cc_check '' "-arch x86_64"; then CFLAGS="$CFLAGS -arch x86_64" LDFLAGS="$LDFLAGS -arch x86_64" @@ -886,34 +905,36 @@ pic="yes" fi -if [ $compiler = GNU -a \( $ARCH = X86 -o $ARCH = X86_64 \) ] ; then - if cc_check '' -mpreferred-stack-boundary=6 ; then - CFLAGS="$CFLAGS -mpreferred-stack-boundary=6" - stack_alignment=64 - elif cc_check '' -mstack-alignment=64 ; then - CFLAGS="$CFLAGS -mstack-alignment=64" - stack_alignment=64 - elif [ $stack_alignment -lt 16 ] ; then - if cc_check '' -mpreferred-stack-boundary=4 ; then - CFLAGS="$CFLAGS -mpreferred-stack-boundary=4" - stack_alignment=16 - elif cc_check '' -mstack-alignment=16 ; then - CFLAGS="$CFLAGS -mstack-alignment=16" - stack_alignment=16 - fi - fi -elif [ $compiler = ICC -a $ARCH = X86 ]; then - # icc on linux has various degrees of mod16 stack support - if [ $SYS = LINUX ]; then - # >= 12 defaults to a mod16 stack - if cpp_check "" "" "__INTEL_COMPILER >= 1200" ; then - stack_alignment=16 - # 11 <= x < 12 is capable of keeping a mod16 stack, but defaults to not doing so. 
- elif cpp_check "" "" "__INTEL_COMPILER >= 1100" ; then - CFLAGS="$CFLAGS -falign-stack=assume-16-byte" - stack_alignment=16 +if cc_check '' '' '' '__attribute__((force_align_arg_pointer))' ; then + if [ $compiler = GNU -a \( $ARCH = X86 -o $ARCH = X86_64 \) ] ; then + if cc_check '' -mpreferred-stack-boundary=6 ; then + CFLAGS="$CFLAGS -mpreferred-stack-boundary=6" + stack_alignment=64 + elif cc_check '' -mstack-alignment=64 ; then + CFLAGS="$CFLAGS -mstack-alignment=64" + stack_alignment=64 + elif [ $stack_alignment -lt 16 ] ; then + if cc_check '' -mpreferred-stack-boundary=4 ; then + CFLAGS="$CFLAGS -mpreferred-stack-boundary=4" + stack_alignment=16 + elif cc_check '' -mstack-alignment=16 ; then + CFLAGS="$CFLAGS -mstack-alignment=16" + stack_alignment=16 + fi + fi + elif [ $compiler = ICC -a $ARCH = X86 ]; then + # icc on linux has various degrees of mod16 stack support + if [ $SYS = LINUX ]; then + # >= 12 defaults to a mod16 stack + if cpp_check "" "" "__INTEL_COMPILER >= 1200" ; then + stack_alignment=16 + # 11 <= x < 12 is capable of keeping a mod16 stack, but defaults to not doing so. 
+ elif cpp_check "" "" "__INTEL_COMPILER >= 1100" ; then + CFLAGS="$CFLAGS -falign-stack=assume-16-byte" + stack_alignment=16 + fi + # < 11 is completely incapable of keeping a mod16 stack fi - # < 11 is completely incapable of keeping a mod16 stack fi fi @@ -951,8 +972,10 @@ if [ $asm = auto -a $ARCH = AARCH64 ] ; then if [ $compiler = CL ] && cpp_check '' '' 'defined(_M_ARM64)' ; then + define HAVE_AARCH64 define HAVE_NEON elif cc_check '' '' '__asm__("cmeq v0.8h, v0.8h, #0");' ; then + define HAVE_AARCH64 define HAVE_NEON ASFLAGS="$ASFLAGS -c" else @@ -995,10 +1018,10 @@ if [ $compiler = GNU ]; then echo "int i[2] = {0x42494745,0}; double f[2] = {0x1.0656e6469616ep+102,0};" > conftest.c $CC $CFLAGS conftest.c -c -o conftest.o 2>/dev/null || die "endian test failed" - if (${cross_prefix}strings -a conftest.o | grep -q BIGE) && (${cross_prefix}strings -a conftest.o | grep -q FPendian) ; then + if (${STRINGS} -a conftest.o | grep -q BIGE) && (${STRINGS} -a conftest.o | grep -q FPendian) ; then define WORDS_BIGENDIAN CPU_ENDIAN="big-endian" - elif !(${cross_prefix}strings -a conftest.o | grep -q EGIB && ${cross_prefix}strings -a conftest.o | grep -q naidnePF) ; then + elif !(${STRINGS} -a conftest.o | grep -q EGIB && ${STRINGS} -a conftest.o | grep -q naidnePF) ; then die "endian test failed" fi fi @@ -1007,6 +1030,7 @@ [ "$static" = "yes" ] && die "Option --system-libx264 can not be used together with --enable-static" if pkg_check x264 ; then X264_LIBS="$($PKGCONFIG --libs x264)" + X264_CFLAGS="$($PKGCONFIG --cflags x264)" X264_INCLUDE_DIR="${X264_INCLUDE_DIR-$($PKGCONFIG --variable=includedir x264)}" configure_system_override "$X264_INCLUDE_DIR" || die "Detection of system libx264 configuration failed" else @@ -1067,7 +1091,7 @@ fi [ "$thread" != "no" ] && define HAVE_THREAD -if cc_check 'math.h' '' 'log2f(2);' ; then +if cc_check 'math.h' '' 'volatile float x = 2; return log2f(x);' ; then define HAVE_LOG2F fi @@ -1253,7 +1277,7 @@ if [ "$pic" = "yes" ] ; then [ 
"$SYS" != WINDOWS -a "$SYS" != CYGWIN ] && CFLAGS="$CFLAGS -fPIC" - ASFLAGS="$ASFLAGS -DPIC" + [[ "$ASFLAGS" != *"-DPIC"* ]] && ASFLAGS="$ASFLAGS -DPIC" # resolve textrels in the x86 asm cc_check stdio.h "-shared -Wl,-Bsymbolic" && SOFLAGS="$SOFLAGS -Wl,-Bsymbolic" [ $SYS = SunOS -a "$ARCH" = "X86" ] && SOFLAGS="$SOFLAGS -mimpure-text" @@ -1308,6 +1332,10 @@ CFLAGS="-Wno-maybe-uninitialized $CFLAGS" fi +if [ $compiler = GNU ] && cc_check '' -fvisibility=hidden ; then + CFLAGS="$CFLAGS -fvisibility=hidden" +fi + if [ $compiler = ICC -o $compiler = ICL ] ; then if cc_check 'extras/intel_dispatcher.h' '' 'x264_intel_dispatcher_override();' ; then define HAVE_INTEL_DISPATCHER @@ -1365,12 +1393,22 @@ ${SRCPATH}/version.sh >> x264_config.h +if [ "$shared" = "yes" ]; then + CFLAGSSO="$CFLAGSSO -DX264_API_EXPORTS" +fi + if [ "$cli_libx264" = "system" ] ; then if [ "$shared" = "yes" ]; then - CLI_LIBX264='$(SONAME)' + if [ "$SYS" = "WINDOWS" -o "$SYS" = "CYGWIN" ]; then + CLI_LIBX264='$(IMPLIBNAME)' + else + CLI_LIBX264='$(SONAME)' + fi + CFLAGSCLI="$CFLAGSCLI -DX264_API_IMPORTS" else CLI_LIBX264= LDFLAGSCLI="$X264_LIBS $LDFLAGSCLI" + CFLAGSCLI="$CFLAGSCLI $X264_CFLAGS" cc_check 'stdint.h x264.h' '' 'x264_encoder_open(0);' || die "System libx264 can't be used for compilation of this version" fi else @@ -1409,7 +1447,11 @@ LIBX264=libx264.a [ -n "$RC" ] && RCFLAGS="$RCFLAGS -I. -o " fi -[ $compiler != GNU ] && CFLAGS="$(cc_cflags $CFLAGS)" +if [ $compiler != GNU ]; then + CFLAGS="$(cc_cflags $CFLAGS)" + CFLAGSSO="$(cc_cflags $CFLAGSSO)" + CFLAGSCLI="$(cc_cflags $CFLAGSCLI)" +fi if [ $compiler = ICC -o $compiler = ICL ]; then # icc does not define __SSE__ until SSE2 optimization and icl never defines it or _M_IX86_FP [ \( $ARCH = X86_64 -o $ARCH = X86 \) -a $asm = yes ] && ! 
cpp_check "" "" "defined(__SSE__)" && define __SSE__ @@ -1448,13 +1490,17 @@ SYS=$SYS CC=$CC CFLAGS=$CFLAGS +CFLAGSSO=$CFLAGSSO +CFLAGSCLI=$CFLAGSCLI COMPILER=$compiler COMPILER_STYLE=$compiler_style DEPMM=$DEPMM DEPMT=$DEPMT LD=$LD LDFLAGS=$LDFLAGS +LDFLAGSCLI=$LDFLAGSCLI LIBX264=$LIBX264 +CLI_LIBX264=$CLI_LIBX264 AR=$AR RANLIB=$RANLIB STRIP=$STRIP @@ -1490,14 +1536,7 @@ echo "SONAME=libx264-$API.dll" >> config.mak if [ $compiler_style = MS ]; then echo 'IMPLIBNAME=libx264.dll.lib' >> config.mak - # GNU ld on windows defaults to exporting all global functions if there are no explicit __declspec(dllexport) declarations - # MSVC link does not act similarly, so it is required to make an export definition out of x264.h and use it at link time - echo "SOFLAGS=-dll -def:x264.def -implib:\$(IMPLIBNAME) $SOFLAGS" >> config.mak - echo "EXPORTS" > x264.def - # export API functions - grep "^\(int\|void\|x264_t\).*x264" ${SRCPATH}/x264.h | sed -e "s/.*\(x264.*\)(.*/\1/;s/open/open_$API/g" >> x264.def - # export API variables/data. 
must be flagged with the DATA keyword - grep "extern.*x264" ${SRCPATH}/x264.h | sed -e "s/.*\(x264\w*\)\W.*/\1 DATA/;" >> x264.def + echo "SOFLAGS=-dll -implib:\$(IMPLIBNAME) $SOFLAGS" >> config.mak else echo 'IMPLIBNAME=libx264.dll.a' >> config.mak echo "SOFLAGS=-shared -Wl,--out-implib,\$(IMPLIBNAME) $SOFLAGS" >> config.mak @@ -1524,9 +1563,6 @@ echo 'install: install-lib-static' >> config.mak fi -echo "LDFLAGSCLI = $LDFLAGSCLI" >> config.mak -echo "CLI_LIBX264 = $CLI_LIBX264" >> config.mak - cat > x264.pc << EOF prefix=$prefix exec_prefix=$exec_prefix @@ -1535,10 +1571,10 @@ Name: x264 Description: H.264 (MPEG4 AVC) encoder library -Version: $(grep POINTVER < x264_config.h | sed -e 's/.* "//; s/".*//') +Version: $(grep POINTVER < x264_config.h | sed -e 's/.* "//; s/".*//; s/ .*//') Libs: -L$libdir -lx264 $([ "$shared" = "yes" ] || echo $libpthread $libm $libdl) Libs.private: $([ "$shared" = "yes" ] && echo $libpthread $libm $libdl) -Cflags: -I$includedir +Cflags: -I$includedir $([ "$shared" = "yes" ] && echo "-DX264_API_IMPORTS") EOF filters="crop select_every" diff -Nru x264-0.157.2935+git545de2f/debian/changelog x264-0.160.3011+gitcde9a93/debian/changelog --- x264-0.157.2935+git545de2f/debian/changelog 2019-01-21 19:09:31.000000000 +0000 +++ x264-0.160.3011+gitcde9a93/debian/changelog 2021-03-24 16:57:25.000000000 +0000 @@ -1,8 +1,100 @@ -x264 (2:0.157.2935+git545de2f-1~bionic) bionic; urgency=medium +x264 (2:0.160.3011+gitcde9a93-2~18.04.york0) bionic; urgency=medium - * New upstream (157 + * debian/control: lower debhelper compat=11 - -- Doug McMahon Mon, 21 Jan 2019 14:09:31 -0500 + -- Jonathon Fernyhough Wed, 24 Mar 2021 16:57:25 +0000 + +x264 (2:0.160.3011+gitcde9a93-2) unstable; urgency=medium + + * Team upload + * Upload to unstable + * debian/rules: Fix awk invocation to work with make 4.2 and 4.3 (Closes: + #966241) + + -- Sebastian Ramacher Sun, 26 Jul 2020 16:52:56 +0200 + +x264 (2:0.160.3011+gitcde9a93-1) experimental; urgency=medium + + * Update 
to new stable upstream + * New upstream version 0.160.3011+gitcde9a93 + * Update debian/control for soname bump + * Regenerate manpage + + -- Rico Tzschichholz Mon, 13 Jul 2020 21:29:33 +0200 + +x264 (2:0.159.2999+git296494a-2) unstable; urgency=medium + + * Team upload + * Upload to unstable + + -- Sebastian Ramacher Sun, 12 Jul 2020 18:46:44 +0200 + +x264 (2:0.159.2999+git296494a-1) experimental; urgency=medium + + [ Fabian Greffrath ] + * Remove myself from Uploaders + + [ Rico Tzschichholz ] + * Update to new stable upstream + * New upstream version 0.159.2999+git296494a + * Drop upstreamed "Export x264_stack_align" patch + * Update debian/control for soname bump + * Regenerate manpage + + [ Ondřej Nový ] + * Use debhelper-compat instead of debian/compat + + [ Sebastian Ramacher ] + * debian/control: + - Bump debhlper compat to 13 + - Bump Standards-Version + - Set RRR: no + * debian/copyright: Bump copyright years + * debian/rules: Fix awk invocation + + -- Rico Tzschichholz Sun, 21 Jun 2020 15:52:08 +0200 + +x264 (2:0.155.2917+git0a84d98-2) unstable; urgency=medium + + * Team upload. + * Upload to unstable. + + -- Sebastian Ramacher Thu, 27 Sep 2018 22:03:40 +0200 + +x264 (2:0.155.2917+git0a84d98-1) experimental; urgency=medium + + [ Ondřej Nový ] + * d/copyright: Change Format URL to correct one + * d/control: Set Vcs-* to salsa.debian.org + * d/changelog: Remove trailing whitespaces + + [ Felipe Sateler ] + * Change maintainer address to debian-multimedia@lists.debian.org + + [ Ondřej Nový ] + * d/tests: Use AUTOPKGTEST_TMP instead of ADTTMP + + [ Rico Tzschichholz ] + * Update to new stable upstream + * New upstream version 0.155.2917+git0a84d98 + * Drop custom 10bit build, upstream supports this as runtime option now + * Update debian/control for soname bump + * Regenerate manpage + * Update copyright years + + [ Sebastian Ramacher ] + * debian/{rules,confflags}: + - Use dpkg include for architecture variables. + - Fix dh_install --list-missing warning. 
+ - Remove cruft. + - Handle CPPFLAGS. + * debian/patches: Export x264_stack_align. + * debian/: Bump debhelper compat to 11. + * debian/control: + - Drop obsolete dpkg-dev B-D. + - Bump Standards-Version. + + -- Rico Tzschichholz Fri, 24 Aug 2018 17:25:59 +0200 x264 (2:0.152.2854+gite9a5903-2) unstable; urgency=medium @@ -609,11 +701,11 @@ * Drop --enable-pic, let's see what breaks, LP: #524859 [ Reinhard Tartler ] - * New upstream snapshot, no new features, LP: #526396 + * New upstream snapshot, no new features, LP: #526396 * remove quilt infrastructure * don't set CFLAGS in debian/rules, upstream build system overrides this anyways - + -- Reinhard Tartler Sun, 21 Feb 2010 16:57:21 +0100 x264 (2:0.85.1442.1+git781d30-1) lucid; urgency=low diff -Nru x264-0.157.2935+git545de2f/debian/compat x264-0.160.3011+gitcde9a93/debian/compat --- x264-0.157.2935+git545de2f/debian/compat 2018-01-19 11:35:17.000000000 +0000 +++ x264-0.160.3011+gitcde9a93/debian/compat 2021-03-24 16:57:25.000000000 +0000 @@ -1 +1 @@ -9 +11 diff -Nru x264-0.157.2935+git545de2f/debian/confflags x264-0.160.3011+gitcde9a93/debian/confflags --- x264-0.157.2935+git545de2f/debian/confflags 2018-01-19 11:35:17.000000000 +0000 +++ x264-0.160.3011+gitcde9a93/debian/confflags 2018-08-28 21:13:27.000000000 +0000 @@ -2,14 +2,7 @@ # vi:syntax=make # configure flags logic -# Set CFLAGS from DEB_CFLAGS if defined, otherwise add build flags from -# dpkg-buildflags excluding -O2. -CFLAGS = $(or $(DEB_CFLAGS),$(shell dpkg-buildflags --get CFLAGS 2>/dev/null | sed -e 's/-O2//g')) -ifeq (,$(CFLAGS)) - # Handle case for versions of Debian/Ubuntu that have dpkg-dev (<< 1.15.7). 
- CFLAGS = -fstack-protector --param=ssp-buffer-size=4 -Wformat -Wformat-security -Werror=format-security -endif -shared_extra_cflags = $(CFLAGS) +extra_cflags = $(CFLAGS) $(CPPFLAGS) LDFLAGS := $(filter-out %-Bsymbolic-functions,$(LDFLAGS)) @@ -119,14 +112,6 @@ endif endif -# See Bug#743713, the debian sparc and sh4 ports are currently stuck with gcc 4.6 -# -fno-aggressive-loop-optimizations was introduced only in gcc 4.8 -# this conditional will also help any backporters. -HAVEGCC4.8 :=$(shell dpkg --compare-versions `gcc --version | grep ^gcc | sed 's/^.* //g'` ge 4.8 && echo yes || echo no) -ifeq (yes,$(HAVEGCC4.8)) -common_confflags += --extra-cflags=-fno-aggressive-loop-optimizations -endif - # MIPS upstream arch, mips, mipsel and mips64el Debian arches; no upstream flags by # default ifneq (,$(filter mips mipsel mips64el,$(DEB_HOST_GNU_CPU))) @@ -151,7 +136,7 @@ ifeq ($(toolchain_arch),) toolchain_arch := armv6t2 endif -shared_extra_cflags += -march=$(toolchain_arch) +extra_cflags += -march=$(toolchain_arch) else do_opt := yes opt_libdir := /usr/lib/$(DEB_HOST_MULTIARCH)/neon/vfp diff -Nru x264-0.157.2935+git545de2f/debian/control x264-0.160.3011+gitcde9a93/debian/control --- x264-0.157.2935+git545de2f/debian/control 2019-01-21 19:09:31.000000000 +0000 +++ x264-0.160.3011+gitcde9a93/debian/control 2021-03-24 16:57:25.000000000 +0000 @@ -1,22 +1,21 @@ Source: x264 Section: libs Priority: optional -Maintainer: Debian Multimedia Maintainers +Maintainer: Debian Multimedia Maintainers Uploaders: Reinhard Tartler , - Fabian Greffrath , Rico Tzschichholz Build-Depends: - debhelper (>= 9.20160115), - dpkg-dev (>= 1.17.14), + debhelper (>= 11), libavformat-dev (>= 6:9) , libffms2-dev , libgpac-dev (>= 0.5.0+svn4288~) , nasm (>= 2.13) [any-i386 any-amd64] -Standards-Version: 4.1.3 -Vcs-Git: https://anonscm.debian.org/git/pkg-multimedia/x264.git -Vcs-Browser: https://anonscm.debian.org/cgit/pkg-multimedia/x264.git +Standards-Version: 4.5.0 +Vcs-Git: 
https://salsa.debian.org/multimedia-team/x264.git +Vcs-Browser: https://salsa.debian.org/multimedia-team/x264 Homepage: http://www.videolan.org/developers/x264.html +Rules-Requires-Root: no Package: x264 Section: graphics @@ -48,7 +47,7 @@ * parallel encoding on multiple CPUs * interlaced streams -Package: libx264-157 +Package: libx264-160 Architecture: any Multi-Arch: same Depends: @@ -65,7 +64,7 @@ Architecture: any Multi-Arch: same Depends: - libx264-157 (= ${binary:Version}), + libx264-160 (= ${binary:Version}), ${misc:Depends} Description: development files for libx264 libx264 is an advanced encoding library for creating H.264 (MPEG-4 AVC) diff -Nru x264-0.157.2935+git545de2f/debian/control.in x264-0.160.3011+gitcde9a93/debian/control.in --- x264-0.157.2935+git545de2f/debian/control.in 2018-01-19 11:35:17.000000000 +0000 +++ x264-0.160.3011+gitcde9a93/debian/control.in 2021-03-24 16:57:25.000000000 +0000 @@ -1,22 +1,21 @@ Source: x264 Section: libs Priority: optional -Maintainer: Debian Multimedia Maintainers +Maintainer: Debian Multimedia Maintainers Uploaders: Reinhard Tartler , - Fabian Greffrath , Rico Tzschichholz Build-Depends: - debhelper (>= 9.20160115), - dpkg-dev (>= 1.17.14), + debhelper (>= 11), libavformat-dev (>= 6:9) , libffms2-dev , libgpac-dev (>= 0.5.0+svn4288~) , nasm (>= 2.13) [any-i386 any-amd64] -Standards-Version: 4.1.3 -Vcs-Git: https://anonscm.debian.org/git/pkg-multimedia/x264.git -Vcs-Browser: https://anonscm.debian.org/cgit/pkg-multimedia/x264.git +Standards-Version: 4.5.0 +Vcs-Git: https://salsa.debian.org/multimedia-team/x264.git +Vcs-Browser: https://salsa.debian.org/multimedia-team/x264 Homepage: http://www.videolan.org/developers/x264.html +Rules-Requires-Root: no Package: x264 Section: graphics diff -Nru x264-0.157.2935+git545de2f/debian/copyright x264-0.160.3011+gitcde9a93/debian/copyright --- x264-0.157.2935+git545de2f/debian/copyright 2018-01-19 11:35:17.000000000 +0000 +++ x264-0.160.3011+gitcde9a93/debian/copyright 
2020-06-21 13:49:01.000000000 +0000 @@ -1,17 +1,17 @@ -Format: http://anonscm.debian.org/viewvc/dep/web/deps/dep5.mdwn?revision=174&view=markup +Format: https://www.debian.org/doc/packaging-manuals/copyright-format/1.0/ Upstream-Name: x264 Upstream-Contact: x264-devel@videolan.org Source: http://www.videolan.org/developers/x264.html Files: * -Copyright: 2003-2017 x264 project +Copyright: 2003-2020 x264 project License: GPL-2+ Comment: This program is also available under a commercial proprietary license. For more information, contact us at licensing@x264.com. Files: common/x86/x86inc.asm -Copyright: 2005-2017 x264 project +Copyright: 2005-2020 x264 project License: ISC Permission to use, copy, modify, and/or distribute this software for any purpose with or without fee is hereby granted, provided that the above diff -Nru x264-0.157.2935+git545de2f/debian/libx264N.install.in x264-0.160.3011+gitcde9a93/debian/libx264N.install.in --- x264-0.157.2935+git545de2f/debian/libx264N.install.in 2018-01-19 11:35:17.000000000 +0000 +++ x264-0.160.3011+gitcde9a93/debian/libx264N.install.in 2018-08-28 20:31:24.000000000 +0000 @@ -1,2 +1 @@ shared/usr/lib/@DEB_HOST_MULTIARCH@/libx264.so.* usr/lib/@DEB_HOST_MULTIARCH@ -shared/usr/lib/@DEB_HOST_MULTIARCH@/x264-10bit/libx264.so.* usr/lib/@DEB_HOST_MULTIARCH@/x264-10bit diff -Nru x264-0.157.2935+git545de2f/debian/rules x264-0.160.3011+gitcde9a93/debian/rules --- x264-0.157.2935+git545de2f/debian/rules 2018-01-19 11:35:17.000000000 +0000 +++ x264-0.160.3011+gitcde9a93/debian/rules 2020-07-26 14:36:19.000000000 +0000 @@ -1,19 +1,15 @@ #!/usr/bin/make -f -libx264N := libx264-$(shell awk '/\#define X264_BUILD/{print $$3}' x264.h) - -DEB_HOST_MULTIARCH ?= $(shell dpkg-architecture -qDEB_HOST_MULTIARCH) -DEB_BUILD_GNU_TYPE ?= $(shell dpkg-architecture -qDEB_BUILD_GNU_TYPE) -DEB_HOST_GNU_TYPE ?= $(shell dpkg-architecture -qDEB_HOST_GNU_TYPE) -DEB_HOST_GNU_CPU ?= $(shell dpkg-architecture -qDEB_HOST_GNU_CPU) +libx264N := libx264-$(shell awk 
'/define X264_BUILD/{print $$3}' x264.h) +include /usr/share/dpkg/architecture.mk include debian/confflags DH_INSTALL_FILES = debian/$(libx264N).install \ debian/libx264-dev.install %: - dh $@ --parallel + dh $@ .PHONY: debian/control debian/control: @@ -24,13 +20,13 @@ override_dh_auto_build: # Build static lib - LDFLAGS="$(LDFLAGS)" ./configure $(static_confflags) \ + CFLAGS="$(extra_cflags)" LDFLAGS="$(LDFLAGS)" ./configure $(static_confflags) \ || ( tail -v -n +0 config.log config.log ; exit 1 ) $(MAKE) $(MAKE) install DESTDIR=$(CURDIR)/debian/install/static $(MAKE) distclean # Build shared lib - CFLAGS="$(shared_extra_cflags)" LDFLAGS="$(LDFLAGS)" ./configure $(shared_confflags) \ + CFLAGS="$(extra_cflags)" LDFLAGS="$(LDFLAGS)" ./configure $(shared_confflags) \ || ( tail -v -n +0 config.log config.log ; exit 1 ) $(MAKE) $(MAKE) install DESTDIR=$(CURDIR)/debian/install/shared @@ -42,25 +38,6 @@ $(MAKE) $(MAKE) install DESTDIR=$(CURDIR)/debian/install/opt endif - # now do the 10 bit builds - $(MAKE) distclean - CFLAGS="$(shared_extra_cflags)" LDFLAGS="$(LDFLAGS)" ./configure $(shared_confflags) --bit-depth=10 \ - || ( tail -v -n +0 config.log config.log ; exit 1 ) - $(MAKE) - install -d -m755 $(CURDIR)/debian/install/shared/usr/lib/$(DEB_HOST_MULTIARCH)/x264-10bit - install -m755 libx264.so.* $(CURDIR)/debian/install/shared/usr/lib/$(DEB_HOST_MULTIARCH)/x264-10bit - sed -e 's,@DEB_HOST_MULTIARCH@,$(DEB_HOST_MULTIARCH),' \ - debian/x264-10bit.in > $(CURDIR)/debian/install/shared/usr/bin/x264-10bit - chmod 755 $(CURDIR)/debian/install/shared/usr/bin/x264-10bit -ifeq ($(do_opt),yes) - $(MAKE) distclean - # Build opt lib - LDFLAGS="$(LDFLAGS)" ./configure $(opt_confflags) --bit-depth=10 \ - || ( tail -v -n +0 config.log config.log ; exit 1 ) - $(MAKE) - install -d -m755 $(CURDIR)/debian/install/opt/usr/lib/$(DEB_HOST_MULTIARCH)/x264-10bit - install -m755 libx264.so.* $(CURDIR)/debian/install/opt/usr/lib/$(DEB_HOST_MULTIARCH)/x264-10bit -endif 
override_dh_auto_configure: # dh_auto_configure phase handled via dh_auto_build. @@ -74,12 +51,15 @@ dh_clean config.mak2 $(DH_INSTALL_FILES) override_dh_install: $(DH_INSTALL_FILES) - dh_install --list-missing --sourcedir=debian/install + dh_install --sourcedir=debian/install ifeq ($(do_opt),yes) mkdir -p debian/$(libx264N)$(opt_libdir) cp -a debian/install/opt$(opt_libdir)/*.so.* debian/$(libx264N)$(opt_libdir) endif +override_dh_missing: + dh_missing --list-missing --sourcedir=debian/install + debian/x264.1: build env LD_LIBRARY_PATH="$(LD_LIBRARY_PATH):$(CURDIR)/debian/install/shared/usr/lib/$(DEB_HOST_MULTIARCH)" \ help2man -n "fast h264 encoder" -N -s1 -S "Videolan project" -h '--fullhelp' \ diff -Nru x264-0.157.2935+git545de2f/debian/tests/encode-testimage x264-0.160.3011+gitcde9a93/debian/tests/encode-testimage --- x264-0.157.2935+git545de2f/debian/tests/encode-testimage 2018-01-19 11:35:17.000000000 +0000 +++ x264-0.160.3011+gitcde9a93/debian/tests/encode-testimage 2018-08-24 15:41:41.000000000 +0000 @@ -4,7 +4,7 @@ set -e -cd "$ADTTMP" +cd "$AUTOPKGTEST_TMP" ffmpeg -y -filter_complex testsrc -t 10 in.avi x264 --crf 24 -o out.mkv in.avi diff -Nru x264-0.157.2935+git545de2f/debian/x264.1 x264-0.160.3011+gitcde9a93/debian/x264.1 --- x264-0.157.2935+git545de2f/debian/x264.1 2018-01-19 11:35:17.000000000 +0000 +++ x264-0.160.3011+gitcde9a93/debian/x264.1 2020-07-13 19:29:03.000000000 +0000 @@ -1,9 +1,9 @@ -.\" DO NOT MODIFY THIS FILE! It was generated by help2man 1.47.5. -.TH X264 "1" "December 2017" "Videolan project" "User Commands" +.\" DO NOT MODIFY THIS FILE! It was generated by help2man 1.47.16. 
+.TH X264 "1" "July 2020" "Videolan project" "User Commands" .SH NAME x264 \- fast h264 encoder .SH DESCRIPTION -x264 core:152 r2854 e9a5903 +x264 core:160 r3011 cde9a93 Syntax: x264 [options] \fB\-o\fR outfile infile .PP Infile can be raw (in which case resolution is required), @@ -18,8 +18,9 @@ \&.flv \-> Flash Video \&.mp4 \-> MP4 if compiled with GPAC or L\-SMASH support (gpac) .PP -Output bit depth: 10 (configured at compile time) -.SH OPTIONS +Output bit depth: 8/10 +\&. +Options: .TP \fB\-h\fR, \fB\-\-help\fR List basic options @@ -283,13 +284,13 @@ Ratecontrol: .TP \fB\-q\fR, \fB\-\-qp\fR -Force constant QP (0\-69, 0=lossless) +Force constant QP (0\-81, 0=lossless) .TP \fB\-B\fR, \fB\-\-bitrate\fR Set bitrate (kbit/s) .TP \fB\-\-crf\fR -Quality\-based VBR (0\-51) [23.0] +Quality\-based VBR (\fB\-12\-51\fR) [23.0] .HP \fB\-\-rc\-lookahead\fR Number of frames for frametype lookahead [40] .HP @@ -379,8 +380,7 @@ .TP \fB\-A\fR, \fB\-\-partitions\fR Partitions to consider ["p8x8,b8x8,i8x8,i4x4"] -\- p8x8, p4x4, b8x8, i8x8, i4x4 -\- none, all +\- p8x8, p4x4, b8x8, i8x8, i4x4, none, all (p4x4 requires p8x8. i8x8 requires \fB\-\-8x8dct\fR.) 
.TP \fB\-\-direct\fR @@ -468,7 +468,7 @@ .TP \fB\-\-cqm\fR Preset quant matrices ["flat"] -\- jvt, flat +\- flat, jvt .TP \fB\-\-cqmfile\fR Read custom quant matrices from a JM\-compatible file @@ -517,18 +517,25 @@ .TP smpte240m, linear, log100, log316, iec61966\-2\-4, bt1361e, iec61966\-2\-1, -bt2020\-10, bt2020\-12, smpte2084, smpte428 +bt2020\-10, bt2020\-12, smpte2084, smpte428, +arib\-std\-b67 .TP \fB\-\-colormatrix\fR Specify color matrix setting ["???"] \- undef, bt709, fcc, bt470bg, smpte170m, .TP smpte240m, GBR, YCgCo, bt2020nc, bt2020c, -smpte2085 +smpte2085, chroma\-derived\-nc, +chroma\-derived\-c, ICtCp .TP \fB\-\-chromaloc\fR Specify chroma sample location (0 to 5) [0] .TP +\fB\-\-alternative\-transfer\fR Specify an alternative transfer +characteristics ["undef"] +.IP +\- same values as \fB\-\-transfer\fR +.TP \fB\-\-nal\-hrd\fR Signal HRD information (requires vbv\-bufsize) \- none, vbr, cbr (cbr not allowed in .mp4) @@ -564,31 +571,28 @@ Specify input colorspace format for raw input \- valid csps for `raw' demuxer: .TP -i420, yv12, nv12, nv21, i422, yv16, nv16, yuyv, -uyvy, i444, yv24, bgr, bgra, rgb +i400, i420, yv12, nv12, nv21, i422, yv16, nv16, +yuyv, uyvy, i444, yv24, bgr, bgra, rgb .TP \- valid csps for `lavf' demuxer: yuv420p, yuyv422, rgb24, bgr24, yuv422p, yuv444p, yuv410p, yuv411p, gray, monow, monob, -pal8, yuvj420p, yuvj422p, yuvj444p, xvmcmc, -xvmcidct, uyvy422, uyyvyy411, bgr8, bgr4, -bgr4_byte, rgb8, rgb4, rgb4_byte, nv12, nv21, -argb, rgba, abgr, bgra, gray16be, gray16le, -yuv440p, yuvj440p, yuva420p, vdpau_h264, -vdpau_mpeg1, vdpau_mpeg2, vdpau_wmv3, -vdpau_vc1, rgb48be, rgb48le, rgb565be, -rgb565le, rgb555be, rgb555le, bgr565be, -bgr565le, bgr555be, bgr555le, vaapi_moco, -vaapi_idct, vaapi_vld, yuv420p16le, -yuv420p16be, yuv422p16le, yuv422p16be, -yuv444p16le, yuv444p16be, vdpau_mpeg4, -dxva2_vld, rgb444le, rgb444be, bgr444le, -bgr444be, ya8, bgr48be, bgr48le, yuv420p9be, -yuv420p9le, yuv420p10be, yuv420p10le, -yuv422p10be, 
yuv422p10le, yuv444p9be, -yuv444p9le, yuv444p10be, yuv444p10le, -yuv422p9be, yuv422p9le, vda_vld, gbrp, gbrp9be, -gbrp9le, gbrp10be, gbrp10le, gbrp16be, +pal8, yuvj420p, yuvj422p, yuvj444p, uyvy422, +uyyvyy411, bgr8, bgr4, bgr4_byte, rgb8, rgb4, +rgb4_byte, nv12, nv21, argb, rgba, abgr, bgra, +gray16be, gray16le, yuv440p, yuvj440p, +yuva420p, rgb48be, rgb48le, rgb565be, rgb565le, +rgb555be, rgb555le, bgr565be, bgr565le, +bgr555be, bgr555le, vaapi_moco, vaapi_idct, +vaapi_vld, yuv420p16le, yuv420p16be, +yuv422p16le, yuv422p16be, yuv444p16le, +yuv444p16be, dxva2_vld, rgb444le, rgb444be, +bgr444le, bgr444be, ya8, bgr48be, bgr48le, +yuv420p9be, yuv420p9le, yuv420p10be, +yuv420p10le, yuv422p10be, yuv422p10le, +yuv444p9be, yuv444p9le, yuv444p10be, +yuv444p10le, yuv422p9be, yuv422p9le, gbrp, +gbrp9be, gbrp9le, gbrp10be, gbrp10le, gbrp16be, gbrp16le, yuva422p, yuva444p, yuva420p9be, yuva420p9le, yuva422p9be, yuva422p9le, yuva444p9be, yuva444p9le, yuva420p10be, @@ -597,10 +601,10 @@ yuva420p16le, yuva422p16be, yuva422p16le, yuva444p16be, yuva444p16le, vdpau, xyz12le, xyz12be, nv16, nv20le, nv20be, rgba64be, -rgba64le, bgra64be, bgra64le, yvyu422, vda, -ya16be, ya16le, gbrap, gbrap16be, gbrap16le, -qsv, mmal, d3d11va_vld, cuda, 0rgb, rgb0, 0bgr, -bgr0, yuv420p12be, yuv420p12le, yuv420p14be, +rgba64le, bgra64be, bgra64le, yvyu422, ya16be, +ya16le, gbrap, gbrap16be, gbrap16le, qsv, mmal, +d3d11va_vld, cuda, 0rgb, rgb0, 0bgr, bgr0, +yuv420p12be, yuv420p12le, yuv420p14be, yuv420p14le, yuv422p12be, yuv422p12le, yuv422p14be, yuv422p14le, yuv444p12be, yuv444p12le, yuv444p14be, yuv444p14le, @@ -609,19 +613,25 @@ bayer_gbrg8, bayer_grbg8, bayer_bggr16le, bayer_bggr16be, bayer_rggb16le, bayer_rggb16be, bayer_gbrg16le, bayer_gbrg16be, bayer_grbg16le, -bayer_grbg16be, yuv440p10le, yuv440p10be, +bayer_grbg16be, xvmc, yuv440p10le, yuv440p10be, yuv440p12le, yuv440p12be, ayuv64le, ayuv64be, videotoolbox_vld, p010le, p010be, gbrap12be, gbrap12le, gbrap10be, gbrap10le, mediacodec, gray12be, 
gray12le, gray10be, gray10le, p016le, p016be, d3d11, gray9be, gray9le, gbrpf32be, -gbrpf32le, gbrapf32be, gbrapf32le, drm_prime +gbrpf32le, gbrapf32be, gbrapf32le, drm_prime, +opencl, gray14be, gray14le, grayf32be, +grayf32le, yuva422p12be, yuva422p12le, +yuva444p12be, yuva444p12le, nv24, nv42, vulkan, +y210be, y210le .TP \fB\-\-output\-csp\fR Specify output colorspace ["i420"] -\- i420, i422, i444, rgb +\- i400, i420, i422, i444, rgb .HP \fB\-\-input\-depth\fR Specify input bit depth for raw input +.HP +\fB\-\-output\-depth\fR Specify output bit depth .TP \fB\-\-input\-range\fR Specify input color range ["auto"] @@ -654,6 +664,9 @@ \fB\-\-avcintra\-class\fR Use compatibility hacks for AVC\-Intra class \- 50, 100, 200 .TP +\fB\-\-avcintra\-flavor\fR AVC\-Intra flavor ["panasonic"] +\- panasonic, sony +.TP \fB\-\-stitchable\fR Don't optimize headers based on video content Ensures ability to recombine a segmented encode @@ -762,7 +775,7 @@ \- fittobox and sar: same as above except with specified sar \- csp: convert to the given csp. syntax: [name][:depth] .IP -\- valid csp names [keep current]: i420, yv12, nv12, nv21, i422, yv16, nv16, yuyv, uyvy, i444, yv24, bgr, bgra, rgb +\- valid csp names [keep current]: i400, i420, yv12, nv12, nv21, i422, yv16, nv16, yuyv, uyvy, i444, yv24, bgr, bgra, rgb \- depth: 8 or 16 bits per pixel [keep current] .IP note: not all depths are supported by all csps. 
@@ -778,11 +791,11 @@ offsets: the offset into the step to select a frame see: http://avisynth.nl/index.php/Select#SelectEvery .PP -(libswscale 4.8.100) -(libavformat 57.83.100) +(libswscale 5.7.100) +(libavformat 58.45.100) (ffmpegsource 2.23.0.0) -built on Dec 31 2017, gcc: 7.2.0 -x264 configuration: \fB\-\-bit\-depth\fR=\fI\,8\/\fR \fB\-\-chroma\-format\fR=\fI\,all\/\fR -libx264 configuration: \fB\-\-bit\-depth\fR=\fI\,10\/\fR \fB\-\-chroma\-format\fR=\fI\,all\/\fR +built on Jul 13 2020, gcc: 9.3.0 +x264 configuration: \fB\-\-chroma\-format\fR=\fI\,all\/\fR +libx264 configuration: \fB\-\-chroma\-format\fR=\fI\,all\/\fR x264 license: GPL version 2 or later libswscale/libavformat/ffmpegsource license: GPL version 2 or later diff -Nru x264-0.157.2935+git545de2f/debian/x264-10bit.1 x264-0.160.3011+gitcde9a93/debian/x264-10bit.1 --- x264-0.157.2935+git545de2f/debian/x264-10bit.1 2018-01-19 11:35:17.000000000 +0000 +++ x264-0.160.3011+gitcde9a93/debian/x264-10bit.1 1970-01-01 00:00:00.000000000 +0000 @@ -1,13 +0,0 @@ -.TH x264-10bit 1 -.SH NAME -x264-10bit \- wrapper script for the 10-bit variant of the libx264 shared library -.SH SYNOPSIS -x264-10bit -.SH DESCRIPTION -This is a wrapper script that sets the LD_LIBRARY_PATH variable so that the 10-bit variant of the libx264 shared library is preferred over the regular 8-bit variant. It then calls the program with the arguments . -.SH OPTIONS -This wrapper script has no options. All arguments are passed over to the called program. 
-.SH SEE ALSO -x264(1) -.SH AUTHOR -Reinhard Tartler diff -Nru x264-0.157.2935+git545de2f/debian/x264-10bit.in x264-0.160.3011+gitcde9a93/debian/x264-10bit.in --- x264-0.157.2935+git545de2f/debian/x264-10bit.in 2018-01-19 11:35:17.000000000 +0000 +++ x264-0.160.3011+gitcde9a93/debian/x264-10bit.in 1970-01-01 00:00:00.000000000 +0000 @@ -1,27 +0,0 @@ -#!/bin/bash -# -# Copyright (C) 2014 Reinhard Tartler -# -# This program is free software; you can redistribute it and/or modify -# it under the terms of the GNU General Public License as published by -# the Free Software Foundation; either version 2 of the License, or -# (at your option) any later version. -# . -# This program is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -# GNU General Public License for more details. -# . -# You should have received a copy of the GNU General Public License -# along with this program. If not, see . - -export LD_LIBRARY_PATH=/usr/lib/@DEB_HOST_MULTIARCH@/x264-10bit${LD_LIBRARY_PATH:+:}${LD_LIBRARY_PATH:-} - -if [ $# -gt 0 ]; then - exec "$@" - echo "Failed to execute '$@'" >&2 - exit 1 -fi - -echo "Usage `basename $0` " >&2 -exit 2 diff -Nru x264-0.157.2935+git545de2f/debian/x264-get-orig-source x264-0.160.3011+gitcde9a93/debian/x264-get-orig-source --- x264-0.157.2935+git545de2f/debian/x264-get-orig-source 2018-01-19 11:35:17.000000000 +0000 +++ x264-0.160.3011+gitcde9a93/debian/x264-get-orig-source 2020-07-13 19:29:03.000000000 +0000 @@ -2,9 +2,9 @@ # Script used to generate the orig source tarball for x264. 
-X264_GIT_URL="git://git.videolan.org/x264.git" -X264_GIT_COMMIT="e9a5903edf8ca59ef20e6f4894c196f135af735e" -DATE_RETRIEVED="20171224" +X264_GIT_URL="https://code.videolan.org/videolan/x264.git" +X264_GIT_COMMIT="cde9a93319bea766a92e306d69059c76de970190" +DATE_RETRIEVED="20200702" COMMIT_SHORT_FORM="$(echo $X264_GIT_COMMIT | \ sed -e 's/^\([[:xdigit:]]\{,7\}\).*/\1/')" diff -Nru x264-0.157.2935+git545de2f/debian/x264.install x264-0.160.3011+gitcde9a93/debian/x264.install --- x264-0.157.2935+git545de2f/debian/x264.install 2018-01-19 11:35:17.000000000 +0000 +++ x264-0.160.3011+gitcde9a93/debian/x264.install 2018-08-24 15:41:41.000000000 +0000 @@ -1,2 +1 @@ shared/usr/bin/x264 usr/bin -shared/usr/bin/x264-10bit usr/bin diff -Nru x264-0.157.2935+git545de2f/debian/x264.manpages x264-0.160.3011+gitcde9a93/debian/x264.manpages --- x264-0.157.2935+git545de2f/debian/x264.manpages 2018-01-19 11:35:17.000000000 +0000 +++ x264-0.160.3011+gitcde9a93/debian/x264.manpages 2018-08-24 15:41:41.000000000 +0000 @@ -1,2 +1 @@ debian/x264.1 -debian/x264-10bit.1 diff -Nru x264-0.157.2935+git545de2f/encoder/analyse.c x264-0.160.3011+gitcde9a93/encoder/analyse.c --- x264-0.157.2935+git545de2f/encoder/analyse.c 2018-09-25 12:35:00.000000000 +0000 +++ x264-0.160.3011+gitcde9a93/encoder/analyse.c 2020-07-13 10:30:21.000000000 +0000 @@ -1,7 +1,7 @@ /***************************************************************************** * analyse.c: macroblock analysis ***************************************************************************** - * Copyright (C) 2003-2018 x264 project + * Copyright (C) 2003-2020 x264 project * * Authors: Laurent Aimar * Loren Merritt @@ -49,8 +49,8 @@ int i_cost4x8[4]; /* cost per 8x8 partition */ int i_cost16x8; int i_cost8x16; - /* [ref][0] is 16x16 mv, [ref][1..4] are 8x8 mv from partition [0..3] */ - ALIGNED_4( int16_t mvc[32][5][2] ); + /* [ref][0] is 16x16 mv, [ref][1..4] are 8x8 mv from partition [0..3], [ref][5] is for alignment */ + ALIGNED_8( int16_t 
mvc[32][6][2] ); } x264_mb_analysis_list_t; typedef struct @@ -145,7 +145,7 @@ if( h->cost_mv[qp] ) return 0; - int mv_range = h->param.analyse.i_mv_range; + int mv_range = h->param.analyse.i_mv_range << PARAM_INTERLACED; int lambda = x264_lambda_tab[qp]; /* factor of 4 from qpel, 2 from sign, and 2 because mv can be opposite from mvp */ CHECKED_MALLOC( h->cost_mv[qp], (4*4*mv_range + 1) * sizeof(uint16_t) ); @@ -178,7 +178,7 @@ int x264_analyse_init_costs( x264_t *h ) { - int mv_range = h->param.analyse.i_mv_range; + int mv_range = h->param.analyse.i_mv_range << PARAM_INTERLACED; float *logs = x264_malloc( (2*4*mv_range+1) * sizeof(float) ); if( !logs ) return -1; @@ -203,7 +203,7 @@ void x264_analyse_free_costs( x264_t *h ) { - int mv_range = h->param.analyse.i_mv_range; + int mv_range = h->param.analyse.i_mv_range << PARAM_INTERLACED; for( int i = 0; i < QP_MAX+1; i++ ) { if( h->cost_mv[i] ) @@ -359,8 +359,8 @@ for( int i = (h->sh.i_type == SLICE_TYPE_B); i >= 0; i-- ) for( int j = 0; j < h->i_ref[i]; j++ ) { - x264_frame_cond_wait( h->fref[i][j]->orig, thresh ); - thread_mvy_range = X264_MIN( thread_mvy_range, h->fref[i][j]->orig->i_lines_completed - pix_y ); + int completed = x264_frame_cond_wait( h->fref[i][j]->orig, thresh ); + thread_mvy_range = X264_MIN( thread_mvy_range, completed - pix_y ); } if( h->param.b_deterministic ) @@ -783,10 +783,11 @@ { if( !h->mb.b_lossless && predict_mode[5] >= 0 ) { - ALIGNED_ARRAY_16( int32_t, satd,[9] ); + ALIGNED_ARRAY_16( int32_t, satd,[4] ); h->pixf.intra_mbcmp_x3_8x8( p_src_by, edge, satd ); int favor_vertical = satd[I_PRED_4x4_H] > satd[I_PRED_4x4_V]; - satd[i_pred_mode] -= 3 * lambda; + if( i_pred_mode < 3 ) + satd[i_pred_mode] -= 3 * lambda; for( int i = 2; i >= 0; i-- ) { int cost = satd[i]; @@ -901,10 +902,11 @@ { if( !h->mb.b_lossless && predict_mode[5] >= 0 ) { - ALIGNED_ARRAY_16( int32_t, satd,[9] ); + ALIGNED_ARRAY_16( int32_t, satd,[4] ); h->pixf.intra_mbcmp_x3_4x4( p_src_by, p_dst_by, satd ); int 
favor_vertical = satd[I_PRED_4x4_H] > satd[I_PRED_4x4_V]; - satd[i_pred_mode] -= 3 * lambda; + if( i_pred_mode < 3 ) + satd[i_pred_mode] -= 3 * lambda; i_best = satd[I_PRED_4x4_DC]; a->i_predict4x4[idx] = I_PRED_4x4_DC; COPY2_IF_LT( i_best, satd[I_PRED_4x4_H], a->i_predict4x4[idx], I_PRED_4x4_H ); COPY2_IF_LT( i_best, satd[I_PRED_4x4_V], a->i_predict4x4[idx], I_PRED_4x4_V ); @@ -1088,8 +1090,8 @@ for( int idx = 0; idx < 16; idx++ ) { pixel *dst[3] = {h->mb.pic.p_fdec[0] + block_idx_xy_fdec[idx], - h->mb.pic.p_fdec[1] + block_idx_xy_fdec[idx], - h->mb.pic.p_fdec[2] + block_idx_xy_fdec[idx]}; + CHROMA_FORMAT ? h->mb.pic.p_fdec[1] + block_idx_xy_fdec[idx] : NULL, + CHROMA_FORMAT ? h->mb.pic.p_fdec[2] + block_idx_xy_fdec[idx] : NULL}; i_best = COST_MAX64; const int8_t *predict_mode = predict_4x4_mode_available( a->b_avoid_topright, h->mb.i_neighbour4[idx], idx ); @@ -1143,8 +1145,8 @@ int y = idx>>1; int s8 = X264_SCAN8_0 + 2*x + 16*y; pixel *dst[3] = {h->mb.pic.p_fdec[0] + 8*x + 8*y*FDEC_STRIDE, - h->mb.pic.p_fdec[1] + 8*x + 8*y*FDEC_STRIDE, - h->mb.pic.p_fdec[2] + 8*x + 8*y*FDEC_STRIDE}; + CHROMA_FORMAT ? h->mb.pic.p_fdec[1] + 8*x + 8*y*FDEC_STRIDE : NULL, + CHROMA_FORMAT ? h->mb.pic.p_fdec[2] + 8*x + 8*y*FDEC_STRIDE : NULL}; int cbp_luma_new = 0; int i_thresh = a->b_early_terminate ? 
a->i_satd_i8x8_dir[idx][a->i_predict8x8[idx]] * 11/8 : COST_MAX; @@ -1205,30 +1207,40 @@ (m)->i_stride[1] = h->mb.pic.i_stride[1]; \ (m)->i_stride[2] = h->mb.pic.i_stride[2]; \ (m)->p_fenc[0] = &(src)[0][(xoff)+(yoff)*FENC_STRIDE]; \ - (m)->p_fenc[1] = &(src)[1][((xoff)>>CHROMA_H_SHIFT)+((yoff)>>CHROMA_V_SHIFT)*FENC_STRIDE]; \ - (m)->p_fenc[2] = &(src)[2][((xoff)>>CHROMA_H_SHIFT)+((yoff)>>CHROMA_V_SHIFT)*FENC_STRIDE]; \ + if( CHROMA_FORMAT ) \ + { \ + (m)->p_fenc[1] = &(src)[1][((xoff)>>CHROMA_H_SHIFT)+((yoff)>>CHROMA_V_SHIFT)*FENC_STRIDE]; \ + (m)->p_fenc[2] = &(src)[2][((xoff)>>CHROMA_H_SHIFT)+((yoff)>>CHROMA_V_SHIFT)*FENC_STRIDE]; \ + } \ } #define LOAD_HPELS(m, src, list, ref, xoff, yoff) \ { \ (m)->p_fref_w = (m)->p_fref[0] = &(src)[0][(xoff)+(yoff)*(m)->i_stride[0]]; \ - (m)->p_fref[1] = &(src)[1][(xoff)+(yoff)*(m)->i_stride[0]]; \ - (m)->p_fref[2] = &(src)[2][(xoff)+(yoff)*(m)->i_stride[0]]; \ - (m)->p_fref[3] = &(src)[3][(xoff)+(yoff)*(m)->i_stride[0]]; \ + if( h->param.analyse.i_subpel_refine ) \ + { \ + (m)->p_fref[1] = &(src)[1][(xoff)+(yoff)*(m)->i_stride[0]]; \ + (m)->p_fref[2] = &(src)[2][(xoff)+(yoff)*(m)->i_stride[0]]; \ + (m)->p_fref[3] = &(src)[3][(xoff)+(yoff)*(m)->i_stride[0]]; \ + } \ if( CHROMA444 ) \ { \ (m)->p_fref[ 4] = &(src)[ 4][(xoff)+(yoff)*(m)->i_stride[1]]; \ - (m)->p_fref[ 5] = &(src)[ 5][(xoff)+(yoff)*(m)->i_stride[1]]; \ - (m)->p_fref[ 6] = &(src)[ 6][(xoff)+(yoff)*(m)->i_stride[1]]; \ - (m)->p_fref[ 7] = &(src)[ 7][(xoff)+(yoff)*(m)->i_stride[1]]; \ (m)->p_fref[ 8] = &(src)[ 8][(xoff)+(yoff)*(m)->i_stride[2]]; \ - (m)->p_fref[ 9] = &(src)[ 9][(xoff)+(yoff)*(m)->i_stride[2]]; \ - (m)->p_fref[10] = &(src)[10][(xoff)+(yoff)*(m)->i_stride[2]]; \ - (m)->p_fref[11] = &(src)[11][(xoff)+(yoff)*(m)->i_stride[2]]; \ + if( h->param.analyse.i_subpel_refine ) \ + { \ + (m)->p_fref[ 5] = &(src)[ 5][(xoff)+(yoff)*(m)->i_stride[1]]; \ + (m)->p_fref[ 6] = &(src)[ 6][(xoff)+(yoff)*(m)->i_stride[1]]; \ + (m)->p_fref[ 7] = &(src)[ 
7][(xoff)+(yoff)*(m)->i_stride[1]]; \ + (m)->p_fref[ 9] = &(src)[ 9][(xoff)+(yoff)*(m)->i_stride[2]]; \ + (m)->p_fref[10] = &(src)[10][(xoff)+(yoff)*(m)->i_stride[2]]; \ + (m)->p_fref[11] = &(src)[11][(xoff)+(yoff)*(m)->i_stride[2]]; \ + } \ } \ - else \ + else if( CHROMA_FORMAT ) \ (m)->p_fref[4] = &(src)[4][(xoff)+((yoff)>>CHROMA_V_SHIFT)*(m)->i_stride[1]]; \ - (m)->integral = &h->mb.pic.p_integral[list][ref][(xoff)+(yoff)*(m)->i_stride[0]]; \ + if( h->param.analyse.i_me_method >= X264_ME_ESA ) \ + (m)->integral = &h->mb.pic.p_integral[list][ref][(xoff)+(yoff)*(m)->i_stride[0]]; \ (m)->weight = x264_weight_none; \ (m)->i_ref = ref; \ } @@ -1244,7 +1256,7 @@ { x264_me_t m; int i_mvc; - ALIGNED_4( int16_t mvc[8][2] ); + ALIGNED_ARRAY_8( int16_t, mvc,[8],[2] ); int i_halfpel_thresh = INT_MAX; int *p_halfpel_thresh = (a->b_early_terminate && h->mb.pic.i_fref[0]>1) ? &i_halfpel_thresh : NULL; @@ -1469,7 +1481,7 @@ { x264_me_t m; pixel **p_fenc = h->mb.pic.p_fenc; - ALIGNED_4( int16_t mvc[3][2] ); + ALIGNED_ARRAY_8( int16_t, mvc,[3],[2] ); /* XXX Needed for x264_mb_predict_mv */ h->mb.i_partition = D_16x8; @@ -1535,7 +1547,7 @@ { x264_me_t m; pixel **p_fenc = h->mb.pic.p_fenc; - ALIGNED_4( int16_t mvc[3][2] ); + ALIGNED_ARRAY_8( int16_t, mvc,[3],[2] ); /* XXX Needed for x264_mb_predict_mv */ h->mb.i_partition = D_8x16; @@ -1882,7 +1894,7 @@ pixel *src0, *src1; intptr_t stride0 = 16, stride1 = 16; int i_ref, i_mvc; - ALIGNED_4( int16_t mvc[9][2] ); + ALIGNED_ARRAY_8( int16_t, mvc,[9],[2] ); int try_skip = a->b_try_skip; int list1_skipped = 0; int i_halfpel_thresh[2] = {INT_MAX, INT_MAX}; @@ -2348,7 +2360,7 @@ static void mb_analyse_inter_b16x8( x264_t *h, x264_mb_analysis_t *a, int i_best_satd ) { ALIGNED_ARRAY_32( pixel, pix,[2],[16*8] ); - ALIGNED_4( int16_t mvc[3][2] ); + ALIGNED_ARRAY_8( int16_t, mvc,[3],[2] ); h->mb.i_partition = D_16x8; a->i_cost16x8bi = 0; @@ -2442,7 +2454,7 @@ static void mb_analyse_inter_b8x16( x264_t *h, x264_mb_analysis_t *a, int i_best_satd 
) { ALIGNED_ARRAY_16( pixel, pix,[2],[8*16] ); - ALIGNED_4( int16_t mvc[3][2] ); + ALIGNED_ARRAY_8( int16_t, mvc,[3],[2] ); h->mb.i_partition = D_8x16; a->i_cost8x16bi = 0; @@ -3857,7 +3869,7 @@ int ref = h->mb.cache.ref[l][x264_scan8[0]]; if( ref < 0 ) continue; - completed = h->fref[l][ ref >> MB_INTERLACED ]->orig->i_lines_completed; + completed = x264_frame_cond_wait( h->fref[l][ ref >> MB_INTERLACED ]->orig, -1 ); if( (h->mb.cache.mv[l][x264_scan8[15]][1] >> (2 - MB_INTERLACED)) + h->mb.i_mb_y*16 > completed ) { x264_log( h, X264_LOG_WARNING, "internal error (MV out of thread range)\n"); diff -Nru x264-0.157.2935+git545de2f/encoder/analyse.h x264-0.160.3011+gitcde9a93/encoder/analyse.h --- x264-0.157.2935+git545de2f/encoder/analyse.h 2018-09-25 12:35:00.000000000 +0000 +++ x264-0.160.3011+gitcde9a93/encoder/analyse.h 2020-07-13 10:30:21.000000000 +0000 @@ -1,7 +1,7 @@ /***************************************************************************** * analyse.h: macroblock analysis ***************************************************************************** - * Copyright (C) 2003-2018 x264 project + * Copyright (C) 2003-2020 x264 project * * Authors: Laurent Aimar * Loren Merritt diff -Nru x264-0.157.2935+git545de2f/encoder/api.c x264-0.160.3011+gitcde9a93/encoder/api.c --- x264-0.157.2935+git545de2f/encoder/api.c 2018-09-25 12:35:00.000000000 +0000 +++ x264-0.160.3011+gitcde9a93/encoder/api.c 2020-07-13 10:30:21.000000000 +0000 @@ -1,7 +1,7 @@ /***************************************************************************** * api.c: bit depth independent interface ***************************************************************************** - * Copyright (C) 2003-2018 x264 project + * Copyright (C) 2003-2020 x264 project * * Authors: Vittorio Giovara * Luca Barbato @@ -73,7 +73,7 @@ int (*encoder_invalidate_reference)( x264_t *, int64_t pts ); } x264_api_t; -static x264_api_t *encoder_open( x264_param_t *param ) +REALIGN_STACK x264_t *x264_encoder_open( 
x264_param_t *param ) { x264_api_t *api = calloc( 1, sizeof( x264_api_t ) ); if( !api ) @@ -118,82 +118,77 @@ return NULL; } - return api; -} - -x264_t *x264_encoder_open( x264_param_t *param ) -{ /* x264_t is opaque */ - return (x264_t *)x264_stack_align( encoder_open, param ); + return (x264_t *)api; } -void x264_encoder_close( x264_t *h ) +REALIGN_STACK void x264_encoder_close( x264_t *h ) { x264_api_t *api = (x264_api_t *)h; - x264_stack_align( api->encoder_close, api->x264 ); + api->encoder_close( api->x264 ); free( api ); } -void x264_nal_encode( x264_t *h, uint8_t *dst, x264_nal_t *nal ) +REALIGN_STACK void x264_nal_encode( x264_t *h, uint8_t *dst, x264_nal_t *nal ) { x264_api_t *api = (x264_api_t *)h; - x264_stack_align( api->nal_encode, api->x264, dst, nal ); + api->nal_encode( api->x264, dst, nal ); } -int x264_encoder_reconfig( x264_t *h, x264_param_t *param) +REALIGN_STACK int x264_encoder_reconfig( x264_t *h, x264_param_t *param) { x264_api_t *api = (x264_api_t *)h; - return x264_stack_align( api->encoder_reconfig, api->x264, param ); + return api->encoder_reconfig( api->x264, param ); } -void x264_encoder_parameters( x264_t *h, x264_param_t *param ) +REALIGN_STACK void x264_encoder_parameters( x264_t *h, x264_param_t *param ) { x264_api_t *api = (x264_api_t *)h; - x264_stack_align( api->encoder_parameters, api->x264, param ); + api->encoder_parameters( api->x264, param ); } -int x264_encoder_headers( x264_t *h, x264_nal_t **pp_nal, int *pi_nal ) +REALIGN_STACK int x264_encoder_headers( x264_t *h, x264_nal_t **pp_nal, int *pi_nal ) { x264_api_t *api = (x264_api_t *)h; - return x264_stack_align( api->encoder_headers, api->x264, pp_nal, pi_nal ); + return api->encoder_headers( api->x264, pp_nal, pi_nal ); } -int x264_encoder_encode( x264_t *h, x264_nal_t **pp_nal, int *pi_nal, x264_picture_t *pic_in, x264_picture_t *pic_out ) +REALIGN_STACK int x264_encoder_encode( x264_t *h, x264_nal_t **pp_nal, int *pi_nal, x264_picture_t *pic_in, x264_picture_t 
*pic_out ) { x264_api_t *api = (x264_api_t *)h; - return x264_stack_align( api->encoder_encode, api->x264, pp_nal, pi_nal, pic_in, pic_out ); + return api->encoder_encode( api->x264, pp_nal, pi_nal, pic_in, pic_out ); } -int x264_encoder_delayed_frames( x264_t *h ) +REALIGN_STACK int x264_encoder_delayed_frames( x264_t *h ) { x264_api_t *api = (x264_api_t *)h; - return x264_stack_align( api->encoder_delayed_frames, api->x264 ); + return api->encoder_delayed_frames( api->x264 ); } -int x264_encoder_maximum_delayed_frames( x264_t *h ) +REALIGN_STACK int x264_encoder_maximum_delayed_frames( x264_t *h ) { x264_api_t *api = (x264_api_t *)h; - return x264_stack_align( api->encoder_maximum_delayed_frames, api->x264 ); + return api->encoder_maximum_delayed_frames( api->x264 ); } -void x264_encoder_intra_refresh( x264_t *h ) +REALIGN_STACK void x264_encoder_intra_refresh( x264_t *h ) { x264_api_t *api = (x264_api_t *)h; - x264_stack_align( api->encoder_intra_refresh, api->x264 ); + api->encoder_intra_refresh( api->x264 ); } -int x264_encoder_invalidate_reference( x264_t *h, int64_t pts ) +REALIGN_STACK int x264_encoder_invalidate_reference( x264_t *h, int64_t pts ) { x264_api_t *api = (x264_api_t *)h; - return x264_stack_align( api->encoder_invalidate_reference, api->x264, pts ); + return api->encoder_invalidate_reference( api->x264, pts ); } diff -Nru x264-0.157.2935+git545de2f/encoder/cabac.c x264-0.160.3011+gitcde9a93/encoder/cabac.c --- x264-0.157.2935+git545de2f/encoder/cabac.c 2018-09-25 12:35:00.000000000 +0000 +++ x264-0.160.3011+gitcde9a93/encoder/cabac.c 2020-07-13 10:30:21.000000000 +0000 @@ -1,7 +1,7 @@ /***************************************************************************** * cabac.c: cabac bitstream writing ***************************************************************************** - * Copyright (C) 2003-2018 x264 project + * Copyright (C) 2003-2020 x264 project * * Authors: Laurent Aimar * Loren Merritt diff -Nru 
x264-0.157.2935+git545de2f/encoder/cavlc.c x264-0.160.3011+gitcde9a93/encoder/cavlc.c --- x264-0.157.2935+git545de2f/encoder/cavlc.c 2018-09-25 12:35:00.000000000 +0000 +++ x264-0.160.3011+gitcde9a93/encoder/cavlc.c 2020-07-13 10:30:21.000000000 +0000 @@ -1,7 +1,7 @@ /***************************************************************************** * cavlc.c: cavlc bitstream writing ***************************************************************************** - * Copyright (C) 2003-2018 x264 project + * Copyright (C) 2003-2020 x264 project * * Authors: Laurent Aimar * Loren Merritt diff -Nru x264-0.157.2935+git545de2f/encoder/encoder.c x264-0.160.3011+gitcde9a93/encoder/encoder.c --- x264-0.157.2935+git545de2f/encoder/encoder.c 2018-09-25 12:35:00.000000000 +0000 +++ x264-0.160.3011+gitcde9a93/encoder/encoder.c 2020-07-13 10:30:21.000000000 +0000 @@ -1,7 +1,7 @@ /***************************************************************************** * encoder.c: top-level encoder functions ***************************************************************************** - * Copyright (C) 2003-2018 x264 project + * Copyright (C) 2003-2020 x264 project * * Authors: Laurent Aimar * Loren Merritt @@ -94,23 +94,23 @@ threadpool_wait_all( h ); /* Write the frame in display order */ - int frame_size = FRAME_SIZE( h->param.i_height * h->param.i_width * sizeof(pixel) ); + int frame_size = FRAME_SIZE( h->param.i_height * h->param.i_width * SIZEOF_PIXEL ); if( !fseek( f, (int64_t)h->fdec->i_frame * frame_size, SEEK_SET ) ) { for( int p = 0; p < (CHROMA444 ? 
3 : 1); p++ ) for( int y = 0; y < h->param.i_height; y++ ) - fwrite( &h->fdec->plane[p][y*h->fdec->i_stride[p]], sizeof(pixel), h->param.i_width, f ); + fwrite( &h->fdec->plane[p][y*h->fdec->i_stride[p]], SIZEOF_PIXEL, h->param.i_width, f ); if( CHROMA_FORMAT == CHROMA_420 || CHROMA_FORMAT == CHROMA_422 ) { int cw = h->param.i_width>>1; int ch = h->param.i_height>>CHROMA_V_SHIFT; - pixel *planeu = x264_malloc( 2 * (cw*ch*sizeof(pixel) + 32) ); + pixel *planeu = x264_malloc( 2 * (cw*ch*SIZEOF_PIXEL + 32) ); if( planeu ) { - pixel *planev = planeu + cw*ch + 32/sizeof(pixel); + pixel *planev = planeu + cw*ch + 32/SIZEOF_PIXEL; h->mc.plane_copy_deinterleave( planeu, cw, planev, cw, h->fdec->plane[1], h->fdec->i_stride[1], cw, ch ); - fwrite( planeu, 1, cw*ch*sizeof(pixel), f ); - fwrite( planev, 1, cw*ch*sizeof(pixel), f ); + fwrite( planeu, 1, cw*ch*SIZEOF_PIXEL, f ); + fwrite( planev, 1, cw*ch*SIZEOF_PIXEL, f ); x264_free( planeu ); } } @@ -370,6 +370,8 @@ if( (b_cabac && (h->cabac.p_end - h->cabac.p < size)) || (h->out.bs.p_end - h->out.bs.p < size) ) { + if( size > INT_MAX - h->out.i_bitstream ) + return -1; int buf_size = h->out.i_bitstream + size; uint8_t *buf = x264_malloc( buf_size ); if( !buf ) @@ -437,7 +439,7 @@ #if HAVE_MMX if( b_open ) { - int cpuflags = x264_cpu_detect(); + uint32_t cpuflags = x264_cpu_detect(); int fail = 0; #ifdef __SSE__ if( !(cpuflags & X264_CPU_SSE) ) @@ -470,7 +472,9 @@ } #endif - if( h->param.i_width <= 0 || h->param.i_height <= 0 ) +#define MAX_RESOLUTION 16384 + if( h->param.i_width <= 0 || h->param.i_height <= 0 || + h->param.i_width > MAX_RESOLUTION || h->param.i_height > MAX_RESOLUTION ) { x264_log( h, X264_LOG_ERROR, "invalid width x height (%dx%d)\n", h->param.i_width, h->param.i_height ); @@ -508,7 +512,7 @@ } int w_mod = 1; - int h_mod = 1 << PARAM_INTERLACED; + int h_mod = 1 << (PARAM_INTERLACED || h->param.b_fake_interlaced); if( i_csp == X264_CSP_I400 ) { h->param.analyse.i_chroma_qp_offset = 0; @@ -535,21 +539,21 @@ 
return -1; } - if( h->param.crop_rect.i_left >= h->param.i_width || - h->param.crop_rect.i_right >= h->param.i_width || - h->param.crop_rect.i_top >= h->param.i_height || - h->param.crop_rect.i_bottom >= h->param.i_height || + if( h->param.crop_rect.i_left < 0 || h->param.crop_rect.i_left >= h->param.i_width || + h->param.crop_rect.i_right < 0 || h->param.crop_rect.i_right >= h->param.i_width || + h->param.crop_rect.i_top < 0 || h->param.crop_rect.i_top >= h->param.i_height || + h->param.crop_rect.i_bottom < 0 || h->param.crop_rect.i_bottom >= h->param.i_height || h->param.crop_rect.i_left + h->param.crop_rect.i_right >= h->param.i_width || h->param.crop_rect.i_top + h->param.crop_rect.i_bottom >= h->param.i_height ) { - x264_log( h, X264_LOG_ERROR, "invalid crop-rect %u,%u,%u,%u\n", h->param.crop_rect.i_left, + x264_log( h, X264_LOG_ERROR, "invalid crop-rect %d,%d,%d,%d\n", h->param.crop_rect.i_left, h->param.crop_rect.i_top, h->param.crop_rect.i_right, h->param.crop_rect.i_bottom ); return -1; } if( h->param.crop_rect.i_left % w_mod || h->param.crop_rect.i_right % w_mod || h->param.crop_rect.i_top % h_mod || h->param.crop_rect.i_bottom % h_mod ) { - x264_log( h, X264_LOG_ERROR, "crop-rect %u,%u,%u,%u not divisible by %dx%d\n", h->param.crop_rect.i_left, + x264_log( h, X264_LOG_ERROR, "crop-rect %d,%d,%d,%d not divisible by %dx%d\n", h->param.crop_rect.i_left, h->param.crop_rect.i_top, h->param.crop_rect.i_right, h->param.crop_rect.i_bottom, w_mod, h_mod ); return -1; } @@ -821,6 +825,8 @@ if( h->param.i_avcintra_flavor == X264_AVCINTRA_FLAVOR_SONY ) { h->param.i_slice_count = 8; + if( h->param.b_sliced_threads ) + h->param.i_threads = h->param.i_slice_count; /* Sony XAVC unlike AVC-Intra doesn't seem to have a QP floor */ } else @@ -860,8 +866,8 @@ h->param.rc.f_rf_constant_max = x264_clip3f( h->param.rc.f_rf_constant_max, -QP_BD_OFFSET, 51 ); h->param.rc.i_qp_constant = x264_clip3( h->param.rc.i_qp_constant, -1, QP_MAX ); h->param.analyse.i_subpel_refine = 
x264_clip3( h->param.analyse.i_subpel_refine, 0, 11 ); - h->param.rc.f_ip_factor = X264_MAX( h->param.rc.f_ip_factor, 0.01f ); - h->param.rc.f_pb_factor = X264_MAX( h->param.rc.f_pb_factor, 0.01f ); + h->param.rc.f_ip_factor = x264_clip3f( h->param.rc.f_ip_factor, 0.01, 10.0 ); + h->param.rc.f_pb_factor = x264_clip3f( h->param.rc.f_pb_factor, 0.01, 10.0 ); if( h->param.rc.i_rc_method == X264_RC_CRF ) { h->param.rc.i_qp_constant = h->param.rc.f_rf_constant + QP_BD_OFFSET; @@ -887,13 +893,6 @@ /* 8x8dct is not useful without RD in CAVLC lossless */ if( !h->param.b_cabac && h->param.analyse.i_subpel_refine < 6 ) h->param.analyse.b_transform_8x8 = 0; - h->param.analyse.inter &= ~X264_ANALYSE_I8x8; - h->param.analyse.intra &= ~X264_ANALYSE_I8x8; - } - if( i_csp >= X264_CSP_I444 && h->param.b_cabac ) - { - /* Disable 8x8dct during 4:4:4+CABAC encoding for compatibility with libavcodec */ - h->param.analyse.b_transform_8x8 = 0; } if( h->param.rc.i_rc_method == X264_RC_CQP ) { @@ -1652,9 +1651,13 @@ } h->out.i_nal = 0; - h->out.i_bitstream = X264_MAX( 1000000, h->param.i_width * h->param.i_height * 4 - * ( h->param.rc.i_rc_method == X264_RC_ABR ? pow( 0.95, h->param.rc.i_qp_min ) - : pow( 0.95, h->param.rc.i_qp_constant ) * X264_MAX( 1, h->param.rc.f_ip_factor ))); + h->out.i_bitstream = x264_clip3f( + h->param.i_width * h->param.i_height * 4 + * ( h->param.rc.i_rc_method == X264_RC_ABR + ? pow( 0.95, h->param.rc.i_qp_min ) + : pow( 0.95, h->param.rc.i_qp_constant ) * X264_MAX( 1, h->param.rc.f_ip_factor ) ), + 1000000, INT_MAX/3 + ); h->nal_buffer_size = h->out.i_bitstream * 3/2 + 4 + 64; /* +4 for startcode, +64 for nal_escape assembly padding */ CHECKED_MALLOC( h->nal_buffer, h->nal_buffer_size ); @@ -1763,9 +1766,11 @@ const char *profile = h->sps->i_profile_idc == PROFILE_BASELINE ? "Constrained Baseline" : h->sps->i_profile_idc == PROFILE_MAIN ? "Main" : h->sps->i_profile_idc == PROFILE_HIGH ? "High" : - h->sps->i_profile_idc == PROFILE_HIGH10 ? 
(h->sps->b_constraint_set3 == 1 ? "High 10 Intra" : "High 10") : - h->sps->i_profile_idc == PROFILE_HIGH422 ? (h->sps->b_constraint_set3 == 1 ? "High 4:2:2 Intra" : "High 4:2:2") : - h->sps->b_constraint_set3 == 1 ? "High 4:4:4 Intra" : "High 4:4:4 Predictive"; + h->sps->i_profile_idc == PROFILE_HIGH10 ? + (h->sps->b_constraint_set3 ? "High 10 Intra" : "High 10") : + h->sps->i_profile_idc == PROFILE_HIGH422 ? + (h->sps->b_constraint_set3 ? "High 4:2:2 Intra" : "High 4:2:2") : + h->sps->b_constraint_set3 ? "High 4:4:4 Intra" : "High 4:4:4 Predictive"; char level[4]; snprintf( level, sizeof(level), "%d.%d", h->sps->i_level_idc/10, h->sps->i_level_idc%10 ); if( h->sps->i_level_idc == 9 || ( h->sps->i_level_idc == 11 && h->sps->b_constraint_set3 && @@ -1943,11 +1948,13 @@ } static int check_encapsulated_buffer( x264_t *h, x264_t *h0, int start, - int previous_nal_size, int necessary_size ) + int64_t previous_nal_size, int64_t necessary_size ) { if( h0->nal_buffer_size < necessary_size ) { necessary_size *= 2; + if( necessary_size > INT_MAX ) + return -1; uint8_t *buf = x264_malloc( necessary_size ); if( !buf ) return -1; @@ -1969,12 +1976,14 @@ static int encoder_encapsulate_nals( x264_t *h, int start ) { x264_t *h0 = h->thread[0]; - int nal_size = 0, previous_nal_size = 0; + int64_t nal_size = 0, previous_nal_size = 0; if( h->param.nalu_process ) { for( int i = start; i < h->out.i_nal; i++ ) nal_size += h->out.nal[i].i_payload; + if( nal_size > INT_MAX ) + return -1; return nal_size; } @@ -1985,7 +1994,7 @@ nal_size += h->out.nal[i].i_payload; /* Worst-case NAL unit escaping: reallocate the buffer if it's too small. 
*/ - int necessary_size = previous_nal_size + nal_size * 3/2 + h->out.i_nal * 4 + 4 + 64; + int64_t necessary_size = previous_nal_size + nal_size * 3/2 + h->out.i_nal * 4 + 4 + 64; for( int i = start; i < h->out.i_nal; i++ ) necessary_size += h->out.nal[i].i_padding; if( check_encapsulated_buffer( h, h0, start, previous_nal_size, necessary_size ) ) @@ -2381,7 +2390,7 @@ for( int i = minpix_y>>(CHROMA_V_SHIFT && p); i < maxpix_y>>(CHROMA_V_SHIFT && p); i++ ) memcpy( h->fdec->plane_fld[p] + i*h->fdec->i_stride[p], h->fdec->plane[p] + i*h->fdec->i_stride[p], - h->mb.i_mb_width*16*sizeof(pixel) ); + h->mb.i_mb_width*16*SIZEOF_PIXEL ); if( h->fdec->b_kept_as_ref && (!h->param.b_sliced_threads || pass == 1) ) x264_frame_expand_border( h, h->fdec, min_y ); @@ -3076,6 +3085,7 @@ { int i_slice_num = 0; int last_thread_mb = h->sh.i_last_mb; + int round_bias = h->param.i_avcintra_class ? 0 : h->param.i_slice_count/2; /* init stats */ memset( &h->stat.frame, 0, sizeof(h->stat.frame) ); @@ -3110,7 +3120,7 @@ int height = h->mb.i_mb_height >> PARAM_INTERLACED; int width = h->mb.i_mb_width << PARAM_INTERLACED; i_slice_num++; - h->sh.i_last_mb = (height * i_slice_num + h->param.i_slice_count/2) / h->param.i_slice_count * width - 1; + h->sh.i_last_mb = (height * i_slice_num + round_bias) / h->param.i_slice_count * width - 1; } } h->sh.i_last_mb = X264_MIN( h->sh.i_last_mb, last_thread_mb ); @@ -3133,6 +3143,8 @@ static int threaded_slices_write( x264_t *h ) { + int round_bias = h->param.i_avcintra_class ? 
0 : h->param.i_slice_count/2; + /* set first/last mb and sync contexts */ for( int i = 0; i < h->param.i_threads; i++ ) { @@ -3143,8 +3155,8 @@ memcpy( &t->i_frame, &h->i_frame, offsetof(x264_t, rc) - offsetof(x264_t, i_frame) ); } int height = h->mb.i_mb_height >> PARAM_INTERLACED; - t->i_threadslice_start = ((height * i + h->param.i_slice_count/2) / h->param.i_threads) << PARAM_INTERLACED; - t->i_threadslice_end = ((height * (i+1) + h->param.i_slice_count/2) / h->param.i_threads) << PARAM_INTERLACED; + t->i_threadslice_start = ((height * i + round_bias) / h->param.i_threads) << PARAM_INTERLACED; + t->i_threadslice_end = ((height * (i+1) + round_bias) / h->param.i_threads) << PARAM_INTERLACED; t->sh.i_first_mb = t->i_threadslice_start * h->mb.i_mb_width; t->sh.i_last_mb = t->i_threadslice_end * h->mb.i_mb_width - 1; } @@ -3179,7 +3191,7 @@ nal_check_buffer( h ); } /* All entries in stat.frame are ints except for ssd/ssim. */ - for( int j = 0; j < (offsetof(x264_t,stat.frame.i_ssd) - offsetof(x264_t,stat.frame.i_mv_bits)) / sizeof(int); j++ ) + for( size_t j = 0; j < (offsetof(x264_t,stat.frame.i_ssd) - offsetof(x264_t,stat.frame.i_mv_bits)) / sizeof(int); j++ ) ((int*)&h->stat.frame)[j] += ((int*)&t->stat.frame)[j]; for( int j = 0; j < 3; j++ ) h->stat.frame.i_ssd[j] += t->stat.frame.i_ssd[j]; @@ -3859,7 +3871,7 @@ pic_out->img.i_plane = h->fdec->i_plane; for( int i = 0; i < pic_out->img.i_plane; i++ ) { - pic_out->img.i_stride[i] = h->fdec->i_stride[i] * sizeof(pixel); + pic_out->img.i_stride[i] = h->fdec->i_stride[i] * SIZEOF_PIXEL; pic_out->img.plane[i] = (uint8_t*)h->fdec->plane[i]; } @@ -3879,7 +3891,7 @@ * We don't know the size of the last slice until encapsulation so we add filler to the encapsulated NAL */ if( h->param.i_avcintra_class ) { - if( check_encapsulated_buffer( h, h->thread[0], h->out.i_nal, frame_size, frame_size + filler ) < 0 ) + if( check_encapsulated_buffer( h, h->thread[0], h->out.i_nal, frame_size, (int64_t)frame_size + filler ) < 0 ) 
return -1; x264_nal_t *nal = &h->out.nal[h->out.i_nal-1]; @@ -3946,8 +3958,6 @@ for( int i = 0; i < X264_MBTYPE_MAX; i++ ) h->stat.i_mb_count[h->sh.i_type][i] += h->stat.frame.i_mb_count[i]; - for( int i = 0; i < X264_PARTTYPE_MAX; i++ ) - h->stat.i_mb_partition[h->sh.i_type][i] += h->stat.frame.i_mb_partition[i]; for( int i = 0; i < 2; i++ ) h->stat.i_mb_count_8x8dct[i] += h->stat.frame.i_mb_count_8x8dct[i]; for( int i = 0; i < 6; i++ ) @@ -3956,9 +3966,13 @@ for( int j = 0; j < 13; j++ ) h->stat.i_mb_pred_mode[i][j] += h->stat.frame.i_mb_pred_mode[i][j]; if( h->sh.i_type != SLICE_TYPE_I ) + { + for( int i = 0; i < X264_PARTTYPE_MAX; i++ ) + h->stat.i_mb_partition[h->sh.i_type][i] += h->stat.frame.i_mb_partition[i]; for( int i_list = 0; i_list < 2; i_list++ ) for( int i = 0; i < X264_REF_MAX*2; i++ ) h->stat.i_mb_count_ref[h->sh.i_type][i_list][i] += h->stat.frame.i_mb_count_ref[i_list][i]; + } for( int i = 0; i < 3; i++ ) h->stat.i_mb_field[i] += h->stat.frame.i_mb_field[i]; if( h->sh.i_type == SLICE_TYPE_P && h->param.analyse.i_weighted_pred >= X264_WEIGHTP_SIMPLE ) diff -Nru x264-0.157.2935+git545de2f/encoder/lookahead.c x264-0.160.3011+gitcde9a93/encoder/lookahead.c --- x264-0.157.2935+git545de2f/encoder/lookahead.c 2018-09-25 12:35:00.000000000 +0000 +++ x264-0.160.3011+gitcde9a93/encoder/lookahead.c 2020-07-13 10:30:21.000000000 +0000 @@ -1,7 +1,7 @@ /***************************************************************************** * lookahead.c: high-level lookahead functions ***************************************************************************** - * Copyright (C) 2010-2018 Avail Media and x264 project + * Copyright (C) 2010-2020 Avail Media and x264 project * * Authors: Michael Kazmier * Alex Giladi @@ -87,7 +87,7 @@ x264_pthread_mutex_unlock( &h->lookahead->ofbuf.mutex ); } -static void *lookahead_thread_internal( x264_t *h ) +REALIGN_STACK static void *lookahead_thread( x264_t *h ) { while( !h->lookahead->b_exit_thread ) { @@ -122,10 +122,6 @@ return 
NULL; } -static void *lookahead_thread( x264_t *h ) -{ - return (void*)x264_stack_align( lookahead_thread_internal, h ); -} #endif int x264_lookahead_init( x264_t *h, int i_slicetype_length ) diff -Nru x264-0.157.2935+git545de2f/encoder/macroblock.c x264-0.160.3011+gitcde9a93/encoder/macroblock.c --- x264-0.157.2935+git545de2f/encoder/macroblock.c 2018-09-25 12:35:00.000000000 +0000 +++ x264-0.160.3011+gitcde9a93/encoder/macroblock.c 2020-07-13 10:30:21.000000000 +0000 @@ -1,7 +1,7 @@ /***************************************************************************** * macroblock.c: macroblock encoding ***************************************************************************** - * Copyright (C) 2003-2018 x264 project + * Copyright (C) 2003-2020 x264 project * * Authors: Laurent Aimar * Loren Merritt @@ -530,8 +530,8 @@ { h->mc.copy[PIXEL_8x8]( h->mb.pic.p_fdec[1], FDEC_STRIDE, h->mb.pic.p_fenc[1]-FENC_STRIDE, FENC_STRIDE, height ); h->mc.copy[PIXEL_8x8]( h->mb.pic.p_fdec[2], FDEC_STRIDE, h->mb.pic.p_fenc[2]-FENC_STRIDE, FENC_STRIDE, height ); - memcpy( h->mb.pic.p_fdec[1], h->mb.pic.p_fdec[1]-FDEC_STRIDE, 8*sizeof(pixel) ); - memcpy( h->mb.pic.p_fdec[2], h->mb.pic.p_fdec[2]-FDEC_STRIDE, 8*sizeof(pixel) ); + memcpy( h->mb.pic.p_fdec[1], h->mb.pic.p_fdec[1]-FDEC_STRIDE, 8*SIZEOF_PIXEL ); + memcpy( h->mb.pic.p_fdec[2], h->mb.pic.p_fdec[2]-FDEC_STRIDE, 8*SIZEOF_PIXEL ); } else if( i_mode == I_PRED_CHROMA_H ) { @@ -560,7 +560,7 @@ if( i_mode == I_PRED_4x4_V ) { h->mc.copy[PIXEL_4x4]( p_dst, FDEC_STRIDE, p_src-stride, stride, 4 ); - memcpy( p_dst, p_dst-FDEC_STRIDE, 4*sizeof(pixel) ); + memcpy( p_dst, p_dst-FDEC_STRIDE, 4*SIZEOF_PIXEL ); } else if( i_mode == I_PRED_4x4_H ) { @@ -580,7 +580,7 @@ if( i_mode == I_PRED_8x8_V ) { h->mc.copy[PIXEL_8x8]( p_dst, FDEC_STRIDE, p_src-stride, stride, 8 ); - memcpy( p_dst, &edge[16], 8*sizeof(pixel) ); + memcpy( p_dst, &edge[16], 8*SIZEOF_PIXEL ); } else if( i_mode == I_PRED_8x8_H ) { @@ -600,7 +600,7 @@ if( i_mode == I_PRED_16x16_V ) { 
h->mc.copy[PIXEL_16x16]( p_dst, FDEC_STRIDE, h->mb.pic.p_fenc_plane[p]-stride, stride, 16 ); - memcpy( p_dst, p_dst-FDEC_STRIDE, 16*sizeof(pixel) ); + memcpy( p_dst, p_dst-FDEC_STRIDE, 16*SIZEOF_PIXEL ); } else if( i_mode == I_PRED_16x16_H ) { diff -Nru x264-0.157.2935+git545de2f/encoder/macroblock.h x264-0.160.3011+gitcde9a93/encoder/macroblock.h --- x264-0.157.2935+git545de2f/encoder/macroblock.h 2018-09-25 12:35:00.000000000 +0000 +++ x264-0.160.3011+gitcde9a93/encoder/macroblock.h 2020-07-13 10:30:21.000000000 +0000 @@ -1,7 +1,7 @@ /***************************************************************************** * macroblock.h: macroblock encoding ***************************************************************************** - * Copyright (C) 2003-2018 x264 project + * Copyright (C) 2003-2020 x264 project * * Authors: Loren Merritt * Laurent Aimar diff -Nru x264-0.157.2935+git545de2f/encoder/me.c x264-0.160.3011+gitcde9a93/encoder/me.c --- x264-0.157.2935+git545de2f/encoder/me.c 2018-09-25 12:35:00.000000000 +0000 +++ x264-0.160.3011+gitcde9a93/encoder/me.c 2020-07-13 10:30:21.000000000 +0000 @@ -1,7 +1,7 @@ /***************************************************************************** * me.c: motion estimation ***************************************************************************** - * Copyright (C) 2003-2018 x264 project + * Copyright (C) 2003-2020 x264 project * * Authors: Loren Merritt * Laurent Aimar @@ -1043,8 +1043,8 @@ int chroma_x = (8 >> CHROMA_H_SHIFT) * x; int chroma_y = (8 >> chroma_v_shift) * y; pixel *pix = &h->mb.pic.p_fdec[0][8*x + 8*y*FDEC_STRIDE]; - pixel *pixu = &h->mb.pic.p_fdec[1][chroma_x + chroma_y*FDEC_STRIDE]; - pixel *pixv = &h->mb.pic.p_fdec[2][chroma_x + chroma_y*FDEC_STRIDE]; + pixel *pixu = CHROMA_FORMAT ? &h->mb.pic.p_fdec[1][chroma_x + chroma_y*FDEC_STRIDE] : NULL; + pixel *pixv = CHROMA_FORMAT ? 
&h->mb.pic.p_fdec[2][chroma_x + chroma_y*FDEC_STRIDE] : NULL; int ref0 = h->mb.cache.ref[0][s8]; int ref1 = h->mb.cache.ref[1][s8]; const int mv0y_offset = chroma_v_shift & MB_INTERLACED & ref0 ? (h->mb.i_mb_y & 1)*4 - 2 : 0; @@ -1256,11 +1256,16 @@ pixu = &h->mb.pic.p_fdec[1][block_idx_xy_fdec[i4]]; pixv = &h->mb.pic.p_fdec[2][block_idx_xy_fdec[i4]]; } - else + else if( CHROMA_FORMAT ) { pixu = &h->mb.pic.p_fdec[1][(i8>>1)*(8*FDEC_STRIDE>>chroma_v_shift)+(i8&1)*4]; pixv = &h->mb.pic.p_fdec[2][(i8>>1)*(8*FDEC_STRIDE>>chroma_v_shift)+(i8&1)*4]; } + else + { + pixu = NULL; + pixv = NULL; + } h->mb.b_skip_mc = 1; diff -Nru x264-0.157.2935+git545de2f/encoder/me.h x264-0.160.3011+gitcde9a93/encoder/me.h --- x264-0.157.2935+git545de2f/encoder/me.h 2018-09-25 12:35:00.000000000 +0000 +++ x264-0.160.3011+gitcde9a93/encoder/me.h 2020-07-13 10:30:21.000000000 +0000 @@ -1,7 +1,7 @@ /***************************************************************************** * me.h: motion estimation ***************************************************************************** - * Copyright (C) 2003-2018 x264 project + * Copyright (C) 2003-2020 x264 project * * Authors: Loren Merritt * Laurent Aimar @@ -52,7 +52,7 @@ /* output */ int cost_mv; /* lambda * nbits for the chosen mv */ int cost; /* satd + lambda * nbits */ - ALIGNED_4( int16_t mv[2] ); + ALIGNED_8( int16_t mv[2] ); } ALIGNED_64( x264_me_t ); #define x264_me_search_ref x264_template(me_search_ref) diff -Nru x264-0.157.2935+git545de2f/encoder/ratecontrol.c x264-0.160.3011+gitcde9a93/encoder/ratecontrol.c --- x264-0.157.2935+git545de2f/encoder/ratecontrol.c 2018-09-25 12:35:00.000000000 +0000 +++ x264-0.160.3011+gitcde9a93/encoder/ratecontrol.c 2020-07-13 10:30:21.000000000 +0000 @@ -1,13 +1,13 @@ /***************************************************************************** * ratecontrol.c: ratecontrol ***************************************************************************** - * Copyright (C) 2005-2018 x264 project + * 
Copyright (C) 2005-2020 x264 project * * Authors: Loren Merritt * Michael Niedermayer * Gabriel Bouvigne * Fiona Glaser - * Mns Rullgrd + * Måns Rullgård * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by @@ -154,8 +154,8 @@ } mbtree; /* MBRC stuff */ - float frame_size_estimated; /* Access to this variable must be atomic: double is - * not atomic on all arches we care about */ + volatile float frame_size_estimated; /* Access to this variable must be atomic: double is + * not atomic on all arches we care about */ double frame_size_maximum; /* Maximum frame size due to MinCR */ double frame_size_planned; double slice_size_planned; @@ -553,7 +553,7 @@ if( !fread( &i_type, 1, 1, rc->p_mbtree_stat_file_in ) ) goto fail; - if( fread( rc->mbtree.qp_buffer[rc->mbtree.qpbuf_pos], sizeof(uint16_t), rc->mbtree.src_mb_count, rc->p_mbtree_stat_file_in ) != rc->mbtree.src_mb_count ) + if( fread( rc->mbtree.qp_buffer[rc->mbtree.qpbuf_pos], sizeof(uint16_t), rc->mbtree.src_mb_count, rc->p_mbtree_stat_file_in ) != (unsigned)rc->mbtree.src_mb_count ) goto fail; if( i_type != i_type_actual && rc->mbtree.qpbuf_pos == 1 ) @@ -713,8 +713,9 @@ rc->vbv_max_rate = vbv_max_bitrate; rc->buffer_size = vbv_buffer_size; rc->single_frame_vbv = rc->buffer_rate * 1.1 > rc->buffer_size; - rc->cbr_decay = 1.0 - rc->buffer_rate / rc->buffer_size - * 0.5 * X264_MAX(0, 1.5 - rc->buffer_rate * rc->fps / rc->bitrate); + if( rc->b_abr && h->param.rc.i_rc_method == X264_RC_ABR ) + rc->cbr_decay = 1.0 - rc->buffer_rate / rc->buffer_size + * 0.5 * X264_MAX(0, 1.5 - rc->buffer_rate * rc->fps / rc->bitrate); if( h->param.rc.i_rc_method == X264_RC_CRF && h->param.rc.f_rf_constant_max ) { rc->rate_factor_max_increment = h->param.rc.f_rf_constant_max - h->param.rc.f_rf_constant; @@ -771,9 +772,9 @@ rc->last_non_b_pict_type = -1; rc->cbr_decay = 1.0; - if( h->param.rc.i_rc_method == X264_RC_CRF && h->param.rc.b_stat_read 
) + if( h->param.rc.i_rc_method != X264_RC_ABR && h->param.rc.b_stat_read ) { - x264_log( h, X264_LOG_ERROR, "constant rate-factor is incompatible with 2pass.\n" ); + x264_log( h, X264_LOG_ERROR, "CRF/CQP is incompatible with 2pass.\n" ); return -1; } @@ -941,6 +942,7 @@ CMP_OPT_FIRST_PASS( "intra_refresh", h->param.b_intra_refresh ); CMP_OPT_FIRST_PASS( "open_gop", h->param.b_open_gop ); CMP_OPT_FIRST_PASS( "bluray_compat", h->param.b_bluray_compat ); + CMP_OPT_FIRST_PASS( "mbtree", h->param.rc.b_mb_tree ); if( (p = strstr( opts, "interlaced=" )) ) { @@ -1327,9 +1329,10 @@ static x264_zone_t *get_zone( x264_t *h, int frame_num ) { - for( int i = h->rc->i_zones - 1; i >= 0; i-- ) + x264_ratecontrol_t *rc = h->rc; + for( int i = rc->i_zones - 1; i >= 0; i-- ) { - x264_zone_t *z = &h->rc->zones[i]; + x264_zone_t *z = &rc->zones[i]; if( frame_num >= z->i_start && frame_num <= z->i_end ) return z; } @@ -1432,7 +1435,7 @@ { int frame = h->fenc->i_frame; assert( frame >= 0 && frame < rc->num_entries ); - rce = h->rc->rce = &h->rc->entry[frame]; + rce = rc->rce = &rc->entry[frame]; if( h->sh.i_type == SLICE_TYPE_B && h->param.analyse.i_direct_mv_pred == X264_DIRECT_PRED_AUTO ) @@ -1693,7 +1696,7 @@ b1 = bits_so_far + predict_row_size_to_end( h, y, rc->qpm ) + size_of_other_slices; } - h->rc->frame_size_estimated = b1 - size_of_other_slices; + rc->frame_size_estimated = b1 - size_of_other_slices; /* If the current row was large enough to cause a large QP jump, try re-encoding it. */ if( rc->qpm > qp_max && prev_row_qp < qp_max && can_reencode_row ) @@ -1709,12 +1712,12 @@ } else { - h->rc->frame_size_estimated = bits_so_far; + rc->frame_size_estimated = bits_so_far; /* Last-ditch attempt: if the last row of the frame underflowed the VBV, * try again. 
*/ if( rc->qpm < qp_max && can_reencode_row - && (h->rc->frame_size_estimated + size_of_other_slices > X264_MIN( rc->frame_size_maximum, rc->buffer_fill )) ) + && (bits_so_far + size_of_other_slices > X264_MIN( rc->frame_size_maximum, rc->buffer_fill )) ) { rc->qpm = qp_max; rc->qpa_rc = rc->qpa_rc_prev; @@ -1767,8 +1770,8 @@ h->param.rc.i_qp_constant = (h->stat.i_frame_count[SLICE_TYPE_P] == 0) ? 24 + QP_BD_OFFSET : 1 + h->stat.f_frame_qp[SLICE_TYPE_P] / h->stat.i_frame_count[SLICE_TYPE_P]; rc->qp_constant[SLICE_TYPE_P] = x264_clip3( h->param.rc.i_qp_constant, 0, QP_MAX ); - rc->qp_constant[SLICE_TYPE_I] = x264_clip3( (int)( qscale2qp( qp2qscale( h->param.rc.i_qp_constant ) / fabs( h->param.rc.f_ip_factor )) + 0.5 ), 0, QP_MAX ); - rc->qp_constant[SLICE_TYPE_B] = x264_clip3( (int)( qscale2qp( qp2qscale( h->param.rc.i_qp_constant ) * fabs( h->param.rc.f_pb_factor )) + 0.5 ), 0, QP_MAX ); + rc->qp_constant[SLICE_TYPE_I] = x264_clip3( (int)( qscale2qp( qp2qscale( h->param.rc.i_qp_constant ) / h->param.rc.f_ip_factor ) + 0.5 ), 0, QP_MAX ); + rc->qp_constant[SLICE_TYPE_B] = x264_clip3( (int)( qscale2qp( qp2qscale( h->param.rc.i_qp_constant ) * h->param.rc.f_pb_factor ) + 0.5 ), 0, QP_MAX ); x264_log( h, X264_LOG_ERROR, "2nd pass has more frames than 1st pass (%d)\n", rc->num_entries ); x264_log( h, X264_LOG_ERROR, "continuing anyway, at constant QP=%d\n", h->param.rc.i_qp_constant ); @@ -1893,7 +1896,7 @@ h->mc.mbtree_fix8_pack( rc->mbtree.qp_buffer[0], h->fenc->f_qp_offset, h->mb.i_mb_count ); if( fwrite( &i_type, 1, 1, rc->p_mbtree_stat_file_out ) < 1 ) goto fail; - if( fwrite( rc->mbtree.qp_buffer[0], sizeof(uint16_t), h->mb.i_mb_count, rc->p_mbtree_stat_file_out ) < h->mb.i_mb_count ) + if( fwrite( rc->mbtree.qp_buffer[0], sizeof(uint16_t), h->mb.i_mb_count, rc->p_mbtree_stat_file_out ) < (unsigned)h->mb.i_mb_count ) goto fail; } } @@ -1906,7 +1909,7 @@ { /* Depends on the fact that B-frame's QP is an offset from the following P-frame's. 
* Not perfectly accurate with B-refs, but good enough. */ - rc->cplxr_sum += bits * qp2qscale( rc->qpa_rc ) / (rc->last_rceq * fabs( h->param.rc.f_pb_factor )); + rc->cplxr_sum += bits * qp2qscale( rc->qpa_rc ) / (rc->last_rceq * h->param.rc.f_pb_factor); } rc->cplxr_sum *= rc->cbr_decay; rc->wanted_bits_window += h->fenc->f_duration * rc->bitrate; @@ -2027,18 +2030,14 @@ x264_zone_t *zone = get_zone( h, frame_num ); // force I/B quants as a function of P quants - const double last_p_q = rcc->last_qscale_for[SLICE_TYPE_P]; - const double last_non_b_q= rcc->last_qscale_for[rcc->last_non_b_pict_type]; if( pict_type == SLICE_TYPE_I ) { double iq = q; double pq = qp2qscale( rcc->accum_p_qp / rcc->accum_p_norm ); - double ip_factor = fabs( h->param.rc.f_ip_factor ); + double ip_factor = h->param.rc.f_ip_factor; /* don't apply ip_factor if the following frame is also I */ if( rcc->accum_p_norm <= 0 ) q = iq; - else if( h->param.rc.f_ip_factor < 0 ) - q = iq / ip_factor; else if( rcc->accum_p_norm >= 1 ) q = pq / ip_factor; else @@ -2046,16 +2045,15 @@ } else if( pict_type == SLICE_TYPE_B ) { - if( h->param.rc.f_pb_factor > 0 ) - q = last_non_b_q; + q = rcc->last_qscale_for[rcc->last_non_b_pict_type]; if( !rce->kept_as_ref ) - q *= fabs( h->param.rc.f_pb_factor ); + q *= h->param.rc.f_pb_factor; } else if( pict_type == SLICE_TYPE_P && rcc->last_non_b_pict_type == SLICE_TYPE_P && rce->tex_bits == 0 ) { - q = last_p_q; + q = rcc->last_qscale_for[SLICE_TYPE_P]; } /* last qscale / qdiff stuff */ @@ -2211,7 +2209,7 @@ rcc->buffer_fill = h->thread[0]->rc->buffer_fill_final_min / h->sps->vui.i_time_scale; if( h->i_thread_frames > 1 ) { - int j = h->rc - h->thread[0]->rc; + int j = rcc - h->thread[0]->rc; for( int i = 1; i < h->i_thread_frames; i++ ) { x264_t *t = h->thread[ (j+i)%h->i_thread_frames ]; @@ -2445,7 +2443,7 @@ /* Limit planned size by MinCR */ if( rcc->b_vbv ) rcc->frame_size_planned = X264_MIN( rcc->frame_size_planned, rcc->frame_size_maximum ); - 
h->rc->frame_size_estimated = rcc->frame_size_planned; + rcc->frame_size_estimated = rcc->frame_size_planned; /* For row SATDs */ if( rcc->b_vbv ) @@ -2458,7 +2456,7 @@ double predicted_bits = total_bits; if( h->i_thread_frames > 1 ) { - int j = h->rc - h->thread[0]->rc; + int j = rcc - h->thread[0]->rc; for( int i = 1; i < h->i_thread_frames; i++ ) { x264_t *t = h->thread[(j+i) % h->i_thread_frames]; @@ -2582,7 +2580,7 @@ && rcc->last_non_b_pict_type != SLICE_TYPE_I ) { q = qp2qscale( rcc->accum_p_qp / rcc->accum_p_norm ); - q /= fabs( h->param.rc.f_ip_factor ); + q /= h->param.rc.f_ip_factor; } else if( h->i_frame > 0 ) { @@ -2602,7 +2600,7 @@ } else if( h->param.rc.i_rc_method == X264_RC_CRF && rcc->qcompress != 1 ) { - q = qp2qscale( ABR_INIT_QP ) / fabs( h->param.rc.f_ip_factor ); + q = qp2qscale( ABR_INIT_QP ) / h->param.rc.f_ip_factor; } rcc->qp_novbv = qscale2qp( q ); @@ -2614,7 +2612,7 @@ rcc->last_qscale = q; if( !(rcc->b_2pass && !rcc->b_vbv) && h->fenc->i_frame == 0 ) - rcc->last_qscale_for[SLICE_TYPE_P] = q * fabs( h->param.rc.f_ip_factor ); + rcc->last_qscale_for[SLICE_TYPE_P] = q * h->param.rc.f_ip_factor; if( rcc->b_2pass ) rcc->frame_size_planned = qscale2bits( &rce, q ); @@ -2627,7 +2625,7 @@ /* Limit planned size by MinCR */ if( rcc->b_vbv ) rcc->frame_size_planned = X264_MIN( rcc->frame_size_planned, rcc->frame_size_maximum ); - h->rc->frame_size_estimated = rcc->frame_size_planned; + rcc->frame_size_estimated = rcc->frame_size_planned; return q; } } diff -Nru x264-0.157.2935+git545de2f/encoder/ratecontrol.h x264-0.160.3011+gitcde9a93/encoder/ratecontrol.h --- x264-0.157.2935+git545de2f/encoder/ratecontrol.h 2018-09-25 12:35:00.000000000 +0000 +++ x264-0.160.3011+gitcde9a93/encoder/ratecontrol.h 2020-07-13 10:30:21.000000000 +0000 @@ -1,7 +1,7 @@ /***************************************************************************** * ratecontrol.h: ratecontrol ***************************************************************************** - * Copyright 
(C) 2003-2018 x264 project + * Copyright (C) 2003-2020 x264 project * * Authors: Loren Merritt * Laurent Aimar diff -Nru x264-0.157.2935+git545de2f/encoder/rdo.c x264-0.160.3011+gitcde9a93/encoder/rdo.c --- x264-0.157.2935+git545de2f/encoder/rdo.c 2018-09-25 12:35:00.000000000 +0000 +++ x264-0.160.3011+gitcde9a93/encoder/rdo.c 2020-07-13 10:30:21.000000000 +0000 @@ -1,7 +1,7 @@ /***************************************************************************** * rdo.c: rate-distortion optimization ***************************************************************************** - * Copyright (C) 2005-2018 x264 project + * Copyright (C) 2005-2020 x264 project * * Authors: Loren Merritt * Fiona Glaser @@ -374,8 +374,8 @@ * Trellis RD quantization ****************************************************************************/ -#define TRELLIS_SCORE_MAX -1LL // negative marks the node as invalid -#define TRELLIS_SCORE_BIAS 1LL<<60; // bias so that all valid scores are positive, even after negative contributions from psy +#define TRELLIS_SCORE_MAX (~0ULL) // marks the node as invalid +#define TRELLIS_SCORE_BIAS (1ULL<<60) // bias so that all valid scores are positive, even after negative contributions from psy #define CABAC_SIZE_BITS 8 #define LAMBDA_BITS 4 @@ -705,8 +705,12 @@ } #if HAVE_MMX && ARCH_X86_64 && !defined( __MACH__ ) + uint64_t level_state0; + memcpy( &level_state0, cabac_state, sizeof(uint64_t) ); + uint16_t level_state1; + memcpy( &level_state1, cabac_state+8, sizeof(uint16_t) ); #define TRELLIS_ARGS unquant_mf, zigzag, lambda2, last_nnz, orig_coefs, quant_coefs, dct,\ - cabac_state_sig, cabac_state_last, M64(cabac_state), M16(cabac_state+8) + cabac_state_sig, cabac_state_last, level_state0, level_state1 if( num_coefs == 16 && !dc ) if( b_chroma || !h->mb.i_psy_trellis ) return h->quantf.trellis_cabac_4x4( TRELLIS_ARGS, b_ac ); diff -Nru x264-0.157.2935+git545de2f/encoder/set.c x264-0.160.3011+gitcde9a93/encoder/set.c --- x264-0.157.2935+git545de2f/encoder/set.c 
2018-09-25 12:35:00.000000000 +0000 +++ x264-0.160.3011+gitcde9a93/encoder/set.c 2020-07-13 10:30:21.000000000 +0000 @@ -1,7 +1,7 @@ /***************************************************************************** * set: header writing ***************************************************************************** - * Copyright (C) 2003-2018 x264 project + * Copyright (C) 2003-2020 x264 project * * Authors: Laurent Aimar * Loren Merritt @@ -105,6 +105,9 @@ sps->i_id = i_id; sps->i_mb_width = ( param->i_width + 15 ) / 16; sps->i_mb_height= ( param->i_height + 15 ) / 16; + sps->b_frame_mbs_only = !(param->b_interlaced || param->b_fake_interlaced); + if( !sps->b_frame_mbs_only ) + sps->i_mb_height = ( sps->i_mb_height + 1 ) & ~1; sps->i_chroma_format_idc = csp >= X264_CSP_I444 ? CHROMA_444 : csp >= X264_CSP_I422 ? CHROMA_422 : csp >= X264_CSP_I420 ? CHROMA_420 : CHROMA_400; @@ -179,9 +182,6 @@ sps->b_vui = 1; sps->b_gaps_in_frame_num_value_allowed = 0; - sps->b_frame_mbs_only = !(param->b_interlaced || param->b_fake_interlaced); - if( !sps->b_frame_mbs_only ) - sps->i_mb_height = ( sps->i_mb_height + 1 ) & ~1; sps->b_mb_adaptive_frame_field = param->b_interlaced; sps->b_direct8x8_inference = 1; @@ -250,7 +250,7 @@ sps->crop.i_left = param->crop_rect.i_left; sps->crop.i_top = param->crop_rect.i_top; sps->crop.i_right = param->crop_rect.i_right + sps->i_mb_width*16 - param->i_width; - sps->crop.i_bottom = (param->crop_rect.i_bottom + sps->i_mb_height*16 - param->i_height) >> !sps->b_frame_mbs_only; + sps->crop.i_bottom = param->crop_rect.i_bottom + sps->i_mb_height*16 - param->i_height; sps->b_crop = sps->crop.i_left || sps->crop.i_top || sps->crop.i_right || sps->crop.i_bottom; @@ -363,7 +363,7 @@ if( sps->b_crop ) { int h_shift = sps->i_chroma_format_idc == CHROMA_420 || sps->i_chroma_format_idc == CHROMA_422; - int v_shift = sps->i_chroma_format_idc == CHROMA_420; + int v_shift = (sps->i_chroma_format_idc == CHROMA_420) + !sps->b_frame_mbs_only; bs_write_ue( s, 
sps->crop.i_left >> h_shift ); bs_write_ue( s, sps->crop.i_right >> h_shift ); bs_write_ue( s, sps->crop.i_top >> v_shift ); @@ -594,7 +594,7 @@ memcpy( payload, uuid, 16 ); sprintf( payload+16, "x264 - core %d%s - H.264/MPEG-4 AVC codec - " - "Copy%s 2003-2018 - http://www.videolan.org/x264.html - options: %s", + "Copy%s 2003-2020 - http://www.videolan.org/x264.html - options: %s", X264_BUILD, X264_VERSION, HAVE_GPL?"left":"right", opts ); length = strlen(payload)+1; @@ -794,7 +794,7 @@ { uint8_t data[6000]; const char *msg = "VANC"; - if( len > sizeof(data) ) + if( len < 0 || (unsigned)len > sizeof(data) ) { x264_log( h, X264_LOG_ERROR, "AVC-Intra SEI is too large (%d)\n", len ); return -1; diff -Nru x264-0.157.2935+git545de2f/encoder/set.h x264-0.160.3011+gitcde9a93/encoder/set.h --- x264-0.157.2935+git545de2f/encoder/set.h 2018-09-25 12:35:00.000000000 +0000 +++ x264-0.160.3011+gitcde9a93/encoder/set.h 2020-07-13 10:30:21.000000000 +0000 @@ -1,7 +1,7 @@ /***************************************************************************** * set.h: header writing ***************************************************************************** - * Copyright (C) 2003-2018 x264 project + * Copyright (C) 2003-2020 x264 project * * Authors: Laurent Aimar * Loren Merritt diff -Nru x264-0.157.2935+git545de2f/encoder/slicetype.c x264-0.160.3011+gitcde9a93/encoder/slicetype.c --- x264-0.157.2935+git545de2f/encoder/slicetype.c 2018-09-25 12:35:00.000000000 +0000 +++ x264-0.160.3011+gitcde9a93/encoder/slicetype.c 2020-07-13 10:30:21.000000000 +0000 @@ -1,7 +1,7 @@ /***************************************************************************** * slicetype.c: lookahead analysis ***************************************************************************** - * Copyright (C) 2005-2018 x264 project + * Copyright (C) 2005-2020 x264 project * * Authors: Fiona Glaser * Loren Merritt @@ -297,12 +297,21 @@ float ref_mean[3]; for( int plane = 0; plane <= 2*!b_lookahead; plane++ ) { - int 
zero_bias = !ref->i_pixel_ssd[plane]; - float fenc_var = fenc->i_pixel_ssd[plane] + zero_bias; - float ref_var = ref->i_pixel_ssd[plane] + zero_bias; - guess_scale[plane] = sqrtf( fenc_var / ref_var ); - fenc_mean[plane] = (float)(fenc->i_pixel_sum[plane] + zero_bias) / (fenc->i_lines[!!plane] * fenc->i_width[!!plane]) / (1 << (BIT_DEPTH - 8)); - ref_mean[plane] = (float)( ref->i_pixel_sum[plane] + zero_bias) / (fenc->i_lines[!!plane] * fenc->i_width[!!plane]) / (1 << (BIT_DEPTH - 8)); + if( !plane || CHROMA_FORMAT ) + { + int zero_bias = !ref->i_pixel_ssd[plane]; + float fenc_var = fenc->i_pixel_ssd[plane] + zero_bias; + float ref_var = ref->i_pixel_ssd[plane] + zero_bias; + guess_scale[plane] = sqrtf( fenc_var / ref_var ); + fenc_mean[plane] = (float)(fenc->i_pixel_sum[plane] + zero_bias) / (fenc->i_lines[!!plane] * fenc->i_width[!!plane]) / (1 << (BIT_DEPTH - 8)); + ref_mean[plane] = (float)( ref->i_pixel_sum[plane] + zero_bias) / (fenc->i_lines[!!plane] * fenc->i_width[!!plane]) / (1 << (BIT_DEPTH - 8)); + } + else + { + guess_scale[plane] = 1; + fenc_mean[plane] = 0; + ref_mean[plane] = 0; + } } int chroma_denom = 7; @@ -405,8 +414,7 @@ * because scale has a much wider range than offset (because of denom), so * it should almost never need to be clamped. */ cur_offset = x264_clip3( cur_offset, -128, 127 ); - cur_scale = (1 << mindenom) * (fenc_mean[plane] - cur_offset) / ref_mean[plane] + 0.5f; - cur_scale = x264_clip3( cur_scale, 0, 127 ); + cur_scale = x264_clip3f( (1 << mindenom) * (fenc_mean[plane] - cur_offset) / ref_mean[plane] + 0.5f, 0, 127 ); } int start_offset = x264_clip3( cur_offset - offset_dist, -128, 127 ); int end_offset = x264_clip3( cur_offset + offset_dist, -128, 127 ); @@ -520,8 +528,8 @@ const int i_stride = fenc->i_stride_lowres; const int i_pel_offset = 8 * (i_mb_x + i_mb_y * i_stride); const int i_bipred_weight = h->param.analyse.b_weighted_bipred ? 
64 - (dist_scale_factor>>2) : 32; - int16_t (*fenc_mvs[2])[2] = { &fenc->lowres_mvs[0][b-p0-1][i_mb_xy], &fenc->lowres_mvs[1][p1-b-1][i_mb_xy] }; - int (*fenc_costs[2]) = { &fenc->lowres_mv_costs[0][b-p0-1][i_mb_xy], &fenc->lowres_mv_costs[1][p1-b-1][i_mb_xy] }; + int16_t (*fenc_mvs[2])[2] = { b != p0 ? &fenc->lowres_mvs[0][b-p0-1][i_mb_xy] : NULL, b != p1 ? &fenc->lowres_mvs[1][p1-b-1][i_mb_xy] : NULL }; + int (*fenc_costs[2]) = { b != p0 ? &fenc->lowres_mv_costs[0][b-p0-1][i_mb_xy] : NULL, b != p1 ? &fenc->lowres_mv_costs[1][p1-b-1][i_mb_xy] : NULL }; int b_frame_score_mb = (i_mb_x > 0 && i_mb_x < h->mb.i_mb_width - 1 && i_mb_y > 0 && i_mb_y < h->mb.i_mb_height - 1) || h->mb.i_mb_width <= 2 || h->mb.i_mb_height <= 2; @@ -650,7 +658,7 @@ { int i_mvc = 0; int16_t (*fenc_mv)[2] = fenc_mvs[l]; - ALIGNED_4( int16_t mvc[4][2] ); + ALIGNED_ARRAY_8( int16_t, mvc,[4],[2] ); /* Reverse-order MV prediction. */ M32( mvc[0] ) = 0; @@ -712,10 +720,10 @@ pixel *src = &fenc->lowres[0][i_pel_offset]; const int intra_penalty = 5 * a->i_lambda; int satds[3]; - int pixoff = 4 / sizeof(pixel); + int pixoff = 4 / SIZEOF_PIXEL; /* Avoid store forwarding stalls by writing larger chunks */ - memcpy( pix-FDEC_STRIDE, src-i_stride, 16 * sizeof(pixel) ); + memcpy( pix-FDEC_STRIDE, src-i_stride, 16 * SIZEOF_PIXEL ); for( int i = -1; i < 8; i++ ) M32( &pix[i*FDEC_STRIDE-pixoff] ) = M32( &src[i*i_stride-pixoff] ); @@ -1046,7 +1054,7 @@ uint16_t *ref_costs[2] = {frames[p0]->i_propagate_cost,frames[p1]->i_propagate_cost}; int dist_scale_factor = ( ((b-p0) << 8) + ((p1-p0) >> 1) ) / (p1-p0); int i_bipred_weight = h->param.analyse.b_weighted_bipred ? 64 - (dist_scale_factor>>2) : 32; - int16_t (*mvs[2])[2] = { frames[b]->lowres_mvs[0][b-p0-1], frames[b]->lowres_mvs[1][p1-b-1] }; + int16_t (*mvs[2])[2] = { b != p0 ? frames[b]->lowres_mvs[0][b-p0-1] : NULL, b != p1 ? 
frames[b]->lowres_mvs[1][p1-b-1] : NULL }; int bipred_weights[2] = {i_bipred_weight, 64 - i_bipred_weight}; int16_t *buf = h->scratch_buffer; uint16_t *propagate_cost = frames[b]->i_propagate_cost; diff -Nru x264-0.157.2935+git545de2f/encoder/slicetype-cl.c x264-0.160.3011+gitcde9a93/encoder/slicetype-cl.c --- x264-0.157.2935+git545de2f/encoder/slicetype-cl.c 2018-09-25 12:35:00.000000000 +0000 +++ x264-0.160.3011+gitcde9a93/encoder/slicetype-cl.c 2020-07-13 10:30:21.000000000 +0000 @@ -1,7 +1,7 @@ /***************************************************************************** * slicetype-cl.c: OpenCL slicetype decision code (lowres lookahead) ***************************************************************************** - * Copyright (C) 2012-2018 x264 project + * Copyright (C) 2012-2020 x264 project * * Authors: Steve Borho * @@ -101,8 +101,8 @@ if( !h->opencl.lowres_mv_costs ) { /* Allocate shared memory buffers */ - int width = h->mb.i_mb_width * 8 * sizeof(pixel); - int height = h->mb.i_mb_height * 8 * sizeof(pixel); + int width = h->mb.i_mb_width * 8 * SIZEOF_PIXEL; + int height = h->mb.i_mb_height * 8 * SIZEOF_PIXEL; cl_image_format pixel_format; pixel_format.image_channel_order = CL_R; @@ -135,8 +135,8 @@ if( !fenc->opencl.intra_cost ) { /* Allocate per-frame buffers */ - int width = h->mb.i_mb_width * 8 * sizeof(pixel); - int height = h->mb.i_mb_height * 8 * sizeof(pixel); + int width = h->mb.i_mb_width * 8 * SIZEOF_PIXEL; + int height = h->mb.i_mb_height * 8 * SIZEOF_PIXEL; cl_image_format pixel_format; pixel_format.image_channel_order = CL_R; diff -Nru x264-0.157.2935+git545de2f/encoder/slicetype-cl.h x264-0.160.3011+gitcde9a93/encoder/slicetype-cl.h --- x264-0.157.2935+git545de2f/encoder/slicetype-cl.h 2018-09-25 12:35:00.000000000 +0000 +++ x264-0.160.3011+gitcde9a93/encoder/slicetype-cl.h 2020-07-13 10:30:21.000000000 +0000 @@ -1,7 +1,7 @@ /***************************************************************************** * slicetype-cl.h: OpenCL slicetype 
decision code (lowres lookahead) ***************************************************************************** - * Copyright (C) 2017-2018 x264 project + * Copyright (C) 2017-2020 x264 project * * Authors: Anton Mitrofanov * diff -Nru x264-0.157.2935+git545de2f/example.c x264-0.160.3011+gitcde9a93/example.c --- x264-0.157.2935+git545de2f/example.c 2018-09-25 12:35:00.000000000 +0000 +++ x264-0.160.3011+gitcde9a93/example.c 2020-07-13 10:30:21.000000000 +0000 @@ -1,7 +1,7 @@ /***************************************************************************** * example.c: libx264 API usage example ***************************************************************************** - * Copyright (C) 2014-2018 x264 project + * Copyright (C) 2014-2020 x264 project * * Authors: Anton Mitrofanov * @@ -97,11 +97,11 @@ for( ;; i_frame++ ) { /* Read input frame */ - if( fread( pic.img.plane[0], 1, luma_size, stdin ) != luma_size ) + if( fread( pic.img.plane[0], 1, luma_size, stdin ) != (unsigned)luma_size ) break; - if( fread( pic.img.plane[1], 1, chroma_size, stdin ) != chroma_size ) + if( fread( pic.img.plane[1], 1, chroma_size, stdin ) != (unsigned)chroma_size ) break; - if( fread( pic.img.plane[2], 1, chroma_size, stdin ) != chroma_size ) + if( fread( pic.img.plane[2], 1, chroma_size, stdin ) != (unsigned)chroma_size ) break; pic.i_pts = i_frame; diff -Nru x264-0.157.2935+git545de2f/extras/intel_dispatcher.h x264-0.160.3011+gitcde9a93/extras/intel_dispatcher.h --- x264-0.157.2935+git545de2f/extras/intel_dispatcher.h 2018-09-25 12:35:00.000000000 +0000 +++ x264-0.160.3011+gitcde9a93/extras/intel_dispatcher.h 2020-07-13 10:30:21.000000000 +0000 @@ -1,7 +1,7 @@ /***************************************************************************** * intel_dispatcher.h: intel compiler cpu dispatcher override ***************************************************************************** - * Copyright (C) 2014-2018 x264 project + * Copyright (C) 2014-2020 x264 project * * Authors: Anton Mitrofanov * 
diff -Nru x264-0.157.2935+git545de2f/filters/filters.c x264-0.160.3011+gitcde9a93/filters/filters.c --- x264-0.157.2935+git545de2f/filters/filters.c 2018-09-25 12:35:00.000000000 +0000 +++ x264-0.160.3011+gitcde9a93/filters/filters.c 2020-07-13 10:30:21.000000000 +0000 @@ -1,7 +1,7 @@ /***************************************************************************** * filters.c: common filter functions ***************************************************************************** - * Copyright (C) 2010-2018 x264 project + * Copyright (C) 2010-2020 x264 project * * Authors: Diogo Franco * Steven Walters @@ -31,7 +31,8 @@ char **x264_split_options( const char *opt_str, const char * const *options ) { - int opt_count = 0, options_count = 0, found_named = 0, size = 0; + int opt_count = 0, options_count = 0, found_named = 0; + size_t size = 0; const char *opt = opt_str; if( !opt_str ) @@ -42,7 +43,7 @@ do { - int length = strcspn( opt, "=," ); + size_t length = strcspn( opt, "=," ); if( opt[length] == '=' ) { const char * const *option = options; @@ -63,7 +64,7 @@ opt += length; } while( *opt++ ); - int offset = 2 * (opt_count+1) * sizeof(char*); + size_t offset = 2 * (opt_count+1) * sizeof(char*); size += offset + (opt - opt_str); char **opts = calloc( 1, size ); RETURN_IF_ERROR( !opts, "malloc failed\n" ); @@ -77,7 +78,7 @@ for( int i = 0; i < 2*opt_count; ) { - int length = strcspn( opt_str, "=," ); + size_t length = strcspn( opt_str, "=," ); if( opt_str[length] == '=' ) { insert_opt( opt_str, length ); @@ -86,7 +87,7 @@ else { const char *option = options[i/2]; - int option_length = strlen( option ); + size_t option_length = strlen( option ); insert_opt( option, option_length ); } insert_opt( opt_str, length ); diff -Nru x264-0.157.2935+git545de2f/filters/filters.h x264-0.160.3011+gitcde9a93/filters/filters.h --- x264-0.157.2935+git545de2f/filters/filters.h 2018-09-25 12:35:00.000000000 +0000 +++ x264-0.160.3011+gitcde9a93/filters/filters.h 2020-07-13 10:30:21.000000000 
+0000 @@ -1,7 +1,7 @@ /***************************************************************************** * filters.h: common filter functions ***************************************************************************** - * Copyright (C) 2010-2018 x264 project + * Copyright (C) 2010-2020 x264 project * * Authors: Diogo Franco * Steven Walters diff -Nru x264-0.157.2935+git545de2f/filters/video/cache.c x264-0.160.3011+gitcde9a93/filters/video/cache.c --- x264-0.157.2935+git545de2f/filters/video/cache.c 2018-09-25 12:35:00.000000000 +0000 +++ x264-0.160.3011+gitcde9a93/filters/video/cache.c 2020-07-13 10:30:21.000000000 +0000 @@ -1,7 +1,7 @@ /***************************************************************************** * cache.c: cache video filter ***************************************************************************** - * Copyright (C) 2010-2018 x264 project + * Copyright (C) 2010-2020 x264 project * * Authors: Steven Walters * diff -Nru x264-0.157.2935+git545de2f/filters/video/crop.c x264-0.160.3011+gitcde9a93/filters/video/crop.c --- x264-0.157.2935+git545de2f/filters/video/crop.c 2018-09-25 12:35:00.000000000 +0000 +++ x264-0.160.3011+gitcde9a93/filters/video/crop.c 2020-07-13 10:30:21.000000000 +0000 @@ -1,7 +1,7 @@ /***************************************************************************** * crop.c: crop video filter ***************************************************************************** - * Copyright (C) 2010-2018 x264 project + * Copyright (C) 2010-2020 x264 project * * Authors: Steven Walters * James Darnley diff -Nru x264-0.157.2935+git545de2f/filters/video/depth.c x264-0.160.3011+gitcde9a93/filters/video/depth.c --- x264-0.157.2935+git545de2f/filters/video/depth.c 2018-09-25 12:35:00.000000000 +0000 +++ x264-0.160.3011+gitcde9a93/filters/video/depth.c 2020-07-13 10:30:21.000000000 +0000 @@ -1,7 +1,7 @@ /***************************************************************************** * depth.c: bit-depth conversion video filter 
***************************************************************************** - * Copyright (C) 2010-2018 x264 project + * Copyright (C) 2010-2020 x264 project * * Authors: Oskar Arvidsson * @@ -115,7 +115,7 @@ int width = x264_cli_csps[csp_mask].width[i] * img->width / num_interleaved; #define CALL_DITHER_PLANE( pitch, off ) \ - dither_plane_##pitch( ((pixel*)out->plane[i])+off, out->stride[i]/sizeof(pixel), \ + dither_plane_##pitch( ((pixel*)out->plane[i])+off, out->stride[i]/SIZEOF_PIXEL, \ ((uint16_t*)img->plane[i])+off, img->stride[i]/2, width, height, error_buf ) if( num_interleaved == 4 ) diff -Nru x264-0.157.2935+git545de2f/filters/video/fix_vfr_pts.c x264-0.160.3011+gitcde9a93/filters/video/fix_vfr_pts.c --- x264-0.157.2935+git545de2f/filters/video/fix_vfr_pts.c 2018-09-25 12:35:00.000000000 +0000 +++ x264-0.160.3011+gitcde9a93/filters/video/fix_vfr_pts.c 2020-07-13 10:30:21.000000000 +0000 @@ -1,7 +1,7 @@ /***************************************************************************** * fix_vfr_pts.c: vfr pts fixing video filter ***************************************************************************** - * Copyright (C) 2010-2018 x264 project + * Copyright (C) 2010-2020 x264 project * * Authors: Steven Walters * diff -Nru x264-0.157.2935+git545de2f/filters/video/internal.c x264-0.160.3011+gitcde9a93/filters/video/internal.c --- x264-0.157.2935+git545de2f/filters/video/internal.c 2018-09-25 12:35:00.000000000 +0000 +++ x264-0.160.3011+gitcde9a93/filters/video/internal.c 2020-07-13 10:30:21.000000000 +0000 @@ -1,7 +1,7 @@ /***************************************************************************** * internal.c: video filter utilities ***************************************************************************** - * Copyright (C) 2010-2018 x264 project + * Copyright (C) 2010-2020 x264 project * * Authors: Steven Walters * diff -Nru x264-0.157.2935+git545de2f/filters/video/internal.h x264-0.160.3011+gitcde9a93/filters/video/internal.h --- 
x264-0.157.2935+git545de2f/filters/video/internal.h 2018-09-25 12:35:00.000000000 +0000 +++ x264-0.160.3011+gitcde9a93/filters/video/internal.h 2020-07-13 10:30:21.000000000 +0000 @@ -1,7 +1,7 @@ /***************************************************************************** * internal.h: video filter utilities ***************************************************************************** - * Copyright (C) 2010-2018 x264 project + * Copyright (C) 2010-2020 x264 project * * Authors: Steven Walters * diff -Nru x264-0.157.2935+git545de2f/filters/video/resize.c x264-0.160.3011+gitcde9a93/filters/video/resize.c --- x264-0.157.2935+git545de2f/filters/video/resize.c 2018-09-25 12:35:00.000000000 +0000 +++ x264-0.160.3011+gitcde9a93/filters/video/resize.c 2020-07-13 10:30:21.000000000 +0000 @@ -1,7 +1,7 @@ /***************************************************************************** * resize.c: resize video filter ***************************************************************************** - * Copyright (C) 2010-2018 x264 project + * Copyright (C) 2010-2020 x264 project * * Authors: Steven Walters * @@ -483,6 +483,10 @@ int src_pix_fmt_inv = convert_csp_to_pix_fmt( info->csp ^ X264_CSP_HIGH_DEPTH ); int dst_pix_fmt_inv = convert_csp_to_pix_fmt( h->dst_csp ^ X264_CSP_HIGH_DEPTH ); + FAIL_IF_ERROR( h->dst.width <= 0 || h->dst.height <= 0 || + h->dst.width > MAX_RESOLUTION || h->dst.height > MAX_RESOLUTION, + "invalid width x height (%dx%d)\n", h->dst.width, h->dst.height ); + /* confirm swscale can support this conversion */ FAIL_IF_ERROR( src_pix_fmt == AV_PIX_FMT_NONE && src_pix_fmt_inv != AV_PIX_FMT_NONE, "input colorspace %s with bit depth %d is not supported\n", av_get_pix_fmt_name( src_pix_fmt_inv ), diff -Nru x264-0.157.2935+git545de2f/filters/video/select_every.c x264-0.160.3011+gitcde9a93/filters/video/select_every.c --- x264-0.157.2935+git545de2f/filters/video/select_every.c 2018-09-25 12:35:00.000000000 +0000 +++ 
x264-0.160.3011+gitcde9a93/filters/video/select_every.c 2020-07-13 10:30:21.000000000 +0000 @@ -1,7 +1,7 @@ /***************************************************************************** * select_every.c: select-every video filter ***************************************************************************** - * Copyright (C) 2010-2018 x264 project + * Copyright (C) 2010-2020 x264 project * * Authors: Steven Walters * @@ -63,7 +63,7 @@ h->pattern_len = 0; h->step_size = 0; int offsets[MAX_PATTERN_SIZE]; - for( char *tok, *p = opt_string; (tok = strtok( p, "," )); p = NULL ) + for( char *tok, *p = opt_string, UNUSED *saveptr = NULL; (tok = strtok_r( p, ",", &saveptr )); p = NULL ) { int val = x264_otoi( tok, -1 ); if( p ) diff -Nru x264-0.157.2935+git545de2f/filters/video/source.c x264-0.160.3011+gitcde9a93/filters/video/source.c --- x264-0.157.2935+git545de2f/filters/video/source.c 2018-09-25 12:35:00.000000000 +0000 +++ x264-0.160.3011+gitcde9a93/filters/video/source.c 2020-07-13 10:30:21.000000000 +0000 @@ -1,7 +1,7 @@ /***************************************************************************** * source.c: source video filter ***************************************************************************** - * Copyright (C) 2010-2018 x264 project + * Copyright (C) 2010-2020 x264 project * * Authors: Steven Walters * diff -Nru x264-0.157.2935+git545de2f/filters/video/video.c x264-0.160.3011+gitcde9a93/filters/video/video.c --- x264-0.157.2935+git545de2f/filters/video/video.c 2018-09-25 12:35:00.000000000 +0000 +++ x264-0.160.3011+gitcde9a93/filters/video/video.c 2020-07-13 10:30:21.000000000 +0000 @@ -1,7 +1,7 @@ /***************************************************************************** * video.c: video filters ***************************************************************************** - * Copyright (C) 2010-2018 x264 project + * Copyright (C) 2010-2020 x264 project * * Authors: Steven Walters * diff -Nru x264-0.157.2935+git545de2f/filters/video/video.h 
x264-0.160.3011+gitcde9a93/filters/video/video.h --- x264-0.157.2935+git545de2f/filters/video/video.h 2018-09-25 12:35:00.000000000 +0000 +++ x264-0.160.3011+gitcde9a93/filters/video/video.h 2020-07-13 10:30:21.000000000 +0000 @@ -1,7 +1,7 @@ /***************************************************************************** * video.h: video filters ***************************************************************************** - * Copyright (C) 2010-2018 x264 project + * Copyright (C) 2010-2020 x264 project * * Authors: Steven Walters * diff -Nru x264-0.157.2935+git545de2f/.gitlab-ci.yml x264-0.160.3011+gitcde9a93/.gitlab-ci.yml --- x264-0.157.2935+git545de2f/.gitlab-ci.yml 1970-01-01 00:00:00.000000000 +0000 +++ x264-0.160.3011+gitcde9a93/.gitlab-ci.yml 2020-07-13 10:30:21.000000000 +0000 @@ -0,0 +1,222 @@ +stages: + - build + - test + - release + +.variables-debian-amd64: &variables-debian-amd64 + _TRIPLET: "" + _PLATFORMSUFFIX: "" + _PATH: "debian-x86_64" + _WRAPPER: "" + +.variables-debian-aarch64: &variables-debian-aarch64 + _TRIPLET: "" + _PLATFORMSUFFIX: "" + _PATH: "debian-aarch64" + _WRAPPER: "" + +.variables-win32: &variables-win32 + _TRIPLET: "i686-w64-mingw32" + _PLATFORMSUFFIX: ".exe" + _PATH: "win32" + _WRAPPER: "wine" + +.variables-win64: &variables-win64 + _TRIPLET: "x86_64-w64-mingw32" + _PLATFORMSUFFIX: ".exe" + _PATH: "win64" + _WRAPPER: "wine64" + +.variables-macos: &variables-macos + _TRIPLET: "x86_64-apple-darwin18" + _PLATFORMSUFFIX: "" + _PATH: "macos-x86_64" + _WRAPPER: "" + +.build: + stage: build + script: | + set -x + LOCAL_INSTALL_DIR=`pwd`/local_install + export PKG_CONFIG_LIBDIR=${LOCAL_INSTALL_DIR}/lib/pkgconfig + git clone --depth 1 --branch master https://git.ffmpeg.org/ffmpeg.git ffmpeg + cd ffmpeg + ./configure --prefix="${LOCAL_INSTALL_DIR}" --enable-pic --disable-debug --extra-ldflags="-static" --disable-programs --disable-doc --disable-avdevice --disable-postproc --disable-avfilter --disable-network --disable-encoders 
--disable-muxers + make -j$(getconf _NPROCESSORS_ONLN) + make -j$(getconf _NPROCESSORS_ONLN) install + cd .. + git clone --depth 1 --branch master https://github.com/l-smash/l-smash.git lsmash + cd lsmash + ./configure --prefix="${LOCAL_INSTALL_DIR}" --extra-ldflags="-static" + make -j$(getconf _NPROCESSORS_ONLN) + make -j$(getconf _NPROCESSORS_ONLN) install + cd .. + ./configure --enable-pic --enable-strip --extra-ldflags="-static" + make -j$(getconf _NPROCESSORS_ONLN) x264 checkasm + artifacts: + name: "$CI_PROJECT_PATH_SLUG-$CI_JOB_NAME-$CI_COMMIT_SHORT_SHA" + paths: + - x264${_PLATFORMSUFFIX} + - checkasm8${_PLATFORMSUFFIX} + - checkasm10${_PLATFORMSUFFIX} + expire_in: 1 week + +build-debian-amd64: + extends: .build + image: registry.videolan.org/x264-debian-unstable:20190404162201 + tags: + - docker + - amd64 + variables: *variables-debian-amd64 + +build-debian-aarch64: + extends: .build + image: registry.videolan.org/x264-debian-unstable-aarch64:20190716192245 + tags: + - docker + - aarch64 + variables: *variables-debian-aarch64 + +.build-win: + extends: build-debian-amd64 + script: | + set -x + LOCAL_INSTALL_DIR=`pwd`/${_TRIPLET} + export PKG_CONFIG_LIBDIR=${LOCAL_INSTALL_DIR}/lib/pkgconfig + curl -f -o vlc-contrib-${_TRIPLET}-latest.tar.bz2 https://nightlies.videolan.org/build/contribs/vlc-contrib-${_TRIPLET}-latest.tar.bz2 + bunzip2 vlc-contrib-${_TRIPLET}-latest.tar.bz2 + tar xvf vlc-contrib-${_TRIPLET}-latest.tar + sed -i "s#@@CONTRIB_PREFIX@@#${LOCAL_INSTALL_DIR}#g" ${PKG_CONFIG_LIBDIR}/*.pc + git clone --depth 1 --branch master https://github.com/l-smash/l-smash.git lsmash + cd lsmash + ./configure --prefix="${LOCAL_INSTALL_DIR}" --target-os="${_TRIPLET}" --cross-prefix="${_TRIPLET}-" + make -j$(getconf _NPROCESSORS_ONLN) + make -j$(getconf _NPROCESSORS_ONLN) install + cd .. 
+ ./configure --host="${_TRIPLET}" --cross-prefix="${_TRIPLET}-" --enable-pic --enable-strip + make -j$(getconf _NPROCESSORS_ONLN) x264 checkasm + +build-win32: + extends: .build-win + variables: *variables-win32 + +build-win64: + extends: .build-win + variables: *variables-win64 + +build-macos: + extends: .build + tags: + - macos + script: | + set -x + LOCAL_INSTALL_DIR=`pwd`/${_TRIPLET} + export PKG_CONFIG_LIBDIR=${LOCAL_INSTALL_DIR}/lib/pkgconfig + curl -f -o vlc-contrib-${_TRIPLET}-latest.tar.bz2 https://nightlies.videolan.org/build/contribs/vlc-contrib-${_TRIPLET}-latest.tar.bz2 + bunzip2 vlc-contrib-${_TRIPLET}-latest.tar.bz2 + tar xvf vlc-contrib-${_TRIPLET}-latest.tar + sed -i.bak "s#@@CONTRIB_PREFIX@@#${LOCAL_INSTALL_DIR}#g" ${PKG_CONFIG_LIBDIR}/*.pc + git clone --depth 1 --branch master https://github.com/l-smash/l-smash.git lsmash + cd lsmash + ./configure --prefix="${LOCAL_INSTALL_DIR}" + make -j$(getconf _NPROCESSORS_ONLN) + make -j$(getconf _NPROCESSORS_ONLN) install + cd .. 
+ ./configure --enable-pic --enable-strip + make -j$(getconf _NPROCESSORS_ONLN) x264 checkasm + variables: *variables-macos + +.test: &test + stage: test + script: | + set -x + ${_WRAPPER} ./checkasm8${_PLATFORMSUFFIX} + ${_WRAPPER} ./checkasm10${_PLATFORMSUFFIX} + artifacts: + expire_in: 10 minutes + +test-debian-amd64: + <<: *test + extends: build-debian-amd64 + dependencies: + - build-debian-amd64 + variables: *variables-debian-amd64 + +test-debian-aarch64: + <<: *test + extends: build-debian-aarch64 + dependencies: + - build-debian-aarch64 + variables: *variables-debian-aarch64 + +test-win32: + <<: *test + extends: build-win32 + dependencies: + - build-win32 + variables: *variables-win32 + +test-win64: + <<: *test + extends: build-win64 + dependencies: + - build-win64 + variables: *variables-win64 + +test-macos: + <<: *test + extends: build-macos + dependencies: + - build-macos + variables: *variables-macos + +.release: &release + stage: release + script: | + set -x + _VERSION=$(./version.sh | grep _VERSION -| cut -d\ -f4-| sed 's, ,-,g' | sed 's,",,') + mv x264${_PLATFORMSUFFIX} x264-${_VERSION}${_PLATFORMSUFFIX} + when: manual + only: + - master@videolan/x264 + - stable@videolan/x264 + artifacts: + name: "$CI_PROJECT_PATH_SLUG-$CI_JOB_NAME-$CI_COMMIT_SHORT_SHA" + paths: + - x264-*${_PLATFORMSUFFIX} + expire_in: '10 minutes' + +release-debian-amd64: + <<: *release + extends: build-debian-amd64 + dependencies: + - build-debian-amd64 + variables: *variables-debian-amd64 + +release-debian-aarch64: + <<: *release + extends: build-debian-aarch64 + dependencies: + - build-debian-aarch64 + variables: *variables-debian-aarch64 + +release-win32: + <<: *release + extends: build-win32 + dependencies: + - build-win32 + variables: *variables-win32 + +release-win64: + <<: *release + extends: build-win64 + dependencies: + - build-win64 + variables: *variables-win64 + +release-macos: + <<: *release + extends: build-macos + dependencies: + - build-macos + variables: 
*variables-macos diff -Nru x264-0.157.2935+git545de2f/input/avs.c x264-0.160.3011+gitcde9a93/input/avs.c --- x264-0.157.2935+git545de2f/input/avs.c 2018-09-25 12:35:00.000000000 +0000 +++ x264-0.160.3011+gitcde9a93/input/avs.c 2020-07-13 10:30:21.000000000 +0000 @@ -1,7 +1,7 @@ /***************************************************************************** * avs.c: avisynth input ***************************************************************************** - * Copyright (C) 2009-2018 x264 project + * Copyright (C) 2009-2020 x264 project * * Authors: Steven Walters * Anton Mitrofanov diff -Nru x264-0.157.2935+git545de2f/input/ffms.c x264-0.160.3011+gitcde9a93/input/ffms.c --- x264-0.157.2935+git545de2f/input/ffms.c 2018-09-25 12:35:00.000000000 +0000 +++ x264-0.160.3011+gitcde9a93/input/ffms.c 2020-07-13 10:30:21.000000000 +0000 @@ -1,7 +1,7 @@ /***************************************************************************** * ffms.c: ffmpegsource input ***************************************************************************** - * Copyright (C) 2009-2018 x264 project + * Copyright (C) 2009-2020 x264 project * * Authors: Mike Gurlitz * Steven Walters diff -Nru x264-0.157.2935+git545de2f/input/input.c x264-0.160.3011+gitcde9a93/input/input.c --- x264-0.157.2935+git545de2f/input/input.c 2018-09-25 12:35:00.000000000 +0000 +++ x264-0.160.3011+gitcde9a93/input/input.c 2020-07-13 10:30:21.000000000 +0000 @@ -1,7 +1,7 @@ /***************************************************************************** * input.c: common input functions ***************************************************************************** - * Copyright (C) 2010-2018 x264 project + * Copyright (C) 2010-2020 x264 project * * Authors: Steven Walters * Henrik Gramner @@ -65,22 +65,22 @@ return (csp & X264_CSP_HIGH_DEPTH) ? 
2 : 1; } -uint64_t x264_cli_pic_plane_size( int csp, int width, int height, int plane ) +int64_t x264_cli_pic_plane_size( int csp, int width, int height, int plane ) { int csp_mask = csp & X264_CSP_MASK; if( x264_cli_csp_is_invalid( csp ) || plane < 0 || plane >= x264_cli_csps[csp_mask].planes ) return 0; - uint64_t size = (uint64_t)width * height; + int64_t size = (int64_t)width * height; size *= x264_cli_csps[csp_mask].width[plane] * x264_cli_csps[csp_mask].height[plane]; size *= x264_cli_csp_depth_factor( csp ); return size; } -uint64_t x264_cli_pic_size( int csp, int width, int height ) +int64_t x264_cli_pic_size( int csp, int width, int height ) { if( x264_cli_csp_is_invalid( csp ) ) return 0; - uint64_t size = 0; + int64_t size = 0; int csp_mask = csp & X264_CSP_MASK; for( int i = 0; i < x264_cli_csps[csp_mask].planes; i++ ) size += x264_cli_pic_plane_size( csp, width, height, i ); @@ -107,7 +107,7 @@ if( alloc ) { - size_t size = (size_t)(height * x264_cli_csps[csp_mask].height[i]) * stride; + int64_t size = (int64_t)(height * x264_cli_csps[csp_mask].height[i]) * stride; pic->img.plane[i] = x264_malloc( size ); if( !pic->img.plane[i] ) return -1; @@ -182,24 +182,26 @@ * in segfaults. We have to pad the buffer size as a workaround to avoid that. */ #define MMAP_PADDING 64 -void *x264_cli_mmap( cli_mmap_t *h, int64_t offset, size_t size ) +void *x264_cli_mmap( cli_mmap_t *h, int64_t offset, int64_t size ) { #if defined(_WIN32) || HAVE_MMAP uint8_t *base; int align = offset & h->align_mask; + if( offset < 0 || size < 0 || (uint64_t)size > (SIZE_MAX - MMAP_PADDING - align) ) + return NULL; offset -= align; size += align; #ifdef _WIN32 /* If the padding crosses a page boundary we need to increase the mapping size. */ size_t padded_size = (-size & h->page_mask) < MMAP_PADDING ? 
size + MMAP_PADDING : size; - if( offset + padded_size > h->file_size ) + if( (uint64_t)offset + padded_size > (uint64_t)h->file_size ) { /* It's not possible to do the POSIX mmap() remapping trick on Windows, so if the padding crosses a * page boundary past the end of the file we have to copy the entire frame into a padded buffer. */ - if( (base = MapViewOfFile( h->map_handle, FILE_MAP_READ, offset >> 32, offset, size )) ) + if( (base = MapViewOfFile( h->map_handle, FILE_MAP_READ, (uint64_t)offset >> 32, offset, size )) ) { uint8_t *buf = NULL; - HANDLE anon_map = CreateFileMappingW( INVALID_HANDLE_VALUE, NULL, PAGE_READWRITE, 0, padded_size, NULL ); + HANDLE anon_map = CreateFileMappingW( INVALID_HANDLE_VALUE, NULL, PAGE_READWRITE, (uint64_t)padded_size >> 32, padded_size, NULL ); if( anon_map ) { if( (buf = MapViewOfFile( anon_map, FILE_MAP_WRITE, 0, 0, 0 )) ) @@ -213,7 +215,7 @@ return buf; } } - else if( (base = MapViewOfFile( h->map_handle, FILE_MAP_READ, offset >> 32, offset, padded_size )) ) + else if( (base = MapViewOfFile( h->map_handle, FILE_MAP_READ, (uint64_t)offset >> 32, offset, padded_size )) ) { /* PrefetchVirtualMemory() is only available on Windows 8 and newer. 
*/ if( h->prefetch_virtual_memory ) @@ -249,13 +251,15 @@ return NULL; } -int x264_cli_munmap( cli_mmap_t *h, void *addr, size_t size ) +int x264_cli_munmap( cli_mmap_t *h, void *addr, int64_t size ) { #if defined(_WIN32) || HAVE_MMAP void *base = (void*)((intptr_t)addr & ~h->align_mask); #ifdef _WIN32 return !UnmapViewOfFile( base ); #else + if( size < 0 || size > (SIZE_MAX - MMAP_PADDING - ((intptr_t)addr - (intptr_t)base)) ) + return -1; return munmap( base, size + MMAP_PADDING + (intptr_t)addr - (intptr_t)base ); #endif #endif diff -Nru x264-0.157.2935+git545de2f/input/input.h x264-0.160.3011+gitcde9a93/input/input.h --- x264-0.157.2935+git545de2f/input/input.h 2018-09-25 12:35:00.000000000 +0000 +++ x264-0.160.3011+gitcde9a93/input/input.h 2020-07-13 10:30:21.000000000 +0000 @@ -1,7 +1,7 @@ /***************************************************************************** * input.h: file input ***************************************************************************** - * Copyright (C) 2003-2018 x264 project + * Copyright (C) 2003-2020 x264 project * * Authors: Laurent Aimar * Loren Merritt @@ -132,8 +132,8 @@ int x264_cli_pic_alloc_aligned( cli_pic_t *pic, int csp, int width, int height ); int x264_cli_pic_init_noalloc( cli_pic_t *pic, int csp, int width, int height ); void x264_cli_pic_clean( cli_pic_t *pic ); -uint64_t x264_cli_pic_plane_size( int csp, int width, int height, int plane ); -uint64_t x264_cli_pic_size( int csp, int width, int height ); +int64_t x264_cli_pic_plane_size( int csp, int width, int height, int plane ); +int64_t x264_cli_pic_size( int csp, int width, int height ); const x264_cli_csp_t *x264_cli_get_csp( int csp ); typedef struct @@ -151,8 +151,8 @@ } cli_mmap_t; int x264_cli_mmap_init( cli_mmap_t *h, FILE *fh ); -void *x264_cli_mmap( cli_mmap_t *h, int64_t offset, size_t size ); -int x264_cli_munmap( cli_mmap_t *h, void *addr, size_t size ); +void *x264_cli_mmap( cli_mmap_t *h, int64_t offset, int64_t size ); +int x264_cli_munmap( 
cli_mmap_t *h, void *addr, int64_t size ); void x264_cli_mmap_close( cli_mmap_t *h ); #endif diff -Nru x264-0.157.2935+git545de2f/input/lavf.c x264-0.160.3011+gitcde9a93/input/lavf.c --- x264-0.157.2935+git545de2f/input/lavf.c 2018-09-25 12:35:00.000000000 +0000 +++ x264-0.160.3011+gitcde9a93/input/lavf.c 2020-07-13 10:30:21.000000000 +0000 @@ -1,7 +1,7 @@ /***************************************************************************** * lavf.c: libavformat input ***************************************************************************** - * Copyright (C) 2009-2018 x264 project + * Copyright (C) 2009-2020 x264 project * * Authors: Mike Gurlitz * Steven Walters diff -Nru x264-0.157.2935+git545de2f/input/raw.c x264-0.160.3011+gitcde9a93/input/raw.c --- x264-0.157.2935+git545de2f/input/raw.c 2018-09-25 12:35:00.000000000 +0000 +++ x264-0.160.3011+gitcde9a93/input/raw.c 2020-07-13 10:30:21.000000000 +0000 @@ -1,7 +1,7 @@ /***************************************************************************** * raw.c: raw input ***************************************************************************** - * Copyright (C) 2003-2018 x264 project + * Copyright (C) 2003-2020 x264 project * * Authors: Laurent Aimar * Loren Merritt @@ -33,8 +33,8 @@ { FILE *fh; int next_frame; - uint64_t plane_size[4]; - uint64_t frame_size; + int64_t plane_size[4]; + int64_t frame_size; int bit_depth; cli_mmap_t mmap; int use_mmap; @@ -96,7 +96,7 @@ if( x264_is_regular_file( h->fh ) ) { fseek( h->fh, 0, SEEK_END ); - uint64_t size = ftell( h->fh ); + int64_t size = ftell( h->fh ); fseek( h->fh, 0, SEEK_SET ); info->num_frames = size / h->frame_size; FAIL_IF_ERROR( !info->num_frames, "empty input file\n" ); @@ -121,7 +121,7 @@ if( i ) pic->img.plane[i] = pic->img.plane[i-1] + pixel_depth * h->plane_size[i-1]; } - else if( fread( pic->img.plane[i], pixel_depth, h->plane_size[i], h->fh ) != h->plane_size[i] ) + else if( fread( pic->img.plane[i], pixel_depth, h->plane_size[i], h->fh ) != 
(uint64_t)h->plane_size[i] ) return -1; if( bit_depth_uc ) @@ -129,9 +129,9 @@ /* upconvert non 16bit high depth planes to 16bit using the same * algorithm as used in the depth filter. */ uint16_t *plane = (uint16_t*)pic->img.plane[i]; - uint64_t pixel_count = h->plane_size[i]; + int64_t pixel_count = h->plane_size[i]; int lshift = 16 - h->bit_depth; - for( uint64_t j = 0; j < pixel_count; j++ ) + for( int64_t j = 0; j < pixel_count; j++ ) plane[j] = plane[j] << lshift; } } diff -Nru x264-0.157.2935+git545de2f/input/thread.c x264-0.160.3011+gitcde9a93/input/thread.c --- x264-0.157.2935+git545de2f/input/thread.c 2018-09-25 12:35:00.000000000 +0000 +++ x264-0.160.3011+gitcde9a93/input/thread.c 2020-07-13 10:30:21.000000000 +0000 @@ -1,7 +1,7 @@ /***************************************************************************** * thread.c: threaded input ***************************************************************************** - * Copyright (C) 2003-2018 x264 project + * Copyright (C) 2003-2020 x264 project * * Authors: Laurent Aimar * Loren Merritt diff -Nru x264-0.157.2935+git545de2f/input/timecode.c x264-0.160.3011+gitcde9a93/input/timecode.c --- x264-0.157.2935+git545de2f/input/timecode.c 2018-09-25 12:35:00.000000000 +0000 +++ x264-0.160.3011+gitcde9a93/input/timecode.c 2020-07-13 10:30:21.000000000 +0000 @@ -1,7 +1,7 @@ /***************************************************************************** * timecode.c: timecode file input ***************************************************************************** - * Copyright (C) 2010-2018 x264 project + * Copyright (C) 2010-2020 x264 project * * Authors: Yusuke Nakamura * @@ -105,7 +105,7 @@ #define NO_TIMECODE_LINE (buff[0] == '#' || buff[0] == '\n' || buff[0] == '\r') if( tcfv == 1 ) { - uint64_t file_pos; + int64_t file_pos; double assume_fps, seq_fps; int start, end = -1; int prev_start = -1, prev_end = -1; @@ -221,7 +221,7 @@ } else /* tcfv == 2 */ { - uint64_t file_pos = ftell( tcfile_in ); + int64_t file_pos = 
ftell( tcfile_in ); h->stored_pts_num = 0; while( fgets( buff, sizeof(buff), tcfile_in ) != NULL ) diff -Nru x264-0.157.2935+git545de2f/input/y4m.c x264-0.160.3011+gitcde9a93/input/y4m.c --- x264-0.157.2935+git545de2f/input/y4m.c 2018-09-25 12:35:00.000000000 +0000 +++ x264-0.160.3011+gitcde9a93/input/y4m.c 2020-07-13 10:30:21.000000000 +0000 @@ -1,7 +1,7 @@ /***************************************************************************** * y4m.c: y4m input ***************************************************************************** - * Copyright (C) 2003-2018 x264 project + * Copyright (C) 2003-2020 x264 project * * Authors: Laurent Aimar * Loren Merritt @@ -34,8 +34,8 @@ int next_frame; int seq_header_len; int frame_header_len; - uint64_t frame_size; - uint64_t plane_size[3]; + int64_t frame_size; + int64_t plane_size[3]; int bit_depth; cli_mmap_t mmap; int use_mmap; @@ -213,10 +213,10 @@ if( x264_is_regular_file( h->fh ) ) { - uint64_t init_pos = ftell( h->fh ); + int64_t init_pos = ftell( h->fh ); /* Find out the length of the frame header */ - int len = 1; + size_t len = 1; while( len <= MAX_FRAME_HEADER && fgetc( h->fh ) != '\n' ) len++; FAIL_IF_ERROR( len > MAX_FRAME_HEADER || len < sizeof(Y4M_FRAME_MAGIC), "bad frame header length\n" ); @@ -224,7 +224,7 @@ h->frame_size += len; fseek( h->fh, 0, SEEK_END ); - uint64_t i_size = ftell( h->fh ); + int64_t i_size = ftell( h->fh ); fseek( h->fh, init_pos, SEEK_SET ); info->num_frames = (i_size - h->seq_header_len) / h->frame_size; FAIL_IF_ERROR( !info->num_frames, "empty input file\n" ); @@ -277,7 +277,7 @@ if( i ) pic->img.plane[i] = pic->img.plane[i-1] + pixel_depth * h->plane_size[i-1]; } - else if( fread( pic->img.plane[i], pixel_depth, h->plane_size[i], h->fh ) != h->plane_size[i] ) + else if( fread( pic->img.plane[i], pixel_depth, h->plane_size[i], h->fh ) != (uint64_t)h->plane_size[i] ) return -1; if( bit_depth_uc ) @@ -285,9 +285,9 @@ /* upconvert non 16bit high depth planes to 16bit using the same * 
algorithm as used in the depth filter. */ uint16_t *plane = (uint16_t*)pic->img.plane[i]; - uint64_t pixel_count = h->plane_size[i]; + int64_t pixel_count = h->plane_size[i]; int lshift = 16 - h->bit_depth; - for( uint64_t j = 0; j < pixel_count; j++ ) + for( int64_t j = 0; j < pixel_count; j++ ) plane[j] = plane[j] << lshift; } } diff -Nru x264-0.157.2935+git545de2f/Makefile x264-0.160.3011+gitcde9a93/Makefile --- x264-0.157.2935+git545de2f/Makefile 2018-09-25 12:35:00.000000000 +0000 +++ x264-0.160.3011+gitcde9a93/Makefile 2020-07-13 10:30:21.000000000 +0000 @@ -8,6 +8,9 @@ vpath %.asm $(SRCPATH) vpath %.rc $(SRCPATH) +CFLAGS += $(CFLAGSPROF) +LDFLAGS += $(LDFLAGSPROF) + GENERATED = all: default @@ -27,8 +30,8 @@ SRCS_8 = -SRCCLI = x264.c input/input.c input/timecode.c input/raw.c input/y4m.c \ - output/raw.c output/matroska.c output/matroska_ebml.c \ +SRCCLI = x264.c autocomplete.c input/input.c input/timecode.c input/raw.c \ + input/y4m.c output/raw.c output/matroska.c output/matroska_ebml.c \ output/flv.c output/flv_bytestream.c filters/filters.c \ filters/video/video.c filters/video/source.c filters/video/internal.c \ filters/video/resize.c filters/video/fix_vfr_pts.c \ @@ -246,6 +249,8 @@ $(SONAME): $(GENERATED) .depend $(OBJS) $(OBJASM) $(OBJSO) $(LD)$@ $(OBJS) $(OBJASM) $(OBJSO) $(SOFLAGS) $(LDFLAGS) +$(IMPLIBNAME): $(SONAME) + ifneq ($(EXE),) .PHONY: x264 checkasm8 checkasm10 example x264: x264$(EXE) @@ -266,6 +271,9 @@ example$(EXE): $(GENERATED) .depend $(OBJEXAMPLE) $(LIBX264) $(LD)$@ $(OBJEXAMPLE) $(LIBX264) $(LDFLAGS) +$(OBJS) $(OBJSO): CFLAGS += $(CFLAGSSO) +$(OBJCLI): CFLAGS += $(CFLAGSCLI) + $(OBJS) $(OBJASM) $(OBJSO) $(OBJCLI) $(OBJCHK) $(OBJCHK_8) $(OBJCHK_10) $(OBJEXAMPLE): .depend %.o: %.c @@ -336,7 +344,7 @@ include .depend endif -OBJPROF = $(OBJS) $(OBJCLI) +OBJPROF = $(OBJS) $(OBJSO) $(OBJCLI) # These should cover most of the important codepaths OPT0 = --crf 30 -b1 -m1 -r1 --me dia --no-cabac --direct temporal --ssim --no-weightb OPT1 = 
--crf 16 -b2 -m3 -r3 --me hex --no-8x8dct --direct spatial --no-dct-decimate -t0 --slice-max-mbs 50 @@ -354,7 +362,7 @@ @echo 'i.e. YUV with resolution in the filename, y4m, or avisynth.' else fprofiled: clean - $(MAKE) x264$(EXE) CFLAGS="$(CFLAGS) $(PROF_GEN_CC)" LDFLAGS="$(LDFLAGS) $(PROF_GEN_LD)" + $(MAKE) x264$(EXE) CFLAGSPROF="$(PROF_GEN_CC)" LDFLAGSPROF="$(PROF_GEN_LD)" $(foreach V, $(VIDS), $(foreach I, 0 1 2 3 4 5 6 7, ./x264$(EXE) $(OPT$I) --threads 1 $(V) -o $(DEVNULL) ;)) ifeq ($(COMPILER),CL) # Because Visual Studio timestamps the object files within the PGD, it fails to build if they change - only the executable should be deleted @@ -362,7 +370,7 @@ else rm -f $(OBJPROF) endif - $(MAKE) CFLAGS="$(CFLAGS) $(PROF_USE_CC)" LDFLAGS="$(LDFLAGS) $(PROF_USE_LD)" + $(MAKE) CFLAGSPROF="$(PROF_USE_CC)" LDFLAGSPROF="$(PROF_USE_LD)" rm -f $(OBJPROF:%.o=%.gcda) $(OBJPROF:%.o=%.gcno) *.dyn pgopti.dpi pgopti.dpi.lock *.pgd *.pgc endif diff -Nru x264-0.157.2935+git545de2f/output/flv_bytestream.c x264-0.160.3011+gitcde9a93/output/flv_bytestream.c --- x264-0.157.2935+git545de2f/output/flv_bytestream.c 2018-09-25 12:35:00.000000000 +0000 +++ x264-0.160.3011+gitcde9a93/output/flv_bytestream.c 2020-07-13 10:30:21.000000000 +0000 @@ -1,7 +1,7 @@ /***************************************************************************** * flv_bytestream.c: flv muxer utilities ***************************************************************************** - * Copyright (C) 2009-2018 x264 project + * Copyright (C) 2009-2020 x264 project * * Authors: Kieran Kunhya * diff -Nru x264-0.157.2935+git545de2f/output/flv_bytestream.h x264-0.160.3011+gitcde9a93/output/flv_bytestream.h --- x264-0.157.2935+git545de2f/output/flv_bytestream.h 2018-09-25 12:35:00.000000000 +0000 +++ x264-0.160.3011+gitcde9a93/output/flv_bytestream.h 2020-07-13 10:30:21.000000000 +0000 @@ -1,7 +1,7 @@ /***************************************************************************** * flv_bytestream.h: flv muxer utilities 
***************************************************************************** - * Copyright (C) 2009-2018 x264 project + * Copyright (C) 2009-2020 x264 project * * Authors: Kieran Kunhya * diff -Nru x264-0.157.2935+git545de2f/output/flv.c x264-0.160.3011+gitcde9a93/output/flv.c --- x264-0.157.2935+git545de2f/output/flv.c 2018-09-25 12:35:00.000000000 +0000 +++ x264-0.160.3011+gitcde9a93/output/flv.c 2020-07-13 10:30:21.000000000 +0000 @@ -1,7 +1,7 @@ /***************************************************************************** * flv.c: flv muxer ***************************************************************************** - * Copyright (C) 2009-2018 x264 project + * Copyright (C) 2009-2020 x264 project * * Authors: Kieran Kunhya * @@ -332,7 +332,7 @@ if( x264_is_regular_file( c->fp ) && total_duration > 0 ) { double framerate; - uint64_t filesize = ftell( c->fp ); + int64_t filesize = ftell( c->fp ); if( p_flv->i_framerate_pos ) { @@ -342,7 +342,7 @@ CHECK( rewrite_amf_double( c->fp, p_flv->i_duration_pos, total_duration ) ); CHECK( rewrite_amf_double( c->fp, p_flv->i_filesize_pos, filesize ) ); - CHECK( rewrite_amf_double( c->fp, p_flv->i_bitrate_pos, filesize * 8 / ( total_duration * 1000 ) ) ); + CHECK( rewrite_amf_double( c->fp, p_flv->i_bitrate_pos, filesize * 8.0 / ( total_duration * 1000 ) ) ); } ret = 0; diff -Nru x264-0.157.2935+git545de2f/output/matroska.c x264-0.160.3011+gitcde9a93/output/matroska.c --- x264-0.157.2935+git545de2f/output/matroska.c 2018-09-25 12:35:00.000000000 +0000 +++ x264-0.160.3011+gitcde9a93/output/matroska.c 2020-07-13 10:30:21.000000000 +0000 @@ -1,7 +1,7 @@ /***************************************************************************** * matroska.c: matroska muxer ***************************************************************************** - * Copyright (C) 2005-2018 x264 project + * Copyright (C) 2005-2020 x264 project * * Authors: Mike Matsnev * diff -Nru x264-0.157.2935+git545de2f/output/matroska_ebml.c 
x264-0.160.3011+gitcde9a93/output/matroska_ebml.c --- x264-0.157.2935+git545de2f/output/matroska_ebml.c 2018-09-25 12:35:00.000000000 +0000 +++ x264-0.160.3011+gitcde9a93/output/matroska_ebml.c 2020-07-13 10:30:21.000000000 +0000 @@ -1,7 +1,7 @@ /***************************************************************************** * matroska_ebml.c: matroska muxer utilities ***************************************************************************** - * Copyright (C) 2005-2018 x264 project + * Copyright (C) 2005-2020 x264 project * * Authors: Mike Matsnev * diff -Nru x264-0.157.2935+git545de2f/output/matroska_ebml.h x264-0.160.3011+gitcde9a93/output/matroska_ebml.h --- x264-0.157.2935+git545de2f/output/matroska_ebml.h 2018-09-25 12:35:00.000000000 +0000 +++ x264-0.160.3011+gitcde9a93/output/matroska_ebml.h 2020-07-13 10:30:21.000000000 +0000 @@ -1,7 +1,7 @@ /***************************************************************************** * matroska_ebml.h: matroska muxer utilities ***************************************************************************** - * Copyright (C) 2005-2018 x264 project + * Copyright (C) 2005-2020 x264 project * * Authors: Mike Matsnev * diff -Nru x264-0.157.2935+git545de2f/output/mp4.c x264-0.160.3011+gitcde9a93/output/mp4.c --- x264-0.157.2935+git545de2f/output/mp4.c 2018-09-25 12:35:00.000000000 +0000 +++ x264-0.160.3011+gitcde9a93/output/mp4.c 2020-07-13 10:30:21.000000000 +0000 @@ -1,7 +1,7 @@ /***************************************************************************** * mp4.c: mp4 muxer ***************************************************************************** - * Copyright (C) 2003-2018 x264 project + * Copyright (C) 2003-2020 x264 project * * Authors: Laurent Aimar * Loren Merritt diff -Nru x264-0.157.2935+git545de2f/output/mp4_lsmash.c x264-0.160.3011+gitcde9a93/output/mp4_lsmash.c --- x264-0.157.2935+git545de2f/output/mp4_lsmash.c 2018-09-25 12:35:00.000000000 +0000 +++ x264-0.160.3011+gitcde9a93/output/mp4_lsmash.c 2020-07-13 
10:30:21.000000000 +0000 @@ -1,7 +1,7 @@ /***************************************************************************** * mp4_lsmash.c: mp4 muxer using L-SMASH ***************************************************************************** - * Copyright (C) 2003-2018 x264 project + * Copyright (C) 2003-2020 x264 project * * Authors: Laurent Aimar * Loren Merritt diff -Nru x264-0.157.2935+git545de2f/output/output.h x264-0.160.3011+gitcde9a93/output/output.h --- x264-0.157.2935+git545de2f/output/output.h 2018-09-25 12:35:00.000000000 +0000 +++ x264-0.160.3011+gitcde9a93/output/output.h 2020-07-13 10:30:21.000000000 +0000 @@ -1,7 +1,7 @@ /***************************************************************************** * output.h: x264 file output modules ***************************************************************************** - * Copyright (C) 2003-2018 x264 project + * Copyright (C) 2003-2020 x264 project * * Authors: Laurent Aimar * Loren Merritt diff -Nru x264-0.157.2935+git545de2f/output/raw.c x264-0.160.3011+gitcde9a93/output/raw.c --- x264-0.157.2935+git545de2f/output/raw.c 2018-09-25 12:35:00.000000000 +0000 +++ x264-0.160.3011+gitcde9a93/output/raw.c 2020-07-13 10:30:21.000000000 +0000 @@ -1,7 +1,7 @@ /***************************************************************************** * raw.c: raw muxer ***************************************************************************** - * Copyright (C) 2003-2018 x264 project + * Copyright (C) 2003-2020 x264 project * * Authors: Laurent Aimar * Loren Merritt diff -Nru x264-0.157.2935+git545de2f/tools/bash-autocomplete.sh x264-0.160.3011+gitcde9a93/tools/bash-autocomplete.sh --- x264-0.157.2935+git545de2f/tools/bash-autocomplete.sh 1970-01-01 00:00:00.000000000 +0000 +++ x264-0.160.3011+gitcde9a93/tools/bash-autocomplete.sh 2020-07-13 10:30:21.000000000 +0000 @@ -0,0 +1,15 @@ +_x264() +{ + local path args cur prev + + path="${COMP_LINE%%[[:blank:]]*}" + args="${COMP_LINE:${#path}:$((COMP_POINT-${#path}))}" + 
cur="${args##*[[:blank:]=]}" + prev="$(sed 's/[[:blank:]=]*$//; s/^.*[[:blank:]]//' <<< "${args%%"$cur"}")" + + # Expand ~ + printf -v path '%q' "$path" && eval path="${path/#'\~'/'~'}" + + COMPREPLY=($("$path" --autocomplete "$prev" "$cur")) && compopt +o default +} 2>/dev/null +complete -o default -F _x264 x264 diff -Nru x264-0.157.2935+git545de2f/tools/checkasm-aarch64.S x264-0.160.3011+gitcde9a93/tools/checkasm-aarch64.S --- x264-0.157.2935+git545de2f/tools/checkasm-aarch64.S 2018-09-25 12:35:00.000000000 +0000 +++ x264-0.160.3011+gitcde9a93/tools/checkasm-aarch64.S 2020-07-13 10:30:21.000000000 +0000 @@ -1,7 +1,7 @@ /**************************************************************************** * checkasm-aarch64.S: assembly check tool ***************************************************************************** - * Copyright (C) 2015-2018 x264 project + * Copyright (C) 2015-2020 x264 project * * Authors: Martin Storsjo * diff -Nru x264-0.157.2935+git545de2f/tools/checkasm-a.asm x264-0.160.3011+gitcde9a93/tools/checkasm-a.asm --- x264-0.157.2935+git545de2f/tools/checkasm-a.asm 2018-09-25 12:35:00.000000000 +0000 +++ x264-0.160.3011+gitcde9a93/tools/checkasm-a.asm 2020-07-13 10:30:21.000000000 +0000 @@ -1,7 +1,7 @@ ;***************************************************************************** ;* checkasm-a.asm: assembly check tool ;***************************************************************************** -;* Copyright (C) 2008-2018 x264 project +;* Copyright (C) 2008-2020 x264 project ;* ;* Authors: Loren Merritt ;* Henrik Gramner @@ -58,7 +58,6 @@ cextern_naked puts ; max number of args used by any x264 asm function. -; (max_args % 4) must equal 3 for stack alignment %define max_args 15 %if ARCH_X86_64 @@ -88,7 +87,7 @@ ; intptr_t x264_checkasm_call( intptr_t (*func)(), int *ok, ... 
) ;----------------------------------------------------------------------------- INIT_XMM -cglobal checkasm_call, 2,15,16,max_args*8+8 +cglobal checkasm_call, 2,15,16,-1*(((max_args+1)*8+STACK_ALIGNMENT-1) & ~(STACK_ALIGNMENT-1)) mov r6, r0 mov [rsp+max_args*8], r1 @@ -103,14 +102,14 @@ mov r5, r11mp %assign i 6 %rep max_args-6 - mov r9, [rsp+stack_offset+(i+1)*8] + mov r9, [rstk+stack_offset+(i+1)*8] mov [rsp+(i-6)*8], r9 %assign i i+1 %endrep %else %assign i 4 %rep max_args-4 - mov r9, [rsp+stack_offset+(i+7)*8] + mov r9, [rstk+stack_offset+(i+7)*8] mov [rsp+i*8], r9 %assign i i+1 %endrep @@ -176,16 +175,19 @@ ;----------------------------------------------------------------------------- ; intptr_t x264_checkasm_call( intptr_t (*func)(), int *ok, ... ) ;----------------------------------------------------------------------------- -cglobal checkasm_call, 1,7 +cglobal checkasm_call, 2,7,0,-1*(((max_args+1)*4+STACK_ALIGNMENT-1) & ~(STACK_ALIGNMENT-1)) + mov [esp+max_args*4], r1 +%assign i 0 +%rep max_args + mov r1, [rstk+stack_offset+12+i*4] + mov [esp+i*4], r1 + %assign i i+1 +%endrep mov r3, n3 mov r4, n4 mov r5, n5 mov r6, n6 -%rep max_args - push dword [esp+24+max_args*4] -%endrep call r0 - add esp, max_args*4 xor r3, n3 xor r4, n4 xor r5, n5 @@ -197,10 +199,9 @@ mov r3, eax mov r4, edx lea r1, [error_message] - push r1 + mov [esp], r1 call puts - add esp, 4 - mov r1, r1m + mov r1, [esp+max_args*4] mov dword [r1], 0 mov edx, r4 mov eax, r3 diff -Nru x264-0.157.2935+git545de2f/tools/checkasm-arm.S x264-0.160.3011+gitcde9a93/tools/checkasm-arm.S --- x264-0.157.2935+git545de2f/tools/checkasm-arm.S 2018-09-25 12:35:00.000000000 +0000 +++ x264-0.160.3011+gitcde9a93/tools/checkasm-arm.S 2020-07-13 10:30:21.000000000 +0000 @@ -1,7 +1,7 @@ /**************************************************************************** * checkasm-arm.S: assembly check tool ***************************************************************************** - * Copyright (C) 2015-2018 x264 project 
+ * Copyright (C) 2015-2020 x264 project * * Authors: Martin Storsjo * diff -Nru x264-0.157.2935+git545de2f/tools/checkasm.c x264-0.160.3011+gitcde9a93/tools/checkasm.c --- x264-0.157.2935+git545de2f/tools/checkasm.c 2018-09-25 12:35:00.000000000 +0000 +++ x264-0.160.3011+gitcde9a93/tools/checkasm.c 2020-07-13 10:30:21.000000000 +0000 @@ -1,7 +1,7 @@ /***************************************************************************** * checkasm.c: assembly check tool ***************************************************************************** - * Copyright (C) 2003-2018 x264 project + * Copyright (C) 2003-2020 x264 project * * Authors: Loren Merritt * Laurent Aimar @@ -110,7 +110,7 @@ return a; } -static bench_t* get_bench( const char *name, int cpu ) +static bench_t* get_bench( const char *name, uint32_t cpu ) { int i, j; for( i = 0; benchs[i].name && strcmp(name, benchs[i].name); i++ ) @@ -175,7 +175,7 @@ if( k < j ) continue; printf( "%s_%s%s: %"PRId64"\n", benchs[i].name, -#if HAVE_MMX +#if ARCH_X86 || ARCH_X86_64 b->cpu&X264_CPU_AVX512 ? "avx512" : b->cpu&X264_CPU_AVX2 ? "avx2" : b->cpu&X264_CPU_BMI2 ? "bmi2" : @@ -206,7 +206,7 @@ b->cpu&X264_CPU_MSA ? "msa" : #endif "c", -#if HAVE_MMX +#if ARCH_X86 || ARCH_X86_64 b->cpu&X264_CPU_CACHELINE_32 ? "_c32" : b->cpu&X264_CPU_SLOW_ATOM && b->cpu&X264_CPU_CACHELINE_64 ? "_c64_atom" : b->cpu&X264_CPU_CACHELINE_64 ? "_c64" : @@ -229,7 +229,7 @@ static void (*simd_warmup_func)( void ) = NULL; #define simd_warmup() do { if( simd_warmup_func ) simd_warmup_func(); } while( 0 ) -#if ARCH_X86 || ARCH_X86_64 +#if HAVE_MMX int x264_stack_pagealign( int (*func)(), int align ); void x264_checkasm_warmup_avx( void ); void x264_checkasm_warmup_avx512( void ); @@ -241,11 +241,11 @@ #define x264_stack_pagealign( func, align ) func() #endif -#if ARCH_AARCH64 +#if HAVE_AARCH64 intptr_t x264_checkasm_call( intptr_t (*func)(), int *ok, ... 
); #endif -#if ARCH_ARM +#if HAVE_ARMV6 intptr_t x264_checkasm_call_neon( intptr_t (*func)(), int *ok, ... ); intptr_t x264_checkasm_call_noneon( intptr_t (*func)(), int *ok, ... ); intptr_t (*x264_checkasm_call)( intptr_t (*func)(), int *ok, ... ) = x264_checkasm_call_noneon; @@ -253,7 +253,7 @@ #define call_c1(func,...) func(__VA_ARGS__) -#if ARCH_X86_64 +#if HAVE_MMX && ARCH_X86_64 /* Evil hack: detect incorrect assumptions that 32-bit ints are zero-extended to 64-bit. * This is done by clobbering the stack with junk around the stack pointer and calling the * assembly function through x264_checkasm_call with added dummy arguments which forces all @@ -269,19 +269,19 @@ x264_checkasm_stack_clobber( r,r,r,r,r,r,r,r,r,r,r,r,r,r,r,r,r,r,r,r,r ); /* max_args+6 */ \ simd_warmup(); \ x264_checkasm_call(( intptr_t(*)())func, &ok, 0, 0, 0, 0, __VA_ARGS__ ); }) -#elif ARCH_AARCH64 && !defined(__APPLE__) +#elif HAVE_AARCH64 && !defined(__APPLE__) void x264_checkasm_stack_clobber( uint64_t clobber, ... ); #define call_a1(func,...) ({ \ uint64_t r = (rand() & 0xffff) * 0x0001000100010001ULL; \ x264_checkasm_stack_clobber( r,r,r,r,r,r,r,r,r,r,r,r,r,r,r,r,r,r,r,r,r,r,r ); /* max_args+8 */ \ x264_checkasm_call(( intptr_t(*)())func, &ok, 0, 0, 0, 0, 0, 0, __VA_ARGS__ ); }) -#elif ARCH_X86 || ARCH_ARM +#elif HAVE_MMX || HAVE_ARMV6 #define call_a1(func,...) x264_checkasm_call( (intptr_t(*)())func, &ok, __VA_ARGS__ ) #else #define call_a1 call_c1 #endif -#if ARCH_ARM +#if HAVE_ARMV6 #define call_a1_64(func,...) ((uint64_t (*)(intptr_t(*)(), int*, ...))x264_checkasm_call)( (intptr_t(*)())func, &ok, __VA_ARGS__ ) #else #define call_a1_64 call_a1 @@ -323,7 +323,7 @@ #define call_a64(func,...) 
({ call_a2(func,__VA_ARGS__); call_a1_64(func,__VA_ARGS__); }) -static int check_pixel( int cpu_ref, int cpu_new ) +static int check_pixel( uint32_t cpu_ref, uint32_t cpu_new ) { x264_pixel_function_t pixel_c; x264_pixel_function_t pixel_ref; @@ -596,13 +596,13 @@ #define TEST_INTRA_X3( name, i8x8, ... ) \ if( pixel_asm.name && pixel_asm.name != pixel_ref.name ) \ { \ - ALIGNED_16( int res_c[3] ); \ - ALIGNED_16( int res_asm[3] ); \ + ALIGNED_16( int res_c[4] ); \ + ALIGNED_16( int res_asm[4] ); \ set_func_name( #name ); \ used_asm = 1; \ call_c( pixel_c.name, pbuf1+48, i8x8 ? edge : pbuf3+48, res_c ); \ call_a( pixel_asm.name, pbuf1+48, i8x8 ? edge : pbuf3+48, res_asm ); \ - if( memcmp(res_c, res_asm, sizeof(res_c)) ) \ + if( memcmp(res_c, res_asm, 3 * sizeof(*res_c)) ) \ { \ ok = 0; \ fprintf( stderr, #name": %d,%d,%d != %d,%d,%d [FAILED]\n", \ @@ -619,8 +619,8 @@ ALIGNED_ARRAY_64( uint16_t, bitcosts,[17] ); \ for( int i=0; i<17; i++ ) \ bitcosts[i] = 9*(i!=8); \ - memcpy( pbuf3, pbuf2, 20*FDEC_STRIDE*sizeof(pixel) ); \ - memcpy( pbuf4, pbuf2, 20*FDEC_STRIDE*sizeof(pixel) ); \ + memcpy( pbuf3, pbuf2, 20*FDEC_STRIDE*SIZEOF_PIXEL ); \ + memcpy( pbuf4, pbuf2, 20*FDEC_STRIDE*SIZEOF_PIXEL ); \ for( int i=0; i<32; i++ ) \ { \ pixel *fenc = pbuf1+48+i*12; \ @@ -643,7 +643,7 @@ fprintf( stderr, #name": %d,%d != %d,%d [FAILED]\n", res_c>>16, res_c&0xffff, res_a>>16, res_a&0xffff ); \ break; \ } \ - if( memcmp(fdec1, fdec2, 4*FDEC_STRIDE*sizeof(pixel)) ) \ + if( memcmp(fdec1, fdec2, 4*FDEC_STRIDE*SIZEOF_PIXEL) ) \ { \ ok = 0; \ fprintf( stderr, #name" [FAILED]\n" ); \ @@ -700,7 +700,7 @@ break; \ } \ for( int j=0; j<8; j++ ) \ - if( memcmp(fdec1+j*FDEC_STRIDE, fdec2+j*FDEC_STRIDE, 8*sizeof(pixel)) ) \ + if( memcmp(fdec1+j*FDEC_STRIDE, fdec2+j*FDEC_STRIDE, 8*SIZEOF_PIXEL) ) \ ok = 0; \ if( !ok ) \ { \ @@ -724,7 +724,7 @@ } \ } - memcpy( pbuf3, pbuf2, 20*FDEC_STRIDE*sizeof(pixel) ); + memcpy( pbuf3, pbuf2, 20*FDEC_STRIDE*SIZEOF_PIXEL ); ok = 1; used_asm = 0; TEST_INTRA_X3( 
intra_satd_x3_16x16, 0 ); TEST_INTRA_X3( intra_satd_x3_8x16c, 0 ); @@ -780,7 +780,7 @@ x264_emms(); res_c = x264_pixel_ssim_wxh( &pixel_c, pbuf1+2, 32, pbuf2+2, 32, 32, 28, pbuf3, &cnt ); res_a = x264_pixel_ssim_wxh( &pixel_asm, pbuf1+2, 32, pbuf2+2, 32, 32, 28, pbuf3, &cnt ); - if( fabs( res_c - res_a ) > 1e-6 ) + if( fabs( res_c - res_a ) > 1e-5 ) { ok = 0; fprintf( stderr, "ssim: %.7f != %.7f [FAILED]\n", res_c, res_a ); @@ -834,7 +834,7 @@ return ret; } -static int check_dct( int cpu_ref, int cpu_new ) +static int check_dct( uint32_t cpu_ref, uint32_t cpu_new ) { x264_dct_function_t dct_c; x264_dct_function_t dct_ref; @@ -956,13 +956,13 @@ { \ set_func_name( #name ); \ used_asm = 1; \ - memcpy( pbuf3, pbuf1, 32*32 * sizeof(pixel) ); \ - memcpy( pbuf4, pbuf1, 32*32 * sizeof(pixel) ); \ + memcpy( pbuf3, pbuf1, 32*32 * SIZEOF_PIXEL ); \ + memcpy( pbuf4, pbuf1, 32*32 * SIZEOF_PIXEL ); \ memcpy( dct1, src, 256 * sizeof(dctcoef) ); \ memcpy( dct2, src, 256 * sizeof(dctcoef) ); \ call_c1( dct_c.name, pbuf3, (void*)dct1 ); \ call_a1( dct_asm.name, pbuf4, (void*)dct2 ); \ - if( memcmp( pbuf3, pbuf4, 32*32 * sizeof(pixel) ) ) \ + if( memcmp( pbuf3, pbuf4, 32*32 * SIZEOF_PIXEL ) ) \ { \ ok = 0; \ fprintf( stderr, #name " [FAILED]\n" ); \ @@ -1082,11 +1082,11 @@ int nz_a, nz_c; \ set_func_name( "zigzag_"#name"_%s", interlace?"field":"frame" ); \ used_asm = 1; \ - memcpy( pbuf3, pbuf1, 16*FDEC_STRIDE * sizeof(pixel) ); \ - memcpy( pbuf4, pbuf1, 16*FDEC_STRIDE * sizeof(pixel) ); \ + memcpy( pbuf3, pbuf1, 16*FDEC_STRIDE * SIZEOF_PIXEL ); \ + memcpy( pbuf4, pbuf1, 16*FDEC_STRIDE * SIZEOF_PIXEL ); \ nz_c = call_c1( zigzag_c[interlace].name, t1, pbuf2, pbuf3 ); \ nz_a = call_a1( zigzag_asm[interlace].name, t2, pbuf2, pbuf4 ); \ - if( memcmp( t1, t2, size*sizeof(dctcoef) ) || memcmp( pbuf3, pbuf4, 16*FDEC_STRIDE*sizeof(pixel) ) || nz_c != nz_a ) \ + if( memcmp( t1, t2, size*sizeof(dctcoef) ) || memcmp( pbuf3, pbuf4, 16*FDEC_STRIDE*SIZEOF_PIXEL ) || nz_c != nz_a ) \ { \ ok = 0; \ 
fprintf( stderr, #name " [FAILED]\n" ); \ @@ -1104,16 +1104,16 @@ used_asm = 1; \ for( int i = 0; i < 2; i++ ) \ { \ - memcpy( pbuf3, pbuf2, 16*FDEC_STRIDE * sizeof(pixel) ); \ - memcpy( pbuf4, pbuf2, 16*FDEC_STRIDE * sizeof(pixel) ); \ + memcpy( pbuf3, pbuf2, 16*FDEC_STRIDE * SIZEOF_PIXEL ); \ + memcpy( pbuf4, pbuf2, 16*FDEC_STRIDE * SIZEOF_PIXEL ); \ for( int j = 0; j < 4; j++ ) \ { \ - memcpy( pbuf3 + j*FDEC_STRIDE, (i?pbuf1:pbuf2) + j*FENC_STRIDE, 4 * sizeof(pixel) ); \ - memcpy( pbuf4 + j*FDEC_STRIDE, (i?pbuf1:pbuf2) + j*FENC_STRIDE, 4 * sizeof(pixel) ); \ + memcpy( pbuf3 + j*FDEC_STRIDE, (i?pbuf1:pbuf2) + j*FENC_STRIDE, 4 * SIZEOF_PIXEL ); \ + memcpy( pbuf4 + j*FDEC_STRIDE, (i?pbuf1:pbuf2) + j*FENC_STRIDE, 4 * SIZEOF_PIXEL ); \ } \ nz_c = call_c1( zigzag_c[interlace].name, t1, pbuf2, pbuf3, &dc_c ); \ nz_a = call_a1( zigzag_asm[interlace].name, t2, pbuf2, pbuf4, &dc_a ); \ - if( memcmp( t1+1, t2+1, 15*sizeof(dctcoef) ) || memcmp( pbuf3, pbuf4, 16*FDEC_STRIDE * sizeof(pixel) ) || nz_c != nz_a || dc_c != dc_a ) \ + if( memcmp( t1+1, t2+1, 15*sizeof(dctcoef) ) || memcmp( pbuf3, pbuf4, 16*FDEC_STRIDE * SIZEOF_PIXEL ) || nz_c != nz_a || dc_c != dc_a ) \ { \ ok = 0; \ fprintf( stderr, #name " [FAILED]\n" ); \ @@ -1168,7 +1168,7 @@ return ret; } -static int check_mc( int cpu_ref, int cpu_new ) +static int check_mc( uint32_t cpu_ref, uint32_t cpu_new ) { x264_mc_functions_t mc_c; x264_mc_functions_t mc_ref; @@ -1198,7 +1198,7 @@ pbuf3[i] = pbuf4[i] = 0xCD; \ call_c( mc_c.mc_luma, dst1, (intptr_t)32, src2, (intptr_t)64, dx, dy, w, h, weight ); \ call_a( mc_a.mc_luma, dst2, (intptr_t)32, src2, (intptr_t)64, dx, dy, w, h, weight ); \ - if( memcmp( pbuf3, pbuf4, 1024 * sizeof(pixel) ) ) \ + if( memcmp( pbuf3, pbuf4, 1024 * SIZEOF_PIXEL ) ) \ { \ fprintf( stderr, "mc_luma[mv(%d,%d) %2dx%-2d] [FAILED]\n", dx, dy, w, h ); \ ok = 0; \ @@ -1208,7 +1208,7 @@ { \ pixel *ref = dst2; \ intptr_t ref_stride = 32; \ - int w_checked = ( ( sizeof(pixel) == 2 && (w == 12 || w == 20)) ? 
w-2 : w ); \ + int w_checked = ( ( SIZEOF_PIXEL == 2 && (w == 12 || w == 20)) ? w-2 : w ); \ const x264_weight_t *weight = x264_weight_none; \ set_func_name( "get_ref_%dx%d", w_checked, h ); \ used_asm = 1; \ @@ -1217,7 +1217,7 @@ call_c( mc_c.mc_luma, dst1, (intptr_t)32, src2, (intptr_t)64, dx, dy, w, h, weight ); \ ref = (pixel*)call_a( mc_a.get_ref, ref, &ref_stride, src2, (intptr_t)64, dx, dy, w, h, weight ); \ for( int i = 0; i < h; i++ ) \ - if( memcmp( dst1+i*32, ref+i*ref_stride, w_checked * sizeof(pixel) ) ) \ + if( memcmp( dst1+i*32, ref+i*ref_stride, w_checked * SIZEOF_PIXEL ) ) \ { \ fprintf( stderr, "get_ref[mv(%d,%d) %2dx%-2d] [FAILED]\n", dx, dy, w_checked, h ); \ ok = 0; \ @@ -1241,7 +1241,7 @@ dst2[i+j*16+8] = dst1[i+j*16+8]; \ dst2[i+j*16 ] = dst1[i+j*16 ]; \ } \ - if( memcmp( pbuf3, pbuf4, 1024 * sizeof(pixel) ) ) \ + if( memcmp( pbuf3, pbuf4, 1024 * SIZEOF_PIXEL ) ) \ { \ fprintf( stderr, "mc_chroma[mv(%d,%d) %2dx%-2d] [FAILED]\n", dx, dy, w, h ); \ ok = 0; \ @@ -1285,15 +1285,15 @@ { \ for( int i = 0; i < 12; i++ ) \ { \ - memcpy( pbuf3, pbuf1+320, 320 * sizeof(pixel) ); \ - memcpy( pbuf4, pbuf1+320, 320 * sizeof(pixel) ); \ + memcpy( pbuf3, pbuf1+320, 320 * SIZEOF_PIXEL ); \ + memcpy( pbuf4, pbuf1+320, 320 * SIZEOF_PIXEL ); \ if( mc_a.name[i] != mc_ref.name[i] ) \ { \ set_func_name( "%s_%s", #name, pixel_names[i] ); \ used_asm = 1; \ call_c1( mc_c.name[i], pbuf3, (intptr_t)16, pbuf2+1, (intptr_t)16, pbuf1+18, (intptr_t)16, weight ); \ call_a1( mc_a.name[i], pbuf4, (intptr_t)16, pbuf2+1, (intptr_t)16, pbuf1+18, (intptr_t)16, weight ); \ - if( memcmp( pbuf3, pbuf4, 320 * sizeof(pixel) ) ) \ + if( memcmp( pbuf3, pbuf4, 320 * SIZEOF_PIXEL ) ) \ { \ ok = 0; \ fprintf( stderr, #name "[%d]: [FAILED]\n", i ); \ @@ -1316,8 +1316,8 @@ ALIGNED_16( pixel buffC[640] ); \ ALIGNED_16( pixel buffA[640] ); \ int j = X264_MAX( i*4, 2 ); \ - memset( buffC, 0, 640 * sizeof(pixel) ); \ - memset( buffA, 0, 640 * sizeof(pixel) ); \ + memset( buffC, 0, 640 * 
SIZEOF_PIXEL ); \ + memset( buffA, 0, 640 * SIZEOF_PIXEL ); \ x264_t ha; \ ha.mc = mc_a; \ /* w12 is the same as w16 in some cases */ \ @@ -1331,7 +1331,7 @@ mc_a.weight_cache(&ha, &weight); \ call_a1( weight.weightfn[i], buffA, (intptr_t)32, pbuf2+align_off, (intptr_t)32, &weight, 16 ); \ for( int k = 0; k < 16; k++ ) \ - if( memcmp( &buffC[k*32], &buffA[k*32], j * sizeof(pixel) ) ) \ + if( memcmp( &buffC[k*32], &buffA[k*32], j * SIZEOF_PIXEL ) ) \ { \ ok = 0; \ fprintf( stderr, #name "[%d]: [FAILED] s:%d o:%d d%d\n", i, s, o, d ); \ @@ -1440,7 +1440,7 @@ { set_func_name( "plane_copy" ); used_asm = 1; - for( int i = 0; i < sizeof(plane_specs)/sizeof(*plane_specs); i++ ) + for( int i = 0; i < ARRAY_ELEMS(plane_specs); i++ ) { int w = plane_specs[i].w; int h = plane_specs[i].h; @@ -1448,12 +1448,12 @@ intptr_t dst_stride = (w + 127) & ~63; assert( dst_stride * h <= 0x1000 ); pixel *src1 = pbuf1 + X264_MAX(0, -src_stride) * (h-1); - memset( pbuf3, 0, 0x1000*sizeof(pixel) ); - memset( pbuf4, 0, 0x1000*sizeof(pixel) ); + memset( pbuf3, 0, 0x1000*SIZEOF_PIXEL ); + memset( pbuf4, 0, 0x1000*SIZEOF_PIXEL ); call_c( mc_c.plane_copy, pbuf3, dst_stride, src1, src_stride, w, h ); call_a( mc_a.plane_copy, pbuf4, dst_stride, src1, src_stride, w, h ); for( int y = 0; y < h; y++ ) - if( memcmp( pbuf3+y*dst_stride, pbuf4+y*dst_stride, w*sizeof(pixel) ) ) + if( memcmp( pbuf3+y*dst_stride, pbuf4+y*dst_stride, w*SIZEOF_PIXEL ) ) { ok = 0; fprintf( stderr, "plane_copy FAILED: w=%d h=%d stride=%d\n", w, h, (int)src_stride ); @@ -1466,7 +1466,7 @@ { set_func_name( "plane_copy_swap" ); used_asm = 1; - for( int i = 0; i < sizeof(plane_specs)/sizeof(*plane_specs); i++ ) + for( int i = 0; i < ARRAY_ELEMS(plane_specs); i++ ) { int w = (plane_specs[i].w + 1) >> 1; int h = plane_specs[i].h; @@ -1474,12 +1474,12 @@ intptr_t dst_stride = (2*w + 127) & ~63; assert( dst_stride * h <= 0x1000 ); pixel *src1 = pbuf1 + X264_MAX(0, -src_stride) * (h-1); - memset( pbuf3, 0, 0x1000*sizeof(pixel) ); - 
memset( pbuf4, 0, 0x1000*sizeof(pixel) ); + memset( pbuf3, 0, 0x1000*SIZEOF_PIXEL ); + memset( pbuf4, 0, 0x1000*SIZEOF_PIXEL ); call_c( mc_c.plane_copy_swap, pbuf3, dst_stride, src1, src_stride, w, h ); call_a( mc_a.plane_copy_swap, pbuf4, dst_stride, src1, src_stride, w, h ); for( int y = 0; y < h; y++ ) - if( memcmp( pbuf3+y*dst_stride, pbuf4+y*dst_stride, 2*w*sizeof(pixel) ) ) + if( memcmp( pbuf3+y*dst_stride, pbuf4+y*dst_stride, 2*w*SIZEOF_PIXEL ) ) { ok = 0; fprintf( stderr, "plane_copy_swap FAILED: w=%d h=%d stride=%d\n", w, h, (int)src_stride ); @@ -1492,7 +1492,7 @@ { set_func_name( "plane_copy_interleave" ); used_asm = 1; - for( int i = 0; i < sizeof(plane_specs)/sizeof(*plane_specs); i++ ) + for( int i = 0; i < ARRAY_ELEMS(plane_specs); i++ ) { int w = (plane_specs[i].w + 1) >> 1; int h = plane_specs[i].h; @@ -1500,12 +1500,12 @@ intptr_t dst_stride = (2*w + 127) & ~63; assert( dst_stride * h <= 0x1000 ); pixel *src1 = pbuf1 + X264_MAX(0, -src_stride) * (h-1); - memset( pbuf3, 0, 0x1000*sizeof(pixel) ); - memset( pbuf4, 0, 0x1000*sizeof(pixel) ); + memset( pbuf3, 0, 0x1000*SIZEOF_PIXEL ); + memset( pbuf4, 0, 0x1000*SIZEOF_PIXEL ); call_c( mc_c.plane_copy_interleave, pbuf3, dst_stride, src1, src_stride, src1+1024, src_stride+16, w, h ); call_a( mc_a.plane_copy_interleave, pbuf4, dst_stride, src1, src_stride, src1+1024, src_stride+16, w, h ); for( int y = 0; y < h; y++ ) - if( memcmp( pbuf3+y*dst_stride, pbuf4+y*dst_stride, 2*w*sizeof(pixel) ) ) + if( memcmp( pbuf3+y*dst_stride, pbuf4+y*dst_stride, 2*w*SIZEOF_PIXEL ) ) { ok = 0; fprintf( stderr, "plane_copy_interleave FAILED: w=%d h=%d stride=%d\n", w, h, (int)src_stride ); @@ -1518,7 +1518,7 @@ { set_func_name( "plane_copy_deinterleave" ); used_asm = 1; - for( int i = 0; i < sizeof(plane_specs)/sizeof(*plane_specs); i++ ) + for( int i = 0; i < ARRAY_ELEMS(plane_specs); i++ ) { int w = (plane_specs[i].w + 1) >> 1; int h = plane_specs[i].h; @@ -1544,11 +1544,11 @@ { set_func_name( 
"plane_copy_deinterleave_yuyv" ); used_asm = 1; - for( int i = 0; i < sizeof(plane_specs)/sizeof(*plane_specs); i++ ) + for( int i = 0; i < ARRAY_ELEMS(plane_specs); i++ ) { int w = (plane_specs[i].w + 1) >> 1; int h = plane_specs[i].h; - intptr_t dst_stride = ALIGN( w, 32/sizeof(pixel) ); + intptr_t dst_stride = ALIGN( w, 32/SIZEOF_PIXEL ); intptr_t src_stride = (plane_specs[i].src_stride + 1) >> 1; intptr_t offv = dst_stride*h; pixel *src1 = pbuf1 + X264_MAX(0, -src_stride) * (h-1); @@ -1558,8 +1558,8 @@ call_c1( mc_c.plane_copy_deinterleave_yuyv, pbuf3, dst_stride, pbuf3+offv, dst_stride, src1, src_stride, w, h ); call_a1( mc_a.plane_copy_deinterleave_yuyv, pbuf4, dst_stride, pbuf4+offv, dst_stride, src1, src_stride, w, h ); for( int y = 0; y < h; y++ ) - if( memcmp( pbuf3+y*dst_stride, pbuf4+y*dst_stride, w*sizeof(pixel) ) || - memcmp( pbuf3+y*dst_stride+offv, pbuf4+y*dst_stride+offv, w*sizeof(pixel) ) ) + if( memcmp( pbuf3+y*dst_stride, pbuf4+y*dst_stride, w*SIZEOF_PIXEL ) || + memcmp( pbuf3+y*dst_stride+offv, pbuf4+y*dst_stride+offv, w*SIZEOF_PIXEL ) ) { fprintf( stderr, "plane_copy_deinterleave_yuyv FAILED: w=%d h=%d stride=%d\n", w, h, (int)src_stride ); break; @@ -1571,20 +1571,21 @@ { set_func_name( "plane_copy_deinterleave_rgb" ); used_asm = 1; - for( int i = 0; i < sizeof(plane_specs)/sizeof(*plane_specs); i++ ) + for( int i = 0; i < ARRAY_ELEMS(plane_specs); i++ ) { int w = (plane_specs[i].w + 2) >> 2; int h = plane_specs[i].h; intptr_t src_stride = plane_specs[i].src_stride; intptr_t dst_stride = ALIGN( w, 16 ); intptr_t offv = dst_stride*h + 16; + pixel *src1 = pbuf1 + X264_MAX(0, -src_stride) * (h-1); for( int pw = 3; pw <= 4; pw++ ) { memset( pbuf3, 0, 0x1000 ); memset( pbuf4, 0, 0x1000 ); - call_c( mc_c.plane_copy_deinterleave_rgb, pbuf3, dst_stride, pbuf3+offv, dst_stride, pbuf3+2*offv, dst_stride, pbuf1, src_stride, pw, w, h ); - call_a( mc_a.plane_copy_deinterleave_rgb, pbuf4, dst_stride, pbuf4+offv, dst_stride, pbuf4+2*offv, dst_stride, pbuf1, 
src_stride, pw, w, h ); + call_c( mc_c.plane_copy_deinterleave_rgb, pbuf3, dst_stride, pbuf3+offv, dst_stride, pbuf3+2*offv, dst_stride, src1, src_stride, pw, w, h ); + call_a( mc_a.plane_copy_deinterleave_rgb, pbuf4, dst_stride, pbuf4+offv, dst_stride, pbuf4+2*offv, dst_stride, src1, src_stride, pw, w, h ); for( int y = 0; y < h; y++ ) if( memcmp( pbuf3+y*dst_stride+0*offv, pbuf4+y*dst_stride+0*offv, w ) || memcmp( pbuf3+y*dst_stride+1*offv, pbuf4+y*dst_stride+1*offv, w ) || @@ -1603,12 +1604,12 @@ { set_func_name( "plane_copy_deinterleave_v210" ); ok = 1; used_asm = 1; - for( int i = 0; i < sizeof(plane_specs)/sizeof(*plane_specs); i++ ) + for( int i = 0; i < ARRAY_ELEMS(plane_specs); i++ ) { int w = (plane_specs[i].w + 1) >> 1; int h = plane_specs[i].h; intptr_t dst_stride = ALIGN( w, 32 ); - intptr_t src_stride = (w + 47) / 48 * 128 / sizeof(uint32_t); + intptr_t src_stride = (w + 47) / 48 * 128 / (int)sizeof(uint32_t); intptr_t offv = dst_stride*h + 32; memset( pbuf3, 0, 0x1000 ); memset( pbuf4, 0, 0x1000 ); @@ -1634,14 +1635,14 @@ void *tmp = pbuf3+49*64; set_func_name( "hpel_filter" ); ok = 1; used_asm = 1; - memset( pbuf3, 0, 4096 * sizeof(pixel) ); - memset( pbuf4, 0, 4096 * sizeof(pixel) ); + memset( pbuf3, 0, 4096 * SIZEOF_PIXEL ); + memset( pbuf4, 0, 4096 * SIZEOF_PIXEL ); call_c( mc_c.hpel_filter, dstc[0], dstc[1], dstc[2], srchpel, (intptr_t)64, 48, 10, tmp ); call_a( mc_a.hpel_filter, dsta[0], dsta[1], dsta[2], srchpel, (intptr_t)64, 48, 10, tmp ); for( int i = 0; i < 3; i++ ) for( int j = 0; j < 10; j++ ) //FIXME ideally the first pixels would match too, but they aren't actually used - if( memcmp( dstc[i]+j*64+2, dsta[i]+j*64+2, 43 * sizeof(pixel) ) ) + if( memcmp( dstc[i]+j*64+2, dsta[i]+j*64+2, 43 * SIZEOF_PIXEL ) ) { ok = 0; fprintf( stderr, "hpel filter differs at plane %c line %d\n", "hvc"[i], j ); @@ -1671,7 +1672,7 @@ for( int i = 0; i < 8; i++ ) { for( int j = 0; j < 4; j++ ) - if( memcmp( dstc[j]+i*stride_lowres, dsta[j]+i*stride_lowres, w 
* sizeof(pixel) ) ) + if( memcmp( dstc[j]+i*stride_lowres, dsta[j]+i*stride_lowres, w * SIZEOF_PIXEL ) ) { ok = 0; fprintf( stderr, "frame_init_lowres differs at plane %d line %d\n", j, i ); @@ -1862,7 +1863,7 @@ ok = 1; used_asm = 1; for( size_t size = 16; size < 512; size += 16 ) { - for( int i = 0; i < size; i++ ) + for( size_t i = 0; i < size; i++ ) buf1[i] = rand(); memset( buf4-1, 0xAA, size + 2 ); call_c( mc_c.memcpy_aligned, buf3, buf1, size ); @@ -1899,7 +1900,7 @@ return ret; } -static int check_deblock( int cpu_ref, int cpu_new ) +static int check_deblock( uint32_t cpu_ref, uint32_t cpu_new ) { x264_deblock_function_t db_c; x264_deblock_function_t db_ref; @@ -1931,14 +1932,14 @@ for( int j = 0; j < 1024; j++ ) \ /* two distributions of random to excersize different failure modes */ \ pbuf3[j] = rand() & (i&1 ? 0xf : PIXEL_MAX ); \ - memcpy( pbuf4, pbuf3, 1024 * sizeof(pixel) ); \ + memcpy( pbuf4, pbuf3, 1024 * SIZEOF_PIXEL ); \ if( db_a.name != db_ref.name ) \ { \ set_func_name( #name ); \ used_asm = 1; \ call_c1( db_c.name, pbuf3+off, (intptr_t)32, alphas[i], betas[i], ##__VA_ARGS__ ); \ call_a1( db_a.name, pbuf4+off, (intptr_t)32, alphas[i], betas[i], ##__VA_ARGS__ ); \ - if( memcmp( pbuf3, pbuf4, 1024 * sizeof(pixel) ) ) \ + if( memcmp( pbuf3, pbuf4, 1024 * SIZEOF_PIXEL ) ) \ { \ ok = 0; \ fprintf( stderr, #name "(a=%d, b=%d): [FAILED]\n", alphas[i], betas[i] ); \ @@ -2012,7 +2013,7 @@ return ret; } -static int check_quant( int cpu_ref, int cpu_new ) +static int check_quant( uint32_t cpu_ref, uint32_t cpu_new ) { x264_quant_function_t qf_c; x264_quant_function_t qf_ref; @@ -2425,7 +2426,7 @@ return ret; } -static int check_intra( int cpu_ref, int cpu_new ) +static int check_intra( uint32_t cpu_ref, uint32_t cpu_new ) { int ret = 0, ok = 1, used_asm = 0; ALIGNED_ARRAY_32( pixel, edge,[36] ); @@ -2459,7 +2460,7 @@ x264_predict_8x8_init( cpu_new, ip_a.predict_8x8, &ip_a.predict_8x8_filter ); x264_predict_4x4_init( cpu_new, ip_a.predict_4x4 ); - memcpy( 
fdec, pbuf1, 32*20 * sizeof(pixel) );\ + memcpy( fdec, pbuf1, 32*20 * SIZEOF_PIXEL );\ ip_c.predict_8x8_filter( fdec+48, edge, ALL_NEIGHBORS, ALL_NEIGHBORS ); @@ -2468,13 +2469,13 @@ {\ set_func_name( "intra_%s_%s", #name, intra_##name##_names[dir] );\ used_asm = 1;\ - memcpy( pbuf3, fdec, FDEC_STRIDE*20 * sizeof(pixel) );\ - memcpy( pbuf4, fdec, FDEC_STRIDE*20 * sizeof(pixel) );\ - for( int a = 0; a < (do_bench ? 64/sizeof(pixel) : 1); a += align )\ + memcpy( pbuf3, fdec, FDEC_STRIDE*20 * SIZEOF_PIXEL );\ + memcpy( pbuf4, fdec, FDEC_STRIDE*20 * SIZEOF_PIXEL );\ + for( int a = 0; a < (do_bench ? 64/SIZEOF_PIXEL : 1); a += align )\ {\ call_c##bench( ip_c.name[dir], pbuf3+48+a, ##__VA_ARGS__ );\ call_a##bench( ip_a.name[dir], pbuf4+48+a, ##__VA_ARGS__ );\ - if( memcmp( pbuf3, pbuf4, FDEC_STRIDE*20 * sizeof(pixel) ) )\ + if( memcmp( pbuf3, pbuf4, FDEC_STRIDE*20 * SIZEOF_PIXEL ) )\ {\ fprintf( stderr, #name "[%d] : [FAILED]\n", dir );\ ok = 0;\ @@ -2526,13 +2527,13 @@ if( !(i&7) || ((i&MB_TOPRIGHT) && !(i&MB_TOP)) ) continue; int neighbor = (i&24)>>1; - memset( edge, 0, 36*sizeof(pixel) ); - memset( edge2, 0, 36*sizeof(pixel) ); + memset( edge, 0, 36*SIZEOF_PIXEL ); + memset( edge2, 0, 36*SIZEOF_PIXEL ); call_c( ip_c.predict_8x8_filter, pbuf1+48, edge, neighbor, i&7 ); call_a( ip_a.predict_8x8_filter, pbuf1+48, edge2, neighbor, i&7 ); if( !(neighbor&MB_TOPLEFT) ) edge[15] = edge2[15] = 0; - if( memcmp( edge+7, edge2+7, (i&MB_TOPRIGHT ? 26 : i&MB_TOP ? 17 : 8) * sizeof(pixel) ) ) + if( memcmp( edge+7, edge2+7, (i&MB_TOPRIGHT ? 26 : i&MB_TOP ? 
17 : 8) * SIZEOF_PIXEL ) ) { fprintf( stderr, "predict_8x8_filter : [FAILED] %d %d\n", (i&24)>>1, i&7); ok = 0; @@ -2600,7 +2601,7 @@ DECL_CABAC(c) #if HAVE_MMX DECL_CABAC(asm) -#elif defined(ARCH_AARCH64) +#elif HAVE_AARCH64 DECL_CABAC(asm) #else #define run_cabac_decision_asm run_cabac_decision_c @@ -2610,7 +2611,7 @@ extern const uint8_t x264_count_cat_m1[14]; -static int check_cabac( int cpu_ref, int cpu_new ) +static int check_cabac( uint32_t cpu_ref, uint32_t cpu_new ) { int ret = 0, ok = 1, used_asm = 0; x264_t h; @@ -2725,7 +2726,7 @@ return ret; } -static int check_bitstream( int cpu_ref, int cpu_new ) +static int check_bitstream( uint32_t cpu_ref, uint32_t cpu_new ) { x264_bitstream_function_t bs_c; x264_bitstream_function_t bs_ref; @@ -2775,7 +2776,7 @@ return ret; } -static int check_all_funcs( int cpu_ref, int cpu_new ) +static int check_all_funcs( uint32_t cpu_ref, uint32_t cpu_new ) { return check_pixel( cpu_ref, cpu_new ) + check_dct( cpu_ref, cpu_new ) @@ -2787,7 +2788,7 @@ + check_bitstream( cpu_ref, cpu_new ); } -static int add_flags( int *cpu_ref, int *cpu_new, int flags, const char *name ) +static int add_flags( uint32_t *cpu_ref, uint32_t *cpu_new, uint32_t flags, const char *name ) { *cpu_ref = *cpu_new; *cpu_new |= flags; @@ -2804,9 +2805,9 @@ static int check_all_flags( void ) { int ret = 0; - int cpu0 = 0, cpu1 = 0; + uint32_t cpu0 = 0, cpu1 = 0; uint32_t cpu_detect = x264_cpu_detect(); -#if ARCH_X86 || ARCH_X86_64 +#if HAVE_MMX if( cpu_detect & X264_CPU_AVX512 ) simd_warmup_func = x264_checkasm_warmup_avx512; else if( cpu_detect & X264_CPU_AVX ) @@ -2814,7 +2815,7 @@ #endif simd_warmup(); -#if HAVE_MMX +#if ARCH_X86 || ARCH_X86_64 if( cpu_detect & X264_CPU_MMX2 ) { ret |= add_flags( &cpu0, &cpu1, X264_CPU_MMX | X264_CPU_MMX2, "MMX" ); @@ -2913,7 +2914,7 @@ return ret; } -static int main_internal( int argc, char **argv ) +REALIGN_STACK int main( int argc, char **argv ) { #ifdef _WIN32 /* Disable the Windows Error Reporting dialog */ @@ 
-2940,8 +2941,8 @@ fprintf( stderr, "x264: using random seed %u\n", seed ); srand( seed ); - buf1 = x264_malloc( 0x1e00 + 0x2000*sizeof(pixel) ); - pbuf1 = x264_malloc( 0x1e00*sizeof(pixel) ); + buf1 = x264_malloc( 0x1e00 + 0x2000*SIZEOF_PIXEL ); + pbuf1 = x264_malloc( 0x1e00*SIZEOF_PIXEL ); if( !buf1 || !pbuf1 ) { fprintf( stderr, "malloc failed, unable to initiate tests!\n" ); @@ -2950,7 +2951,7 @@ #define INIT_POINTER_OFFSETS\ buf2 = buf1 + 0xf00;\ buf3 = buf2 + 0xf00;\ - buf4 = buf3 + 0x1000*sizeof(pixel);\ + buf4 = buf3 + 0x1000*SIZEOF_PIXEL;\ pbuf2 = pbuf1 + 0xf00;\ pbuf3 = (pixel*)buf3;\ pbuf4 = (pixel*)buf4; @@ -2960,7 +2961,7 @@ buf1[i] = rand() & 0xFF; pbuf1[i] = rand() & PIXEL_MAX; } - memset( buf1+0x1e00, 0, 0x2000*sizeof(pixel) ); + memset( buf1+0x1e00, 0, 0x2000*SIZEOF_PIXEL ); if( x264_stack_pagealign( check_all_flags, 0 ) ) { @@ -2972,8 +2973,3 @@ print_bench(); return 0; } - -int main( int argc, char **argv ) -{ - return x264_stack_align( main_internal, argc, argv ); -} diff -Nru x264-0.157.2935+git545de2f/version.sh x264-0.160.3011+gitcde9a93/version.sh --- x264-0.157.2935+git545de2f/version.sh 2018-09-25 12:35:00.000000000 +0000 +++ x264-0.160.3011+gitcde9a93/version.sh 2020-07-13 10:30:22.000000000 +0000 @@ -1,28 +1,5 @@ #!/bin/sh - -cd "$(dirname "$0")" >/dev/null && [ -f x264.h ] || exit 1 - -api="$(grep '#define X264_BUILD' < x264.h | sed 's/^.* \([1-9][0-9]*\).*$/\1/')" -ver="x" -version="" - -if [ -d .git ] && command -v git >/dev/null 2>&1 ; then - localver="$(($(git rev-list HEAD | wc -l)))" - if [ "$localver" -gt 1 ] ; then - ver_diff="$(($(git rev-list origin/master..HEAD | wc -l)))" - ver="$((localver-ver_diff))" - echo "#define X264_REV $ver" - echo "#define X264_REV_DIFF $ver_diff" - if [ "$ver_diff" -ne 0 ] ; then - ver="$ver+$ver_diff" - fi - if git status | grep -q "modified:" ; then - ver="${ver}M" - fi - ver="$ver $(git rev-list -n 1 HEAD | cut -c 1-7)" - version=" r$ver" - fi -fi - -echo "#define X264_VERSION \"$version\"" 
-echo "#define X264_POINTVER \"0.$api.$ver\"" +# Script modified from upstream source for Debian packaging since packaging +# won't include .git repository. +echo '#define X264_VERSION " r3011 cde9a93"' +echo '#define X264_POINTVER "0.160.3011 cde9a93"' diff -Nru x264-0.157.2935+git545de2f/x264.c x264-0.160.3011+gitcde9a93/x264.c --- x264-0.157.2935+git545de2f/x264.c 2018-09-25 12:35:00.000000000 +0000 +++ x264-0.160.3011+gitcde9a93/x264.c 2020-07-13 10:30:21.000000000 +0000 @@ -1,7 +1,7 @@ /***************************************************************************** * x264: top-level x264cli functions ***************************************************************************** - * Copyright (C) 2003-2018 x264 project + * Copyright (C) 2003-2020 x264 project * * Authors: Loren Merritt * Laurent Aimar @@ -166,11 +166,52 @@ /* video filter operation struct */ static cli_vid_filter_t filter; -static const char * const demuxer_names[] = +const char * const x264_avcintra_class_names[] = { "50", "100", "200", 0 }; +const char * const x264_cqm_names[] = { "flat", "jvt", 0 }; +const char * const x264_log_level_names[] = { "none", "error", "warning", "info", "debug", 0 }; +const char * const x264_partition_names[] = { "p8x8", "p4x4", "b8x8", "i8x8", "i4x4", "none", "all", 0 }; +const char * const x264_pulldown_names[] = { "none", "22", "32", "64", "double", "triple", "euro", 0 }; +const char * const x264_range_names[] = { "auto", "tv", "pc", 0 }; + +const char * const x264_output_csp_names[] = { - "auto", - "raw", - "y4m", +#if !X264_CHROMA_FORMAT || X264_CHROMA_FORMAT == X264_CSP_I400 + "i400", +#endif +#if !X264_CHROMA_FORMAT || X264_CHROMA_FORMAT == X264_CSP_I420 + "i420", +#endif +#if !X264_CHROMA_FORMAT || X264_CHROMA_FORMAT == X264_CSP_I422 + "i422", +#endif +#if !X264_CHROMA_FORMAT || X264_CHROMA_FORMAT == X264_CSP_I444 + "i444", "rgb", +#endif + 0 +}; + +const char * const x264_valid_profile_names[] = +{ +#if !X264_CHROMA_FORMAT || X264_CHROMA_FORMAT <= 
X264_CSP_I420 +#if HAVE_BITDEPTH8 +#if !X264_CHROMA_FORMAT || X264_CHROMA_FORMAT == X264_CSP_I420 + "baseline", "main", +#endif + "high", +#endif +#if HAVE_BITDEPTH10 + "high10", +#endif +#endif +#if !X264_CHROMA_FORMAT || X264_CHROMA_FORMAT == X264_CSP_I422 + "high422", +#endif + "high444", 0 +}; + +const char * const x264_demuxer_names[] = +{ + "auto", "raw", "y4m", #if HAVE_AVS "avs", #endif @@ -183,36 +224,15 @@ 0 }; -static const char * const muxer_names[] = +const char * const x264_muxer_names[] = { - "auto", - "raw", - "mkv", - "flv", + "auto", "raw", "mkv", "flv", #if HAVE_GPAC || HAVE_LSMASH "mp4", #endif 0 }; -static const char * const pulldown_names[] = { "none", "22", "32", "64", "double", "triple", "euro", 0 }; -static const char * const log_level_names[] = { "none", "error", "warning", "info", "debug", 0 }; -static const char * const output_csp_names[] = -{ -#if !X264_CHROMA_FORMAT || X264_CHROMA_FORMAT == X264_CSP_I400 - "i400", -#endif -#if !X264_CHROMA_FORMAT || X264_CHROMA_FORMAT == X264_CSP_I420 - "i420", -#endif -#if !X264_CHROMA_FORMAT || X264_CHROMA_FORMAT == X264_CSP_I422 - "i422", -#endif -#if !X264_CHROMA_FORMAT || X264_CHROMA_FORMAT == X264_CSP_I444 - "i444", "rgb", -#endif - 0 -}; static const char * const chroma_format_names[] = { [0] = "all", @@ -222,8 +242,6 @@ [X264_CSP_I444] = "i444" }; -static const char * const range_names[] = { "auto", "tv", "pc", 0 }; - typedef struct { int mod; @@ -355,8 +373,11 @@ #endif } -static int main_internal( int argc, char **argv ) +REALIGN_STACK int main( int argc, char **argv ) { + if( argc == 4 && !strcmp( argv[1], "--autocomplete" ) ) + return x264_cli_autocomplete( argv[2], argv[3] ); + x264_param_t param; cli_opt_t opt = {0}; int ret = 0; @@ -407,11 +428,6 @@ return ret; } -int main( int argc, char **argv ) -{ - return x264_stack_align( main_internal, argc, argv ); -} - static char const *strtable_lookup( const char * const table[], int idx ) { int i = 0; while( table[i] ) i++; @@ -588,23 +604,7 
@@ " - high444:\n" " Support for bit depth 8-10.\n" " Support for 4:2:0/4:2:2/4:4:4 chroma subsampling.\n" ); - else H0( - " - " -#if !X264_CHROMA_FORMAT || X264_CHROMA_FORMAT <= X264_CSP_I420 -#if HAVE_BITDEPTH8 -#if !X264_CHROMA_FORMAT || X264_CHROMA_FORMAT == X264_CSP_I420 - "baseline,main," -#endif - "high," -#endif -#if HAVE_BITDEPTH10 - "high10," -#endif -#endif -#if !X264_CHROMA_FORMAT || X264_CHROMA_FORMAT == X264_CSP_I422 - "high422," -#endif - "high444\n" ); + else H0( " - %s\n", stringify_names( buf, x264_valid_profile_names ) ); H0( " --preset Use a preset to select encoding settings [medium]\n" " Overridden by user settings.\n" ); H2( " - ultrafast:\n" @@ -791,9 +791,8 @@ H1( "Analysis:\n" ); H1( "\n" ); H1( " -A, --partitions Partitions to consider [\"p8x8,b8x8,i8x8,i4x4\"]\n" - " - p8x8, p4x4, b8x8, i8x8, i4x4\n" - " - none, all\n" - " (p4x4 requires p8x8. i8x8 requires --8x8dct.)\n" ); + " - %s\n" + " (p4x4 requires p8x8. i8x8 requires --8x8dct.)\n", stringify_names( buf, x264_partition_names ) ); H1( " --direct Direct MV prediction mode [\"%s\"]\n" " - none, spatial, temporal, auto\n", strtable_lookup( x264_direct_pred_names, defaults->analyse.i_direct_mv_pred ) ); @@ -845,8 +844,8 @@ H2( " --deadzone-inter Set the size of the inter luma quantization deadzone [%d]\n", defaults->analyse.i_luma_deadzone[0] ); H2( " --deadzone-intra Set the size of the intra luma quantization deadzone [%d]\n", defaults->analyse.i_luma_deadzone[1] ); H2( " Deadzones should be in the range 0 - 32.\n" ); - H2( " --cqm Preset quant matrices [\"flat\"]\n" - " - jvt, flat\n" ); + H2( " --cqm Preset quant matrices [\"%s\"]\n" + " - %s\n", x264_cqm_names[0], stringify_names( buf, x264_cqm_names ) ); H1( " --cqmfile Read custom quant matrices from a JM-compatible file\n" ); H2( " Overrides any other --cqm* options.\n" ); H2( " --cqm4 Set all 4x4 quant matrices\n" @@ -869,7 +868,7 @@ " - component, pal, ntsc, secam, mac, undef\n", strtable_lookup( x264_vidformat_names, 
defaults->vui.i_vidformat ) ); H2( " --range Specify color range [\"%s\"]\n" - " - %s\n", range_names[0], stringify_names( buf, range_names ) ); + " - %s\n", x264_range_names[0], stringify_names( buf, x264_range_names ) ); H2( " --colorprim Specify color primaries [\"%s\"]\n" " - undef, bt709, bt470m, bt470bg, smpte170m,\n" " smpte240m, film, bt2020, smpte428,\n" @@ -907,24 +906,24 @@ H0( "\n" ); H0( " -o, --output Specify output file\n" ); H1( " --muxer Specify output container format [\"%s\"]\n" - " - %s\n", muxer_names[0], stringify_names( buf, muxer_names ) ); + " - %s\n", x264_muxer_names[0], stringify_names( buf, x264_muxer_names ) ); H1( " --demuxer Specify input container format [\"%s\"]\n" - " - %s\n", demuxer_names[0], stringify_names( buf, demuxer_names ) ); + " - %s\n", x264_demuxer_names[0], stringify_names( buf, x264_demuxer_names ) ); H1( " --input-fmt Specify input file format (requires lavf support)\n" ); H1( " --input-csp Specify input colorspace format for raw input\n" ); print_csp_names( longhelp ); H1( " --output-csp Specify output colorspace [\"%s\"]\n" " - %s\n", #if X264_CHROMA_FORMAT - output_csp_names[0], + x264_output_csp_names[0], #else "i420", #endif - stringify_names( buf, output_csp_names ) ); + stringify_names( buf, x264_output_csp_names ) ); H1( " --input-depth Specify input bit depth for raw input\n" ); H1( " --output-depth Specify output bit depth\n" ); H1( " --input-range Specify input color range [\"%s\"]\n" - " - %s\n", range_names[0], stringify_names( buf, range_names ) ); + " - %s\n", x264_range_names[0], stringify_names( buf, x264_range_names ) ); H1( " --input-res Specify input resolution (width x height)\n" ); H1( " --index Filename for input index file\n" ); H0( " --sar width:height Specify Sample Aspect Ratio\n" ); @@ -934,7 +933,7 @@ H0( " --level Specify level (as defined by Annex A)\n" ); H1( " --bluray-compat Enable compatibility hacks for Blu-ray support\n" ); H1( " --avcintra-class Use compatibility hacks for 
AVC-Intra class\n" - " - 50, 100, 200\n" ); + " - %s\n", stringify_names( buf, x264_avcintra_class_names ) ); H1( " --avcintra-flavor AVC-Intra flavor [\"%s\"]\n" " - %s\n", x264_avcintra_flavor_names[0], stringify_names( buf, x264_avcintra_flavor_names ) ); H1( " --stitchable Don't optimize headers based on video content\n" @@ -944,8 +943,8 @@ H1( " --no-progress Don't show the progress indicator while encoding\n" ); H0( " --quiet Quiet Mode\n" ); H1( " --log-level Specify the maximum level of logging [\"%s\"]\n" - " - %s\n", strtable_lookup( log_level_names, cli_log_level - X264_LOG_NONE ), - stringify_names( buf, log_level_names ) ); + " - %s\n", strtable_lookup( x264_log_level_names, cli_log_level - X264_LOG_NONE ), + stringify_names( buf, x264_log_level_names ) ); H1( " --psnr Enable PSNR computation\n" ); H1( " --ssim Enable SSIM computation\n" ); H1( " --threads Force a specific number of threads\n" ); @@ -1396,9 +1395,9 @@ static int parse( int argc, char **argv, x264_param_t *param, cli_opt_t *opt ) { char *input_filename = NULL; - const char *demuxer = demuxer_names[0]; + const char *demuxer = x264_demuxer_names[0]; char *output_filename = NULL; - const char *muxer = muxer_names[0]; + const char *muxer = x264_muxer_names[0]; char *tcfile_name = NULL; x264_param_t defaults; char *profile = NULL; @@ -1480,10 +1479,10 @@ output_filename = optarg; break; case OPT_MUXER: - FAIL_IF_ERROR( parse_enum_name( optarg, muxer_names, &muxer ), "Unknown muxer `%s'\n", optarg ); + FAIL_IF_ERROR( parse_enum_name( optarg, x264_muxer_names, &muxer ), "Unknown muxer `%s'\n", optarg ); break; case OPT_DEMUXER: - FAIL_IF_ERROR( parse_enum_name( optarg, demuxer_names, &demuxer ), "Unknown demuxer `%s'\n", optarg ); + FAIL_IF_ERROR( parse_enum_name( optarg, x264_demuxer_names, &demuxer ), "Unknown demuxer `%s'\n", optarg ); break; case OPT_INDEX: input_opt.index_file = optarg; @@ -1508,7 +1507,7 @@ cli_log_level = param->i_log_level = X264_LOG_DEBUG; break; case OPT_LOG_LEVEL: - 
if( !parse_enum_value( optarg, log_level_names, &cli_log_level ) ) + if( !parse_enum_value( optarg, x264_log_level_names, &cli_log_level ) ) cli_log_level += X264_LOG_NONE; else cli_log_level = atoi( optarg ); @@ -1547,7 +1546,7 @@ input_opt.timebase = optarg; break; case OPT_PULLDOWN: - FAIL_IF_ERROR( parse_enum_value( optarg, pulldown_names, &opt->i_pulldown ), "Unknown pulldown `%s'\n", optarg ); + FAIL_IF_ERROR( parse_enum_value( optarg, x264_pulldown_names, &opt->i_pulldown ), "Unknown pulldown `%s'\n", optarg ); break; case OPT_VIDEO_FILTER: vid_filters = optarg; @@ -1571,7 +1570,7 @@ output_opt.use_dts_compress = 1; break; case OPT_OUTPUT_CSP: - FAIL_IF_ERROR( parse_enum_value( optarg, output_csp_names, &output_csp ), "Unknown output csp `%s'\n", optarg ); + FAIL_IF_ERROR( parse_enum_value( optarg, x264_output_csp_names, &output_csp ), "Unknown output csp `%s'\n", optarg ); // correct the parsed value to the libx264 csp value #if X264_CHROMA_FORMAT static const uint8_t output_csp_fix[] = { X264_CHROMA_FORMAT, X264_CSP_RGB }; @@ -1581,11 +1580,11 @@ param->i_csp = output_csp = output_csp_fix[output_csp]; break; case OPT_INPUT_RANGE: - FAIL_IF_ERROR( parse_enum_value( optarg, range_names, &input_opt.input_range ), "Unknown input range `%s'\n", optarg ); + FAIL_IF_ERROR( parse_enum_value( optarg, x264_range_names, &input_opt.input_range ), "Unknown input range `%s'\n", optarg ); input_opt.input_range += RANGE_AUTO; break; case OPT_RANGE: - FAIL_IF_ERROR( parse_enum_value( optarg, range_names, ¶m->vui.b_fullrange ), "Unknown range `%s'\n", optarg ); + FAIL_IF_ERROR( parse_enum_value( optarg, x264_range_names, ¶m->vui.b_fullrange ), "Unknown range `%s'\n", optarg ); input_opt.output_range = param->vui.b_fullrange += RANGE_AUTO; break; default: @@ -1668,6 +1667,10 @@ info.height, info.interlaced ? 'i' : 'p', info.sar_width, info.sar_height, info.fps_num, info.fps_den, info.vfr ? 
'v' : 'c' ); + FAIL_IF_ERROR( info.width <= 0 || info.height <= 0 || + info.width > MAX_RESOLUTION || info.height > MAX_RESOLUTION, + "invalid width x height (%dx%d)\n", info.width, info.height ); + if( tcfile_name ) { FAIL_IF_ERROR( b_user_fps, "--fps + --tcfile-in is incompatible.\n" ); diff -Nru x264-0.157.2935+git545de2f/x264cli.h x264-0.160.3011+gitcde9a93/x264cli.h --- x264-0.157.2935+git545de2f/x264cli.h 2018-09-25 12:35:00.000000000 +0000 +++ x264-0.160.3011+gitcde9a93/x264cli.h 2020-07-13 10:30:21.000000000 +0000 @@ -1,7 +1,7 @@ /***************************************************************************** * x264cli.h: x264cli common ***************************************************************************** - * Copyright (C) 2003-2018 x264 project + * Copyright (C) 2003-2020 x264 project * * Authors: Laurent Aimar * Loren Merritt @@ -32,8 +32,21 @@ /* In microseconds */ #define UPDATE_INTERVAL 250000 +#define MAX_RESOLUTION 16384 + typedef void *hnd_t; +extern const char * const x264_avcintra_class_names[]; +extern const char * const x264_cqm_names[]; +extern const char * const x264_log_level_names[]; +extern const char * const x264_partition_names[]; +extern const char * const x264_pulldown_names[]; +extern const char * const x264_range_names[]; +extern const char * const x264_output_csp_names[]; +extern const char * const x264_valid_profile_names[]; +extern const char * const x264_demuxer_names[]; +extern const char * const x264_muxer_names[]; + static inline uint64_t gcd( uint64_t a, uint64_t b ) { while( 1 ) @@ -62,6 +75,7 @@ void x264_cli_log( const char *name, int i_level, const char *fmt, ... ); void x264_cli_printf( int i_level, const char *fmt, ... 
); +int x264_cli_autocomplete( const char *prev, const char *cur ); #ifdef _WIN32 void x264_cli_set_console_title( const char *title ); diff -Nru x264-0.157.2935+git545de2f/x264dll.c x264-0.160.3011+gitcde9a93/x264dll.c --- x264-0.157.2935+git545de2f/x264dll.c 2018-09-25 12:35:00.000000000 +0000 +++ x264-0.160.3011+gitcde9a93/x264dll.c 2020-07-13 10:30:21.000000000 +0000 @@ -1,7 +1,7 @@ /***************************************************************************** * x264dll: x264 DLLMain for win32 ***************************************************************************** - * Copyright (C) 2009-2018 x264 project + * Copyright (C) 2009-2020 x264 project * * Authors: Anton Mitrofanov * diff -Nru x264-0.157.2935+git545de2f/x264.h x264-0.160.3011+gitcde9a93/x264.h --- x264-0.157.2935+git545de2f/x264.h 2018-09-25 12:35:00.000000000 +0000 +++ x264-0.160.3011+gitcde9a93/x264.h 2020-07-13 10:30:21.000000000 +0000 @@ -1,7 +1,7 @@ /***************************************************************************** * x264.h: x264 public header ***************************************************************************** - * Copyright (C) 2003-2018 x264 project + * Copyright (C) 2003-2020 x264 project * * Authors: Laurent Aimar * Loren Merritt @@ -45,7 +45,20 @@ #include "x264_config.h" -#define X264_BUILD 157 +#define X264_BUILD 160 + +#ifdef _WIN32 +# define X264_DLL_IMPORT __declspec(dllimport) +# define X264_DLL_EXPORT __declspec(dllexport) +#else +# if defined(__GNUC__) && (__GNUC__ >= 4) +# define X264_DLL_IMPORT +# define X264_DLL_EXPORT __attribute__((visibility("default"))) +# else +# define X264_DLL_IMPORT +# define X264_DLL_EXPORT +# endif +#endif /* Application developers planning to link against a shared library version of * libx264 from a Microsoft Visual Studio or similar development environment @@ -53,9 +66,13 @@ * This clause does not apply to MinGW, similar development environments, or non * Windows platforms. 
*/ #ifdef X264_API_IMPORTS -#define X264_API __declspec(dllimport) +# define X264_API X264_DLL_IMPORT #else -#define X264_API +# ifdef X264_API_EXPORTS +# define X264_API X264_DLL_EXPORT +# else +# define X264_API +# endif #endif /* x264_t: @@ -119,57 +136,57 @@ /* CPU flags */ /* x86 */ -#define X264_CPU_MMX (1<<0) -#define X264_CPU_MMX2 (1<<1) /* MMX2 aka MMXEXT aka ISSE */ +#define X264_CPU_MMX (1U<<0) +#define X264_CPU_MMX2 (1U<<1) /* MMX2 aka MMXEXT aka ISSE */ #define X264_CPU_MMXEXT X264_CPU_MMX2 -#define X264_CPU_SSE (1<<2) -#define X264_CPU_SSE2 (1<<3) -#define X264_CPU_LZCNT (1<<4) -#define X264_CPU_SSE3 (1<<5) -#define X264_CPU_SSSE3 (1<<6) -#define X264_CPU_SSE4 (1<<7) /* SSE4.1 */ -#define X264_CPU_SSE42 (1<<8) /* SSE4.2 */ -#define X264_CPU_AVX (1<<9) /* Requires OS support even if YMM registers aren't used */ -#define X264_CPU_XOP (1<<10) /* AMD XOP */ -#define X264_CPU_FMA4 (1<<11) /* AMD FMA4 */ -#define X264_CPU_FMA3 (1<<12) -#define X264_CPU_BMI1 (1<<13) -#define X264_CPU_BMI2 (1<<14) -#define X264_CPU_AVX2 (1<<15) -#define X264_CPU_AVX512 (1<<16) /* AVX-512 {F, CD, BW, DQ, VL}, requires OS support */ +#define X264_CPU_SSE (1U<<2) +#define X264_CPU_SSE2 (1U<<3) +#define X264_CPU_LZCNT (1U<<4) +#define X264_CPU_SSE3 (1U<<5) +#define X264_CPU_SSSE3 (1U<<6) +#define X264_CPU_SSE4 (1U<<7) /* SSE4.1 */ +#define X264_CPU_SSE42 (1U<<8) /* SSE4.2 */ +#define X264_CPU_AVX (1U<<9) /* Requires OS support even if YMM registers aren't used */ +#define X264_CPU_XOP (1U<<10) /* AMD XOP */ +#define X264_CPU_FMA4 (1U<<11) /* AMD FMA4 */ +#define X264_CPU_FMA3 (1U<<12) +#define X264_CPU_BMI1 (1U<<13) +#define X264_CPU_BMI2 (1U<<14) +#define X264_CPU_AVX2 (1U<<15) +#define X264_CPU_AVX512 (1U<<16) /* AVX-512 {F, CD, BW, DQ, VL}, requires OS support */ /* x86 modifiers */ -#define X264_CPU_CACHELINE_32 (1<<17) /* avoid memory loads that span the border between two cachelines */ -#define X264_CPU_CACHELINE_64 (1<<18) /* 32/64 is the size of a cacheline in bytes */ 
-#define X264_CPU_SSE2_IS_SLOW (1<<19) /* avoid most SSE2 functions on Athlon64 */ -#define X264_CPU_SSE2_IS_FAST (1<<20) /* a few functions are only faster on Core2 and Phenom */ -#define X264_CPU_SLOW_SHUFFLE (1<<21) /* The Conroe has a slow shuffle unit (relative to overall SSE performance) */ -#define X264_CPU_STACK_MOD4 (1<<22) /* if stack is only mod4 and not mod16 */ -#define X264_CPU_SLOW_ATOM (1<<23) /* The Atom is terrible: slow SSE unaligned loads, slow - * SIMD multiplies, slow SIMD variable shifts, slow pshufb, - * cacheline split penalties -- gather everything here that - * isn't shared by other CPUs to avoid making half a dozen - * new SLOW flags. */ -#define X264_CPU_SLOW_PSHUFB (1<<24) /* such as on the Intel Atom */ -#define X264_CPU_SLOW_PALIGNR (1<<25) /* such as on the AMD Bobcat */ +#define X264_CPU_CACHELINE_32 (1U<<17) /* avoid memory loads that span the border between two cachelines */ +#define X264_CPU_CACHELINE_64 (1U<<18) /* 32/64 is the size of a cacheline in bytes */ +#define X264_CPU_SSE2_IS_SLOW (1U<<19) /* avoid most SSE2 functions on Athlon64 */ +#define X264_CPU_SSE2_IS_FAST (1U<<20) /* a few functions are only faster on Core2 and Phenom */ +#define X264_CPU_SLOW_SHUFFLE (1U<<21) /* The Conroe has a slow shuffle unit (relative to overall SSE performance) */ +#define X264_CPU_STACK_MOD4 (1U<<22) /* if stack is only mod4 and not mod16 */ +#define X264_CPU_SLOW_ATOM (1U<<23) /* The Atom is terrible: slow SSE unaligned loads, slow + * SIMD multiplies, slow SIMD variable shifts, slow pshufb, + * cacheline split penalties -- gather everything here that + * isn't shared by other CPUs to avoid making half a dozen + * new SLOW flags. 
*/ +#define X264_CPU_SLOW_PSHUFB (1U<<24) /* such as on the Intel Atom */ +#define X264_CPU_SLOW_PALIGNR (1U<<25) /* such as on the AMD Bobcat */ /* PowerPC */ -#define X264_CPU_ALTIVEC 0x0000001 +#define X264_CPU_ALTIVEC 0x0000001U /* ARM and AArch64 */ -#define X264_CPU_ARMV6 0x0000001 -#define X264_CPU_NEON 0x0000002 /* ARM NEON */ -#define X264_CPU_FAST_NEON_MRC 0x0000004 /* Transfer from NEON to ARM register is fast (Cortex-A9) */ -#define X264_CPU_ARMV8 0x0000008 +#define X264_CPU_ARMV6 0x0000001U +#define X264_CPU_NEON 0x0000002U /* ARM NEON */ +#define X264_CPU_FAST_NEON_MRC 0x0000004U /* Transfer from NEON to ARM register is fast (Cortex-A9) */ +#define X264_CPU_ARMV8 0x0000008U /* MIPS */ -#define X264_CPU_MSA 0x0000001 /* MIPS MSA */ +#define X264_CPU_MSA 0x0000001U /* MIPS MSA */ /* Analyse flags */ -#define X264_ANALYSE_I4x4 0x0001 /* Analyse i4x4 */ -#define X264_ANALYSE_I8x8 0x0002 /* Analyse i8x8 (requires 8x8 transform) */ -#define X264_ANALYSE_PSUB16x16 0x0010 /* Analyse p16x8, p8x16 and p8x8 */ -#define X264_ANALYSE_PSUB8x8 0x0020 /* Analyse p8x4, p4x8, p4x4 */ -#define X264_ANALYSE_BSUB16x16 0x0100 /* Analyse b16x8, b8x16 and b8x8 */ +#define X264_ANALYSE_I4x4 0x0001U /* Analyse i4x4 */ +#define X264_ANALYSE_I8x8 0x0002U /* Analyse i8x8 (requires 8x8 transform) */ +#define X264_ANALYSE_PSUB16x16 0x0010U /* Analyse p16x8, p8x16 and p8x8 */ +#define X264_ANALYSE_PSUB8x8 0x0020U /* Analyse p8x4, p4x8, p4x4 */ +#define X264_ANALYSE_BSUB16x16 0x0100U /* Analyse b16x8, b8x16 and b8x8 */ #define X264_DIRECT_PRED_NONE 0 #define X264_DIRECT_PRED_SPATIAL 1 @@ -287,7 +304,7 @@ typedef struct x264_param_t { /* CPU flags */ - unsigned int cpu; + uint32_t cpu; int i_threads; /* encode multiple frames in parallel */ int i_lookahead_threads; /* multiple threads for lookahead analysis */ int b_sliced_threads; /* Whether to use slice-based threading. */ @@ -459,10 +476,10 @@ non-mod16 video resolutions. 
*/ struct { - unsigned int i_left; - unsigned int i_top; - unsigned int i_right; - unsigned int i_bottom; + int i_left; + int i_top; + int i_right; + int i_bottom; } crop_rect; /* frame packing arrangement flag */ @@ -568,7 +585,7 @@ void (*nalu_process)( x264_t *h, x264_nal_t *nal, void *opaque ); } x264_param_t; -void x264_nal_encode( x264_t *h, uint8_t *dst, x264_nal_t *nal ); +X264_API void x264_nal_encode( x264_t *h, uint8_t *dst, x264_nal_t *nal ); /**************************************************************************** * H.264 level restriction information @@ -577,11 +594,11 @@ typedef struct x264_level_t { uint8_t level_idc; - uint32_t mbps; /* max macroblock processing rate (macroblocks/sec) */ - uint32_t frame_size; /* max frame size (macroblocks) */ - uint32_t dpb; /* max decoded picture buffer (mbs) */ - uint32_t bitrate; /* max bitrate (kbit/sec) */ - uint32_t cpb; /* max vbv buffer (kbit) */ + int32_t mbps; /* max macroblock processing rate (macroblocks/sec) */ + int32_t frame_size; /* max frame size (macroblocks) */ + int32_t dpb; /* max decoded picture buffer (mbs) */ + int32_t bitrate; /* max bitrate (kbit/sec) */ + int32_t cpb; /* max vbv buffer (kbit) */ uint16_t mv_range; /* max vertical mv component range (pixels) */ uint8_t mvs_per_2mb; /* max mvs per 2 consecutive mbs. */ uint8_t slice_rate; /* ?? */ @@ -600,7 +617,7 @@ /* x264_param_default: * fill x264_param_t with default values and do CPU detection */ -void x264_param_default( x264_param_t * ); +X264_API void x264_param_default( x264_param_t * ); /* x264_param_parse: * set one parameter by name. @@ -611,7 +628,7 @@ * value=NULL means "true" for boolean options, but is a BAD_VALUE for non-booleans. 
*/ #define X264_PARAM_BAD_NAME (-1) #define X264_PARAM_BAD_VALUE (-2) -int x264_param_parse( x264_param_t *, const char *name, const char *value ); +X264_API int x264_param_parse( x264_param_t *, const char *name, const char *value ); /**************************************************************************** * Advanced parameter handling functions @@ -655,13 +672,13 @@ * film, animation, grain, stillimage, psnr, and ssim are psy tunings. * * returns 0 on success, negative on failure (e.g. invalid preset/tune name). */ -int x264_param_default_preset( x264_param_t *, const char *preset, const char *tune ); +X264_API int x264_param_default_preset( x264_param_t *, const char *preset, const char *tune ); /* x264_param_apply_fastfirstpass: * If first-pass mode is set (rc.b_stat_read == 0, rc.b_stat_write == 1), * modify the encoder settings to disable options generally not useful on * the first pass. */ -void x264_param_apply_fastfirstpass( x264_param_t * ); +X264_API void x264_param_apply_fastfirstpass( x264_param_t * ); /* x264_param_apply_profile: * Applies the restrictions of the given profile. @@ -676,7 +693,7 @@ * decrease them. * * returns 0 on success, negative on failure (e.g. invalid profile name). */ -int x264_param_apply_profile( x264_param_t *, const char *profile ); +X264_API int x264_param_apply_profile( x264_param_t *, const char *profile ); /**************************************************************************** * Picture structures and functions @@ -785,7 +802,7 @@ void (*mb_info_free)( void* ); /* The macroblock is constant and remains unchanged from the previous frame. */ - #define X264_MBINFO_CONSTANT (1<<0) + #define X264_MBINFO_CONSTANT (1U<<0) /* More flags may be added in the future. */ /* Out: SSIM of the the frame luma (if x264_param_t.b_ssim is set) */ @@ -846,17 +863,17 @@ /* x264_picture_init: * initialize an x264_picture_t. 
Needs to be done if the calling application * allocates its own x264_picture_t as opposed to using x264_picture_alloc. */ -void x264_picture_init( x264_picture_t *pic ); +X264_API void x264_picture_init( x264_picture_t *pic ); /* x264_picture_alloc: * alloc data for a picture. You must call x264_picture_clean on it. * returns 0 on success, or -1 on malloc failure or invalid colorspace. */ -int x264_picture_alloc( x264_picture_t *pic, int i_csp, int i_width, int i_height ); +X264_API int x264_picture_alloc( x264_picture_t *pic, int i_csp, int i_width, int i_height ); /* x264_picture_clean: * free associated resource for a x264_picture_t allocated with * x264_picture_alloc ONLY */ -void x264_picture_clean( x264_picture_t *pic ); +X264_API void x264_picture_clean( x264_picture_t *pic ); /**************************************************************************** * Encoder functions @@ -871,7 +888,7 @@ /* x264_encoder_open: * create a new encoder handler, all parameters from x264_param_t are copied */ -x264_t *x264_encoder_open( x264_param_t * ); +X264_API x264_t *x264_encoder_open( x264_param_t * ); /* x264_encoder_reconfig: * various parameters from x264_param_t are copied. @@ -886,7 +903,7 @@ * more so than for other presets, many of the speed shortcuts used in ultrafast cannot be * switched out of; using reconfig to switch between ultrafast and other presets is not * recommended without a more fine-grained breakdown of parameters to take this into account. */ -int x264_encoder_reconfig( x264_t *, x264_param_t * ); +X264_API int x264_encoder_reconfig( x264_t *, x264_param_t * ); /* x264_encoder_parameters: * copies the current internal set of parameters to the pointer provided * by the caller. useful when the calling application needs to know @@ -894,32 +911,32 @@ * of the encoder after multiple x264_encoder_reconfig calls. * note that the data accessible through pointers in the returned param struct * (e.g. 
filenames) should not be modified by the calling application. */ -void x264_encoder_parameters( x264_t *, x264_param_t * ); +X264_API void x264_encoder_parameters( x264_t *, x264_param_t * ); /* x264_encoder_headers: * return the SPS and PPS that will be used for the whole stream. * *pi_nal is the number of NAL units outputted in pp_nal. * returns the number of bytes in the returned NALs. * returns negative on error. * the payloads of all output NALs are guaranteed to be sequential in memory. */ -int x264_encoder_headers( x264_t *, x264_nal_t **pp_nal, int *pi_nal ); +X264_API int x264_encoder_headers( x264_t *, x264_nal_t **pp_nal, int *pi_nal ); /* x264_encoder_encode: * encode one picture. * *pi_nal is the number of NAL units outputted in pp_nal. * returns the number of bytes in the returned NALs. * returns negative on error and zero if no NAL units returned. * the payloads of all output NALs are guaranteed to be sequential in memory. */ -int x264_encoder_encode( x264_t *, x264_nal_t **pp_nal, int *pi_nal, x264_picture_t *pic_in, x264_picture_t *pic_out ); +X264_API int x264_encoder_encode( x264_t *, x264_nal_t **pp_nal, int *pi_nal, x264_picture_t *pic_in, x264_picture_t *pic_out ); /* x264_encoder_close: * close an encoder handler */ -void x264_encoder_close( x264_t * ); +X264_API void x264_encoder_close( x264_t * ); /* x264_encoder_delayed_frames: * return the number of currently delayed (buffered) frames * this should be used at the end of the stream, to know when you have all the encoded frames. */ -int x264_encoder_delayed_frames( x264_t * ); +X264_API int x264_encoder_delayed_frames( x264_t * ); /* x264_encoder_maximum_delayed_frames( x264_t * ): * return the maximum number of delayed (buffered) frames that can occur with the current * parameters. 
*/ -int x264_encoder_maximum_delayed_frames( x264_t * ); +X264_API int x264_encoder_maximum_delayed_frames( x264_t * ); /* x264_encoder_intra_refresh: * If an intra refresh is not in progress, begin one with the next P-frame. * If an intra refresh is in progress, begin one as soon as the current one finishes. @@ -933,7 +950,7 @@ * behavior is undefined. * * Should not be called during an x264_encoder_encode. */ -void x264_encoder_intra_refresh( x264_t * ); +X264_API void x264_encoder_intra_refresh( x264_t * ); /* x264_encoder_invalidate_reference: * An interactive error resilience tool, designed for use in a low-latency one-encoder-few-clients * system. When the client has packet loss or otherwise incorrectly decodes a frame, the encoder @@ -956,7 +973,7 @@ * Should not be called during an x264_encoder_encode, but multiple calls can be made simultaneously. * * Returns 0 on success, negative on failure. */ -int x264_encoder_invalidate_reference( x264_t *, int64_t pts ); +X264_API int x264_encoder_invalidate_reference( x264_t *, int64_t pts ); #ifdef __cplusplus } diff -Nru x264-0.157.2935+git545de2f/x264res.rc x264-0.160.3011+gitcde9a93/x264res.rc --- x264-0.157.2935+git545de2f/x264res.rc 2018-09-25 12:35:00.000000000 +0000 +++ x264-0.160.3011+gitcde9a93/x264res.rc 2020-07-13 10:30:21.000000000 +0000 @@ -1,7 +1,7 @@ /***************************************************************************** * x264res.rc: windows resource file ***************************************************************************** - * Copyright (C) 2012-2018 x264 project + * Copyright (C) 2012-2020 x264 project * * Authors: Henrik Gramner * @@ -64,7 +64,7 @@ #endif VALUE "FileVersion", X264_POINTVER VALUE "InternalName", "x264" - VALUE "LegalCopyright", "Copyright (C) 2003-2018 x264 project" + VALUE "LegalCopyright", "Copyright (C) 2003-2020 x264 project" #ifdef DLL VALUE "OriginalFilename", "libx264-" xstr(X264_BUILD) ".dll" #else