diff -Nru libvpx-1.10.0/args.c libvpx-1.11.0/args.c --- libvpx-1.10.0/args.c 2021-03-18 19:59:46.000000000 +0000 +++ libvpx-1.11.0/args.c 2021-10-06 17:41:19.000000000 +0000 @@ -16,8 +16,10 @@ #include "vpx/vpx_integer.h" #include "vpx_ports/msvc.h" -#if defined(__GNUC__) && __GNUC__ -extern void die(const char *fmt, ...) __attribute__((noreturn)); +#if defined(__GNUC__) +__attribute__((noreturn)) extern void die(const char *fmt, ...); +#elif defined(_MSC_VER) +__declspec(noreturn) extern void die(const char *fmt, ...); #else extern void die(const char *fmt, ...); #endif diff -Nru libvpx-1.10.0/AUTHORS libvpx-1.11.0/AUTHORS --- libvpx-1.10.0/AUTHORS 2021-03-18 19:59:46.000000000 +0000 +++ libvpx-1.11.0/AUTHORS 2021-10-06 17:41:19.000000000 +0000 @@ -3,6 +3,7 @@ Aaron Watry Abo Talib Mahfoodh +Adam B. Goode Adrian Grange Ahmad Sharif Aidan Welch @@ -25,6 +26,7 @@ Aron Rosenberg Attila Nagy Birk Magnussen +Bohan Li Brian Foley Brion Vibber changjun.yang @@ -34,6 +36,7 @@ chm Chris Cunningham Christian Duvivier +Chunbo Hua Clement Courbet Daniele Castagna Daniel Kang @@ -68,6 +71,7 @@ Harish Mahendrakar Henrik Lundin Hien Ho +Hirokazu Honda Hui Su Ivan Krasin Ivan Maltz @@ -97,6 +101,7 @@ John Koleszar Johnny Klonaris John Stark +Jonathan Wright Jon Kunkee Jorge E. Moreira Joshua Bleecher Snyder @@ -146,6 +151,7 @@ Peter Boström Peter Collingbourne Peter de Rivaz +Peter Kasting Philip Jägenstedt Priit Laes Rafael Ávila de Espíndola diff -Nru libvpx-1.10.0/CHANGELOG libvpx-1.11.0/CHANGELOG --- libvpx-1.10.0/CHANGELOG 2021-03-18 19:59:46.000000000 +0000 +++ libvpx-1.11.0/CHANGELOG 2021-10-06 17:41:19.000000000 +0000 @@ -1,3 +1,29 @@ +2021-09-27 v1.11.0 "Smew Duck" + This maintenance release adds support for VBR mode in VP9 rate control + interface, new codec controls to get quantization parameters and loop filter + levels, and includes several improvements to NEON and numerous bug fixes. + + - Upgrading: + New codec control is added to get quantization parameters and loop filter + levels. + + VBR mode is supported in VP9 rate control library. + + - Enhancement: + Numerous improvements for Neon optimizations. + Code clean-up and refactoring. + Calculation of rd multiplier is changed with BDRATE gains. + + - Bug fixes: + Fix to overflow on duration. + Fix to several instances of -Wunused-but-set-variable. + Fix to avoid chroma resampling for 420mpeg2 input. + Fix to overflow in calc_iframe_target_size. + Fix to disallow skipping transform and quantization. + Fix some -Wsign-compare warnings in simple_encode. + Fix input file path in simple_encode_test. + Fix valid range for under/over_shoot pct. + 2021-03-09 v1.10.0 "Ruddy Duck" This maintenance release adds support for darwin20 and new codec controls, as well as numerous bug fixes. diff -Nru libvpx-1.10.0/CONTRIBUTING.md libvpx-1.11.0/CONTRIBUTING.md --- libvpx-1.10.0/CONTRIBUTING.md 2021-03-18 19:59:46.000000000 +0000 +++ libvpx-1.11.0/CONTRIBUTING.md 2021-10-06 17:41:19.000000000 +0000 @@ -19,10 +19,9 @@ All submissions, including submissions by project members, require review. We use a [Gerrit](https://www.gerritcodereview.com) instance hosted at -https://chromium-review.googlesource.com for this purpose. - -See https://www.webmproject.org/code/contribute/submitting-patches for an -example of a typical gerrit workflow. +https://chromium-review.googlesource.com for this purpose. See the +[WebM Project page](https://www.webmproject.org/code/contribute/submitting-patches/) +for additional details. ## Community Guidelines diff -Nru libvpx-1.10.0/debian/changelog libvpx-1.11.0/debian/changelog --- libvpx-1.10.0/debian/changelog 2021-08-28 01:22:48.000000000 +0000 +++ libvpx-1.11.0/debian/changelog 2022-02-09 17:53:46.000000000 +0000 @@ -1,46 +1,106 @@ -libvpx (1.10.0-0sergeyd2.2~18.04.1) bionic; urgency=medium +libvpx (1.11.0-2ubuntu1sergeyd1~18.04.1) bionic; urgency=medium - * New upstream release + * Rebuild for SergeyD - -- Sergey Dryabzhinsky Sat, 28 Aug 2021 00:29:56 +0300 + -- Sergey Dryabzhinsky Wed, 09 Feb 2022 18:20:31 +0300 -libvpx (1.9.0-0sergeyd2.2~16.04.1) xenial; urgency=medium +libvpx (1.11.0-2ubuntu1) jammy; urgency=medium - * New upstream release - * Remove unused patches + * Merge with Debian, reamining Ubuntu changes: + * debian/rules: + - Disable LTO and fix FTBFS when building with GCC 11. - -- Sergey Dryabzhinsky Wed, 07 Apr 2021 22:36:50 +0300 + -- Robert Ancell Wed, 17 Nov 2021 10:11:05 +1300 -libvpx (1.8.2-0sergeyd2.2~trusty1) trusty; urgency=medium +libvpx (1.11.0-2) unstable; urgency=medium - * New upstream release + * Team upload + * Upload to unstable + + -- Sebastian Ramacher Sun, 24 Oct 2021 17:08:21 +0200 + +libvpx (1.11.0-1) experimental; urgency=medium + + * Team upload - -- Sergey Dryabzhinsky Sun, 19 Jan 2020 19:52:07 +0300 + [ Jakub Adam ] + * Enable VP9 high bit depth (10/12) profiles. -libvpx (1.8.1-0sergeyd2.1~trusty1) trusty; urgency=medium + [ Sebastian Ramacher ] + * New upstream version 1.11.0 + * SONAME bump: libvpx6 -> libvpx7 + * debian/control: Remove incorrect MA: foreign from vpx-tools - * Fix libvpx6/vps6-tools deps + -- Sebastian Ramacher Sun, 10 Oct 2021 22:18:17 +0200 - -- Sergey Dryabzhinsky Fri, 16 Aug 2019 21:48:00 +0300 +libvpx (1.10.0-2) unstable; urgency=medium -libvpx (1.8.1-0sergeyd2~trusty1) trusty; urgency=medium + * debian/: Relax ABI version check and bump minimum version for init + functions + -- Sebastian Ramacher Tue, 31 Aug 2021 08:59:02 +0200 + +libvpx (1.10.0-1) unstable; urgency=medium + + * Team upload + + [ Debian Janitor ] + * Set upstream metadata fields: Repository, Repository-Browse. + * Remove constraints unnecessary since buster + + [ Sebastian Ramacher ] * New upstream release - * Bump library version to 6 - * Try to disable NEON build on Trusty - * Disable libyuv on Precise + * debian/control: + - Bump Standards-Version + - Drop obsolete Pre-Depends + * debian/rules: Do not install README and AUTHORS - -- Sergey Dryabzhinsky Mon, 29 Jul 2019 14:04:47 +0300 + -- Sebastian Ramacher Tue, 24 Aug 2021 22:56:11 +0200 -libvpx (1.7.0-3sergeyd1.2~trusty1) trusty; urgency=medium +libvpx (1.9.0-1ubuntu1) impish; urgency=medium - * Rebuild for SergeyD - * Rename packages to libvpx5 - * Respect hardening env - * Disable NEON on ARM(64) with old GCC < 4.9 - * Disable libyuv on GCC < 4.7 + * d/rules: Disable LTO and fix FTBFS when building with GCC 11. + (LP: #1939640) + + -- Sergio Durigan Junior Wed, 11 Aug 2021 22:25:45 -0400 - -- Sergey Dryabzhinsky Wed, 10 Oct 2018 01:38:34 +0300 +libvpx (1.9.0-1) unstable; urgency=medium + + * Team upload + * New upstream release (Closes: #976835) + * debian/watch: versionmangle for RC releases + * debian/control: + - Bump Standards-Version + - Remove unneeded ${shlibs:Depends} from libvpx-dev + - Bump debhelper compat to 13 + - Set RRR: no + * debian/rules: Use /usr/share/dpkg/architecture.mk + + -- Sebastian Ramacher Tue, 08 Dec 2020 18:02:59 +0100 + +libvpx (1.8.2-1) unstable; urgency=medium + + * New upstream version 1.8.2. + * Bump Standards-Version to 4.4.1. + + -- Ondřej Nový Fri, 27 Dec 2019 17:59:25 +0100 + +libvpx (1.8.1-2) unstable; urgency=medium + + * Uploading to unstable. + + -- Ondřej Nový Mon, 05 Aug 2019 08:46:57 +0200 + +libvpx (1.8.1-1) experimental; urgency=medium + + * New upstream release + - This release is ABI incompatible + * Bump debhelper compat level to 12 and use debhelper-compat + * Bump Standards-Version to 4.4.0 (no changes needed) + * d/changelog, d/control: Remove trailing empty line at the end of file + * Fix installation of neon version of library on ARM (Closes: #922817) + + -- Ondřej Nový Wed, 17 Jul 2019 00:05:27 +0200 libvpx (1.7.0-3) unstable; urgency=medium @@ -462,4 +522,3 @@ * Initial Debian packaging (Closes: #582271). -- Sebastian Dröge Tue, 18 May 2010 20:49:11 +0200 - diff -Nru libvpx-1.10.0/debian/control libvpx-1.11.0/debian/control --- libvpx-1.10.0/debian/control 2019-08-16 18:47:41.000000000 +0000 +++ libvpx-1.11.0/debian/control 2022-02-09 15:21:23.000000000 +0000 @@ -1,26 +1,27 @@ Source: libvpx Section: video Priority: optional -Maintainer: Debian Multimedia Maintainers +Maintainer: Ubuntu Developers +XSBC-Original-Maintainer: Debian Multimedia Maintainers Uploaders: Sebastian Dröge , Ondřej Nový , -Build-Depends: debhelper (>= 9), lsb-release, - yasm (>= 0.7) [amd64 i386 hurd-i386 kfreebsd-amd64 kfreebsd-i386], - dh-exec (>= 0.23), +Build-Depends: debhelper (>= 9), + yasm [amd64 i386 hurd-i386 kfreebsd-amd64 kfreebsd-i386], + dh-exec (>= 0.23) Build-Depends-Indep: doxygen -Standards-Version: 4.1.3 -Homepage: http://www.webmproject.org +Standards-Version: 4.6.0 +Homepage: https://www.webmproject.org Vcs-Git: https://salsa.debian.org/multimedia-team/libvpx.git Vcs-Browser: https://salsa.debian.org/multimedia-team/libvpx +Rules-Requires-Root: no -Package: libvpx6-dev +Package: libvpx7-dev Section: libdevel Architecture: any -Depends: libvpx6 (= ${binary:Version}), - ${shlibs:Depends}, +Depends: libvpx7 (= ${binary:Version}), ${misc:Depends} -Conflicts: libvpx-dev, libvpx3-dev, libvpx5-dev -Replaces: libvpx-dev, libvpx3-dev, libvpx5-dev +Conflicts: libvpx-dev, libvpx3-dev, libvpx5-dev, libvpx6-dev +Replaces: libvpx-dev, libvpx3-dev, libvpx5-dev, libvpx6-dev Provides: libvpx-dev Multi-Arch: same Description: VP8 and VP9 video codec (development files) @@ -31,13 +32,12 @@ This package contains the development libraries, header files needed by programs that want to compile with libvpx. -Package: libvpx6 +Package: libvpx7 Section: libs Architecture: any Depends: ${shlibs:Depends}, ${misc:Depends} Multi-Arch: same -Pre-Depends: ${misc:Pre-Depends} Description: VP8 and VP9 video codec (shared library) VP8 and VP9 are open video codecs, originally developed by On2 and released as open source by Google Inc. They are the successor of the VP3 codec, @@ -45,7 +45,7 @@ . This package contains the shared libraries. -Package: libvpx6-doc +Package: libvpx7-doc Section: doc Architecture: all Multi-Arch: foreign @@ -57,18 +57,17 @@ on which the Theora codec was based. . This package contains the HTML documentation for the libvpx library - in /usr/share/doc/libvpx5-doc. + in /usr/share/doc/libvpx-doc. -Package: vpx6-tools +Package: vpx7-tools Section: utils Architecture: any -Depends: libvpx6 (= ${binary:Version}), +Depends: libvpx7 (= ${binary:Version}), ${shlibs:Depends}, ${misc:Depends} -Conflicts: vpx-tools, vpx3-tools, vpx5-tools -Replaces: vpx-tools, vpx3-tools, vpx5-tools +Conflicts: vpx-tools, vpx3-tools, vpx5-tools, vpx6-tools +Replaces: vpx-tools, vpx3-tools, vpx5-tools, vpx6-tools Provides: vpx-tools -Multi-Arch: foreign Description: VP8 and VP9 video codec encoding/decoding tools VP8 and VP9 are open video codecs, originally developed by On2 and released as open source by Google Inc. They are the successor of the VP3 codec, diff -Nru libvpx-1.10.0/debian/copyright libvpx-1.11.0/debian/copyright --- libvpx-1.10.0/debian/copyright 2018-02-14 15:11:55.000000000 +0000 +++ libvpx-1.11.0/debian/copyright 2021-11-16 21:11:05.000000000 +0000 @@ -62,7 +62,7 @@ Files: debian/* Copyright: (c) 2010-2016, Sebastian Dröge - (c) 2016-2018, Ondřej Nový + (c) 2016-2019, Ondřej Nový License: BSD-3-Clause License: BSD-3-Clause diff -Nru libvpx-1.10.0/debian/libvpx6-dev.examples libvpx-1.11.0/debian/libvpx6-dev.examples --- libvpx-1.10.0/debian/libvpx6-dev.examples 2018-02-17 12:47:07.000000000 +0000 +++ libvpx-1.11.0/debian/libvpx6-dev.examples 1970-01-01 00:00:00.000000000 +0000 @@ -1 +0,0 @@ -examples/* diff -Nru libvpx-1.10.0/debian/libvpx6-dev.install libvpx-1.11.0/debian/libvpx6-dev.install --- libvpx-1.10.0/debian/libvpx6-dev.install 2018-02-14 15:08:06.000000000 +0000 +++ libvpx-1.11.0/debian/libvpx6-dev.install 1970-01-01 00:00:00.000000000 +0000 @@ -1,5 +0,0 @@ -#! /usr/bin/dh-exec --with-scripts=subst-multiarch -builddir/vpx-vp8-*/lib/libvpx.so /usr/lib/${DEB_HOST_MULTIARCH}/ -builddir/vpx-vp8-*/lib/libvpx.a /usr/lib/${DEB_HOST_MULTIARCH}/ -builddir/vpx-vp8-*/include/vpx usr/include -builddir/vpx-vp8-*/lib/pkgconfig usr/lib/${DEB_HOST_MULTIARCH}/ diff -Nru libvpx-1.10.0/debian/libvpx6-doc.doc-base libvpx-1.11.0/debian/libvpx6-doc.doc-base --- libvpx-1.10.0/debian/libvpx6-doc.doc-base 2019-07-29 11:01:59.000000000 +0000 +++ libvpx-1.11.0/debian/libvpx6-doc.doc-base 1970-01-01 00:00:00.000000000 +0000 @@ -1,9 +0,0 @@ -Document: libvpx6-doc -Title: WebM Codec SDK -Author: Google, Inc. -Abstract: SDK allows you to integrate your applications with the VP8 and VP9 video codecs -Section: Programming/C++ - -Format: HTML -Index: /usr/share/doc/libvpx6-doc/html/index.html -Files: /usr/share/doc/libvpx6-doc/html/* diff -Nru libvpx-1.10.0/debian/libvpx6-doc.install libvpx-1.11.0/debian/libvpx6-doc.install --- libvpx-1.10.0/debian/libvpx6-doc.install 2019-07-29 11:02:04.000000000 +0000 +++ libvpx-1.11.0/debian/libvpx6-doc.install 1970-01-01 00:00:00.000000000 +0000 @@ -1 +0,0 @@ -builddir/docs/html usr/share/doc/libvpx6-doc diff -Nru libvpx-1.10.0/debian/libvpx6.install libvpx-1.11.0/debian/libvpx6.install --- libvpx-1.10.0/debian/libvpx6.install 2018-02-14 15:08:06.000000000 +0000 +++ libvpx-1.11.0/debian/libvpx6.install 1970-01-01 00:00:00.000000000 +0000 @@ -1,3 +0,0 @@ -#! /usr/bin/dh-exec --with-scripts=subst-multiarch -builddir/vpx-vp8-*/lib/libvpx.so.* /usr/lib/${DEB_HOST_MULTIARCH}/ -builddir-neon/vpx-vp8-*/lib/libvpx.so.* /usr/lib/${DEB_HOST_MULTIARCH}/vfp/neon [arm] diff -Nru libvpx-1.10.0/debian/libvpx6.symbols libvpx-1.11.0/debian/libvpx6.symbols --- libvpx-1.10.0/debian/libvpx6.symbols 2020-12-08 13:41:52.000000000 +0000 +++ libvpx-1.11.0/debian/libvpx6.symbols 1970-01-01 00:00:00.000000000 +0000 @@ -1,43 +0,0 @@ -libvpx.so.6 libvpx6 #MINVER# -* Build-Depends-Package: libvpx-dev - vpx_codec_build_config@Base 1.6.0 - vpx_codec_control_@Base 1.6.0 - vpx_codec_dec_init_ver@Base 1.6.0 - vpx_codec_decode@Base 1.6.0 - vpx_codec_destroy@Base 1.6.0 - vpx_codec_enc_config_default@Base 1.6.0 - vpx_codec_enc_config_set@Base 1.6.0 - vpx_codec_enc_init_multi_ver@Base 1.6.0 - vpx_codec_enc_init_ver@Base 1.6.0 - vpx_codec_encode@Base 1.6.0 - vpx_codec_err_to_string@Base 1.6.0 - vpx_codec_error@Base 1.6.0 - vpx_codec_error_detail@Base 1.6.0 - vpx_codec_get_caps@Base 1.6.0 - vpx_codec_get_cx_data@Base 1.6.0 - vpx_codec_get_frame@Base 1.6.0 - vpx_codec_get_global_headers@Base 1.6.0 - vpx_codec_get_preview_frame@Base 1.6.0 - vpx_codec_get_stream_info@Base 1.6.0 - vpx_codec_iface_name@Base 1.6.0 - vpx_codec_peek_stream_info@Base 1.6.0 - vpx_codec_register_put_frame_cb@Base 1.6.0 - vpx_codec_register_put_slice_cb@Base 1.6.0 - vpx_codec_set_cx_data_buf@Base 1.6.0 - vpx_codec_set_frame_buffer_functions@Base 1.6.0 - vpx_codec_version@Base 1.6.0 - vpx_codec_version_extra_str@Base 1.6.0 - vpx_codec_version_str@Base 1.6.0 - vpx_codec_vp8_cx@Base 1.6.0 - vpx_codec_vp8_cx_algo@Base 1.6.0 - vpx_codec_vp8_dx@Base 1.6.0 - vpx_codec_vp8_dx_algo@Base 1.6.0 - vpx_codec_vp9_cx@Base 1.6.0 - vpx_codec_vp9_cx_algo@Base 1.6.0 - vpx_codec_vp9_dx@Base 1.6.0 - vpx_codec_vp9_dx_algo@Base 1.6.0 - vpx_img_alloc@Base 1.6.0 - vpx_img_flip@Base 1.6.0 - vpx_img_free@Base 1.6.0 - vpx_img_set_rect@Base 1.6.0 - vpx_img_wrap@Base 1.6.0 diff -Nru libvpx-1.10.0/debian/libvpx7-dev.examples libvpx-1.11.0/debian/libvpx7-dev.examples --- libvpx-1.10.0/debian/libvpx7-dev.examples 1970-01-01 00:00:00.000000000 +0000 +++ libvpx-1.11.0/debian/libvpx7-dev.examples 2021-11-16 21:11:05.000000000 +0000 @@ -0,0 +1 @@ +examples/* diff -Nru libvpx-1.10.0/debian/libvpx7-dev.install libvpx-1.11.0/debian/libvpx7-dev.install --- libvpx-1.10.0/debian/libvpx7-dev.install 1970-01-01 00:00:00.000000000 +0000 +++ libvpx-1.11.0/debian/libvpx7-dev.install 2021-11-16 21:11:05.000000000 +0000 @@ -0,0 +1,5 @@ +#! /usr/bin/dh-exec --with-scripts=subst-multiarch +builddir/vpx-vp8-*/lib/libvpx.so /usr/lib/${DEB_HOST_MULTIARCH}/ +builddir/vpx-vp8-*/lib/libvpx.a /usr/lib/${DEB_HOST_MULTIARCH}/ +builddir/vpx-vp8-*/include/vpx usr/include +builddir/vpx-vp8-*/lib/pkgconfig usr/lib/${DEB_HOST_MULTIARCH}/ diff -Nru libvpx-1.10.0/debian/libvpx7-doc.doc-base libvpx-1.11.0/debian/libvpx7-doc.doc-base --- libvpx-1.10.0/debian/libvpx7-doc.doc-base 1970-01-01 00:00:00.000000000 +0000 +++ libvpx-1.11.0/debian/libvpx7-doc.doc-base 2021-11-16 21:11:05.000000000 +0000 @@ -0,0 +1,9 @@ +Document: libvpx-doc +Title: WebM Codec SDK +Author: Google, Inc. +Abstract: SDK allows you to integrate your applications with the VP8 and VP9 video codecs +Section: Programming/C++ + +Format: HTML +Index: /usr/share/doc/libvpx-doc/html/index.html +Files: /usr/share/doc/libvpx-doc/html/* diff -Nru libvpx-1.10.0/debian/libvpx7-doc.install libvpx-1.11.0/debian/libvpx7-doc.install --- libvpx-1.10.0/debian/libvpx7-doc.install 1970-01-01 00:00:00.000000000 +0000 +++ libvpx-1.11.0/debian/libvpx7-doc.install 2021-12-29 12:17:32.000000000 +0000 @@ -0,0 +1 @@ +builddir/docs/html usr/share/doc/libvpx7-doc diff -Nru libvpx-1.10.0/debian/libvpx7.install libvpx-1.11.0/debian/libvpx7.install --- libvpx-1.10.0/debian/libvpx7.install 1970-01-01 00:00:00.000000000 +0000 +++ libvpx-1.11.0/debian/libvpx7.install 2021-11-16 21:11:05.000000000 +0000 @@ -0,0 +1,3 @@ +#! /usr/bin/dh-exec --with-scripts=subst-multiarch +builddir/vpx-vp8-*/lib/libvpx.so.* /usr/lib/${DEB_HOST_MULTIARCH}/ +builddir-neon/vpx-vp8-*/lib/libvpx.so.* /usr/lib/${DEB_HOST_MULTIARCH}/vfp/neon [armhf armel] diff -Nru libvpx-1.10.0/debian/libvpx7.symbols libvpx-1.11.0/debian/libvpx7.symbols --- libvpx-1.10.0/debian/libvpx7.symbols 1970-01-01 00:00:00.000000000 +0000 +++ libvpx-1.11.0/debian/libvpx7.symbols 2021-11-16 21:11:05.000000000 +0000 @@ -0,0 +1,43 @@ +libvpx.so.7 libvpx7 #MINVER# +* Build-Depends-Package: libvpx-dev + vpx_codec_build_config@Base 1.6.0 + vpx_codec_control_@Base 1.6.0 + vpx_codec_dec_init_ver@Base 1.10.0 + vpx_codec_decode@Base 1.6.0 + vpx_codec_destroy@Base 1.6.0 + vpx_codec_enc_config_default@Base 1.6.0 + vpx_codec_enc_config_set@Base 1.6.0 + vpx_codec_enc_init_multi_ver@Base 1.10.0 + vpx_codec_enc_init_ver@Base 1.10.0 + vpx_codec_encode@Base 1.6.0 + vpx_codec_err_to_string@Base 1.6.0 + vpx_codec_error@Base 1.6.0 + vpx_codec_error_detail@Base 1.6.0 + vpx_codec_get_caps@Base 1.6.0 + vpx_codec_get_cx_data@Base 1.6.0 + vpx_codec_get_frame@Base 1.6.0 + vpx_codec_get_global_headers@Base 1.6.0 + vpx_codec_get_preview_frame@Base 1.6.0 + vpx_codec_get_stream_info@Base 1.6.0 + vpx_codec_iface_name@Base 1.6.0 + vpx_codec_peek_stream_info@Base 1.6.0 + vpx_codec_register_put_frame_cb@Base 1.6.0 + vpx_codec_register_put_slice_cb@Base 1.6.0 + vpx_codec_set_cx_data_buf@Base 1.6.0 + vpx_codec_set_frame_buffer_functions@Base 1.6.0 + vpx_codec_version@Base 1.6.0 + vpx_codec_version_extra_str@Base 1.6.0 + vpx_codec_version_str@Base 1.6.0 + vpx_codec_vp8_cx@Base 1.6.0 + vpx_codec_vp8_cx_algo@Base 1.6.0 + vpx_codec_vp8_dx@Base 1.6.0 + vpx_codec_vp8_dx_algo@Base 1.6.0 + vpx_codec_vp9_cx@Base 1.6.0 + vpx_codec_vp9_cx_algo@Base 1.6.0 + vpx_codec_vp9_dx@Base 1.6.0 + vpx_codec_vp9_dx_algo@Base 1.6.0 + vpx_img_alloc@Base 1.6.0 + vpx_img_flip@Base 1.6.0 + vpx_img_free@Base 1.6.0 + vpx_img_set_rect@Base 1.6.0 + vpx_img_wrap@Base 1.6.0 diff -Nru libvpx-1.10.0/debian/patches/0001-Relax-ABI-check.patch libvpx-1.11.0/debian/patches/0001-Relax-ABI-check.patch --- libvpx-1.10.0/debian/patches/0001-Relax-ABI-check.patch 1970-01-01 00:00:00.000000000 +0000 +++ libvpx-1.11.0/debian/patches/0001-Relax-ABI-check.patch 2021-11-16 21:11:05.000000000 +0000 @@ -0,0 +1,45 @@ +From: Sebastian Ramacher +Date: Tue, 31 Aug 2021 08:53:52 +0200 +Subject: Relax ABI check + +We have symbol files and version dependencies to properly track this. +--- + vpx/src/vpx_decoder.c | 2 +- + vpx/src/vpx_encoder.c | 4 ++-- + 2 files changed, 3 insertions(+), 3 deletions(-) + +diff --git a/vpx/src/vpx_decoder.c b/vpx/src/vpx_decoder.c +index 427cd1b..eddbf1a 100644 +--- a/vpx/src/vpx_decoder.c ++++ b/vpx/src/vpx_decoder.c +@@ -27,7 +27,7 @@ vpx_codec_err_t vpx_codec_dec_init_ver(vpx_codec_ctx_t *ctx, + vpx_codec_flags_t flags, int ver) { + vpx_codec_err_t res; + +- if (ver != VPX_DECODER_ABI_VERSION) ++ if (ver > VPX_DECODER_ABI_VERSION) + res = VPX_CODEC_ABI_MISMATCH; + else if (!ctx || !iface) + res = VPX_CODEC_INVALID_PARAM; +diff --git a/vpx/src/vpx_encoder.c b/vpx/src/vpx_encoder.c +index f636b54..9f8eac9 100644 +--- a/vpx/src/vpx_encoder.c ++++ b/vpx/src/vpx_encoder.c +@@ -32,7 +32,7 @@ vpx_codec_err_t vpx_codec_enc_init_ver(vpx_codec_ctx_t *ctx, + vpx_codec_flags_t flags, int ver) { + vpx_codec_err_t res; + +- if (ver != VPX_ENCODER_ABI_VERSION) ++ if (ver > VPX_ENCODER_ABI_VERSION) + res = VPX_CODEC_ABI_MISMATCH; + else if (!ctx || !iface || !cfg) + res = VPX_CODEC_INVALID_PARAM; +@@ -67,7 +67,7 @@ vpx_codec_err_t vpx_codec_enc_init_multi_ver( + int num_enc, vpx_codec_flags_t flags, vpx_rational_t *dsf, int ver) { + vpx_codec_err_t res = VPX_CODEC_OK; + +- if (ver != VPX_ENCODER_ABI_VERSION) ++ if (ver > VPX_ENCODER_ABI_VERSION) + res = VPX_CODEC_ABI_MISMATCH; + else if (!ctx || !iface || !cfg || (num_enc > 16 || num_enc < 1)) + res = VPX_CODEC_INVALID_PARAM; diff -Nru libvpx-1.10.0/debian/patches/series libvpx-1.11.0/debian/patches/series --- libvpx-1.10.0/debian/patches/series 1970-01-01 00:00:00.000000000 +0000 +++ libvpx-1.11.0/debian/patches/series 2021-11-16 21:11:05.000000000 +0000 @@ -0,0 +1 @@ +0001-Relax-ABI-check.patch diff -Nru libvpx-1.10.0/debian/rules libvpx-1.11.0/debian/rules --- libvpx-1.10.0/debian/rules 2019-08-01 22:36:16.000000000 +0000 +++ libvpx-1.11.0/debian/rules 2021-12-29 12:18:15.000000000 +0000 @@ -1,17 +1,14 @@ #!/usr/bin/make -f -export DEB_BUILD_MAINT_OPTIONS ?= hardening=-pie - -CODENAME ?= $(shell lsb_release -c -s) +export DEB_BUILD_MAINT_OPTIONS = hardening=-pie DEB_CFLAGS_MAINT_APPEND = -Wall DEB_CXXFLAGS_MAINT_APPEND = -Wall -DEB_HOST_GNU_TYPE ?= $(shell dpkg-architecture -qDEB_HOST_GNU_TYPE) -DEB_BUILD_GNU_TYPE ?= $(shell dpkg-architecture -qDEB_BUILD_GNU_TYPE) -DEB_HOST_ARCH ?= $(shell dpkg-architecture -qDEB_HOST_ARCH) -DEB_HOST_ARCH_CPU ?= $(shell dpkg-architecture -qDEB_HOST_ARCH_CPU) -DEB_HOST_MULTIARCH ?= $(shell dpkg-architecture -qDEB_HOST_MULTIARCH) +# The build fails with GCC 11 when using LTO. +export DEB_BUILD_MAINT_OPTIONS += optimize=-lto + +include /usr/share/dpkg/architecture.mk configure_flags += \ --prefix=/usr \ @@ -27,10 +24,6 @@ --enable-vp9-postproc \ --enable-vp9-highbitdepth -ifneq (,$(filter $(CODENAME),lucid precise)) -configure_flags += --disable-libyuv -endif - ifeq ($(DEB_HOST_ARCH_CPU),arm) configure_flags_neon := $(configure_flags) --target=armv7-linux-gcc BUILD_NEON=Yes @@ -38,35 +31,14 @@ ifeq ($(DEB_HOST_ARCH), arm64) configure_flags += --target=arm64-linux-gcc - -ifneq (,$(filter $(CODENAME),trusty)) -# GCC too old and not compatible with NEON optimizations here -BUILD_NEON=No -configure_flags += --disable-neon --disable-neon-asm -endif - else ifeq ($(DEB_HOST_ARCH), armel) -CFLAGS += -marm -CXXFLAGS += -marm -configure_flags += --target=armv7-linux-gcc --enable-small --disable-neon -BUILD_NEON=No - +configure_flags += --target=generic-gnu --enable-small else ifeq ($(DEB_HOST_ARCH), armhf) # now armhf is ARMv7, but ARMv7 in vpx means NEON, which is not mandatory on armhf -# thus we use ARMv6 and -marm (since no thumb2 on ARMv6) to ensure compatability -# with all ARMv7 cores we support. -CFLAGS += -marm -CXXFLAGS += -marm -configure_flags += --target=armv7-linux-gcc --enable-small - -ifneq (,$(filter $(CODENAME),trusty)) -# GCC too old and not compatible with NEON optimizations here -configure_flags += --disable-neon-asm -configure_flags_neon += --disable-neon-asm -endif - +# thus we use generic-gnu +configure_flags += --target=generic-gnu --enable-small else ifeq ($(DEB_HOST_ARCH), amd64) configure_flags += --target=x86_64-linux-gcc @@ -142,10 +114,3 @@ # don't use stripped library... cp -v $(builddir)/libvpx_g.a \ $(builddir)/vpx-vp8-*/lib/libvpx.a - -override_dh_installdocs: - dh_installdocs - dh_installdocs -A README AUTHORS - -override_dh_builddeb: - dh_builddeb -- -Zxz diff -Nru libvpx-1.10.0/debian/tests/encode-testimage libvpx-1.11.0/debian/tests/encode-testimage --- libvpx-1.10.0/debian/tests/encode-testimage 2018-02-14 15:08:06.000000000 +0000 +++ libvpx-1.11.0/debian/tests/encode-testimage 2021-11-16 21:11:05.000000000 +0000 @@ -4,7 +4,7 @@ set -e -cd "$ADTTMP" +cd "$AUTOPKGTEST_TMP" ffmpeg -y -filter_complex testsrc -t 10 -pix_fmt yuv420p in.y4m vpxenc --codec=vp9 -o out.raw in.y4m diff -Nru libvpx-1.10.0/debian/upstream/metadata libvpx-1.11.0/debian/upstream/metadata --- libvpx-1.10.0/debian/upstream/metadata 1970-01-01 00:00:00.000000000 +0000 +++ libvpx-1.11.0/debian/upstream/metadata 2021-11-16 21:11:05.000000000 +0000 @@ -0,0 +1,3 @@ +--- +Repository: https://github.com/webmproject/libvpx.git +Repository-Browse: https://github.com/webmproject/libvpx diff -Nru libvpx-1.10.0/debian/vpx6-tools.install libvpx-1.11.0/debian/vpx6-tools.install --- libvpx-1.10.0/debian/vpx6-tools.install 2018-02-14 15:08:06.000000000 +0000 +++ libvpx-1.11.0/debian/vpx6-tools.install 1970-01-01 00:00:00.000000000 +0000 @@ -1,2 +0,0 @@ -builddir/vpxenc usr/bin -builddir/vpxdec usr/bin diff -Nru libvpx-1.10.0/debian/vpx6-tools.manpages libvpx-1.11.0/debian/vpx6-tools.manpages --- libvpx-1.10.0/debian/vpx6-tools.manpages 2018-02-14 15:08:06.000000000 +0000 +++ libvpx-1.11.0/debian/vpx6-tools.manpages 1970-01-01 00:00:00.000000000 +0000 @@ -1,2 +0,0 @@ -debian/vpxenc.1 -debian/vpxdec.1 diff -Nru libvpx-1.10.0/debian/vpx7-tools.install libvpx-1.11.0/debian/vpx7-tools.install --- libvpx-1.10.0/debian/vpx7-tools.install 1970-01-01 00:00:00.000000000 +0000 +++ libvpx-1.11.0/debian/vpx7-tools.install 2021-11-16 21:11:05.000000000 +0000 @@ -0,0 +1,2 @@ +builddir/vpxenc usr/bin +builddir/vpxdec usr/bin diff -Nru libvpx-1.10.0/debian/vpx7-tools.manpages libvpx-1.11.0/debian/vpx7-tools.manpages --- libvpx-1.10.0/debian/vpx7-tools.manpages 1970-01-01 00:00:00.000000000 +0000 +++ libvpx-1.11.0/debian/vpx7-tools.manpages 2021-11-16 21:11:05.000000000 +0000 @@ -0,0 +1,2 @@ +debian/vpxenc.1 +debian/vpxdec.1 diff -Nru libvpx-1.10.0/debian/watch libvpx-1.11.0/debian/watch --- libvpx-1.10.0/debian/watch 2018-02-14 15:11:55.000000000 +0000 +++ libvpx-1.11.0/debian/watch 2021-11-16 21:11:05.000000000 +0000 @@ -1,3 +1,3 @@ version=3 -opts=filenamemangle=s/.+\/v?(\d\S*)\.tar\.gz/libvpx-$1\.tar\.gz/ \ +opts=uversionmangle=s/-rc/~rc/,filenamemangle=s/.+\/v?(\d\S*)\.tar\.gz/libvpx-$1\.tar\.gz/ \ https://github.com/webmproject/libvpx/tags .*/v?(\d\S*)\.tar\.gz diff -Nru libvpx-1.10.0/examples/vpx_temporal_svc_encoder.c libvpx-1.11.0/examples/vpx_temporal_svc_encoder.c --- libvpx-1.10.0/examples/vpx_temporal_svc_encoder.c 2021-03-18 19:59:46.000000000 +0000 +++ libvpx-1.11.0/examples/vpx_temporal_svc_encoder.c 2021-10-06 17:41:19.000000000 +0000 @@ -831,6 +831,7 @@ } else if (strncmp(encoder->name, "vp9", 3) == 0) { vpx_svc_extra_cfg_t svc_params; memset(&svc_params, 0, sizeof(svc_params)); + vpx_codec_control(&codec, VP9E_SET_POSTENCODE_DROP, 0); vpx_codec_control(&codec, VP9E_SET_DISABLE_OVERSHOOT_MAXQ_CBR, 0); vpx_codec_control(&codec, VP8E_SET_CPUUSED, speed); vpx_codec_control(&codec, VP9E_SET_AQ_MODE, 3); diff -Nru libvpx-1.10.0/libs.mk libvpx-1.11.0/libs.mk --- libvpx-1.10.0/libs.mk 2021-03-18 19:59:46.000000000 +0000 +++ libvpx-1.11.0/libs.mk 2021-10-06 17:41:19.000000000 +0000 @@ -299,8 +299,8 @@ # To determine SO_VERSION_{MAJOR,MINOR,PATCH}, calculate c,a,r with current # SO_VERSION_* then follow the rules in the link to detemine the new version # (c1, a1, r1) and set MAJOR to [c1-a1], MINOR to a1 and PATCH to r1 -SO_VERSION_MAJOR := 6 -SO_VERSION_MINOR := 4 +SO_VERSION_MAJOR := 7 +SO_VERSION_MINOR := 0 SO_VERSION_PATCH := 0 ifeq ($(filter darwin%,$(TGT_OS)),$(TGT_OS)) LIBVPX_SO := libvpx.$(SO_VERSION_MAJOR).dylib @@ -493,10 +493,12 @@ $(call enabled,TEST_INTRA_PRED_SPEED_SRCS)) TEST_INTRA_PRED_SPEED_OBJS := $(sort $(call objs,$(TEST_INTRA_PRED_SPEED_SRCS))) +ifeq ($(CONFIG_VP9_ENCODER),yes) RC_INTERFACE_TEST_BIN=./test_rc_interface$(EXE_SFX) RC_INTERFACE_TEST_SRCS=$(call addprefix_clean,test/,\ $(call enabled,RC_INTERFACE_TEST_SRCS)) RC_INTERFACE_TEST_OBJS := $(sort $(call objs,$(RC_INTERFACE_TEST_SRCS))) +endif SIMPLE_ENCODE_TEST_BIN=./test_simple_encode$(EXE_SFX) SIMPLE_ENCODE_TEST_SRCS=$(call addprefix_clean,test/,\ @@ -597,6 +599,7 @@ -L. -l$(CODEC_LIB) -l$(GTEST_LIB) $^ endif # TEST_INTRA_PRED_SPEED +ifeq ($(CONFIG_VP9_ENCODER),yes) ifneq ($(strip $(RC_INTERFACE_TEST_OBJS)),) PROJECTS-$(CONFIG_MSVS) += test_rc_interface.$(VCPROJ_SFX) test_rc_interface.$(VCPROJ_SFX): $(RC_INTERFACE_TEST_SRCS) vpx.$(VCPROJ_SFX) \ @@ -616,6 +619,7 @@ -I. -I"$(SRC_PATH_BARE)/third_party/googletest/src/include" \ -L. -l$(CODEC_LIB) -l$(RC_RTC_LIB) -l$(GTEST_LIB) $^ endif # RC_INTERFACE_TEST +endif # CONFIG_VP9_ENCODER endif else @@ -657,6 +661,7 @@ -L. -lvpx -lgtest $(extralibs) -lm)) endif # TEST_INTRA_PRED_SPEED +ifeq ($(CONFIG_VP9_ENCODER),yes) ifneq ($(strip $(RC_INTERFACE_TEST_OBJS)),) $(RC_INTERFACE_TEST_OBJS) $(RC_INTERFACE_TEST_OBJS:.o=.d): \ CXXFLAGS += $(GTEST_INCLUDES) @@ -668,6 +673,7 @@ $(RC_INTERFACE_TEST_OBJS) \ -L. -lvpx -lgtest -lvp9rc $(extralibs) -lm)) endif # RC_INTERFACE_TEST +endif # CONFIG_VP9_ENCODER ifneq ($(strip $(SIMPLE_ENCODE_TEST_OBJS)),) $(SIMPLE_ENCODE_TEST_OBJS) $(SIMPLE_ENCODE_TEST_OBJS:.o=.d): \ diff -Nru libvpx-1.10.0/test/encode_api_test.cc libvpx-1.11.0/test/encode_api_test.cc --- libvpx-1.10.0/test/encode_api_test.cc 2021-03-18 19:59:46.000000000 +0000 +++ libvpx-1.11.0/test/encode_api_test.cc 2021-10-06 17:41:19.000000000 +0000 @@ -8,6 +8,9 @@ * be found in the AUTHORS file in the root of the source tree. */ +#include +#include + #include "third_party/googletest/src/include/gtest/gtest.h" #include "./vpx_config.h" @@ -18,6 +21,12 @@ #define NELEMENTS(x) static_cast(sizeof(x) / sizeof(x[0])) +bool IsVP9(const vpx_codec_iface_t *iface) { + static const char kVP9Name[] = "WebM Project VP9"; + return strncmp(kVP9Name, vpx_codec_iface_name(iface), sizeof(kVP9Name) - 1) == + 0; +} + TEST(EncodeAPI, InvalidParams) { static const vpx_codec_iface_t *kCodecs[] = { #if CONFIG_VP8_ENCODER @@ -184,10 +193,7 @@ } // VP9 should report incapable, VP8 invalid for all configurations. - const char kVP9Name[] = "WebM Project VP9"; - const bool is_vp9 = strncmp(kVP9Name, vpx_codec_iface_name(iface), - sizeof(kVP9Name) - 1) == 0; - EXPECT_EQ(is_vp9 ? VPX_CODEC_INCAPABLE : VPX_CODEC_INVALID_PARAM, + EXPECT_EQ(IsVP9(iface) ? VPX_CODEC_INCAPABLE : VPX_CODEC_INVALID_PARAM, vpx_codec_enc_init_multi(&enc[0], iface, &cfg[0], 2, 0, &dsf[0])); for (int i = 0; i < 2; i++) { @@ -196,4 +202,112 @@ } } +TEST(EncodeAPI, SetRoi) { + static struct { + const vpx_codec_iface_t *iface; + int ctrl_id; + } kCodecs[] = { +#if CONFIG_VP8_ENCODER + { &vpx_codec_vp8_cx_algo, VP8E_SET_ROI_MAP }, +#endif +#if CONFIG_VP9_ENCODER + { &vpx_codec_vp9_cx_algo, VP9E_SET_ROI_MAP }, +#endif + }; + constexpr int kWidth = 64; + constexpr int kHeight = 64; + + for (const auto &codec : kCodecs) { + SCOPED_TRACE(vpx_codec_iface_name(codec.iface)); + vpx_codec_ctx_t enc; + vpx_codec_enc_cfg_t cfg; + + EXPECT_EQ(vpx_codec_enc_config_default(codec.iface, &cfg, 0), VPX_CODEC_OK); + cfg.g_w = kWidth; + cfg.g_h = kHeight; + EXPECT_EQ(vpx_codec_enc_init(&enc, codec.iface, &cfg, 0), VPX_CODEC_OK); + + vpx_roi_map_t roi = {}; + uint8_t roi_map[kWidth * kHeight] = {}; + if (IsVP9(codec.iface)) { + roi.rows = (cfg.g_w + 7) >> 3; + roi.cols = (cfg.g_h + 7) >> 3; + } else { + roi.rows = (cfg.g_w + 15) >> 4; + roi.cols = (cfg.g_h + 15) >> 4; + } + EXPECT_EQ(vpx_codec_control_(&enc, codec.ctrl_id, &roi), VPX_CODEC_OK); + + roi.roi_map = roi_map; + // VP8 only. This value isn't range checked. + roi.static_threshold[1] = 1000; + roi.static_threshold[2] = INT_MIN; + roi.static_threshold[3] = INT_MAX; + + for (const auto delta : { -63, -1, 0, 1, 63 }) { + for (int i = 0; i < 8; ++i) { + roi.delta_q[i] = delta; + roi.delta_lf[i] = delta; + // VP9 only. + roi.skip[i] ^= 1; + roi.ref_frame[i] = (roi.ref_frame[i] + 1) % 4; + EXPECT_EQ(vpx_codec_control_(&enc, codec.ctrl_id, &roi), VPX_CODEC_OK); + } + } + + vpx_codec_err_t expected_error; + for (const auto delta : { -64, 64, INT_MIN, INT_MAX }) { + expected_error = VPX_CODEC_INVALID_PARAM; + for (int i = 0; i < 8; ++i) { + roi.delta_q[i] = delta; + // The max segment count for VP8 is 4, the remainder of the entries are + // ignored. + if (i >= 4 && !IsVP9(codec.iface)) expected_error = VPX_CODEC_OK; + + EXPECT_EQ(vpx_codec_control_(&enc, codec.ctrl_id, &roi), expected_error) + << "delta_q[" << i << "]: " << delta; + roi.delta_q[i] = 0; + + roi.delta_lf[i] = delta; + EXPECT_EQ(vpx_codec_control_(&enc, codec.ctrl_id, &roi), expected_error) + << "delta_lf[" << i << "]: " << delta; + roi.delta_lf[i] = 0; + } + } + + // VP8 should ignore skip[] and ref_frame[] values. + expected_error = + IsVP9(codec.iface) ? VPX_CODEC_INVALID_PARAM : VPX_CODEC_OK; + for (const auto skip : { -2, 2, INT_MIN, INT_MAX }) { + for (int i = 0; i < 8; ++i) { + roi.skip[i] = skip; + EXPECT_EQ(vpx_codec_control_(&enc, codec.ctrl_id, &roi), expected_error) + << "skip[" << i << "]: " << skip; + roi.skip[i] = 0; + } + } + + // VP9 allows negative values to be used to disable segmentation. + for (int ref_frame = -3; ref_frame < 0; ++ref_frame) { + for (int i = 0; i < 8; ++i) { + roi.ref_frame[i] = ref_frame; + EXPECT_EQ(vpx_codec_control_(&enc, codec.ctrl_id, &roi), VPX_CODEC_OK) + << "ref_frame[" << i << "]: " << ref_frame; + roi.ref_frame[i] = 0; + } + } + + for (const auto ref_frame : { 4, INT_MIN, INT_MAX }) { + for (int i = 0; i < 8; ++i) { + roi.ref_frame[i] = ref_frame; + EXPECT_EQ(vpx_codec_control_(&enc, codec.ctrl_id, &roi), expected_error) + << "ref_frame[" << i << "]: " << ref_frame; + roi.ref_frame[i] = 0; + } + } + + EXPECT_EQ(vpx_codec_destroy(&enc), VPX_CODEC_OK); + } +} + } // namespace diff -Nru libvpx-1.10.0/test/ratectrl_rtc_test.cc libvpx-1.11.0/test/ratectrl_rtc_test.cc --- libvpx-1.10.0/test/ratectrl_rtc_test.cc 2021-03-18 19:59:46.000000000 +0000 +++ libvpx-1.11.0/test/ratectrl_rtc_test.cc 2021-10-06 17:41:19.000000000 +0000 @@ -16,6 +16,7 @@ #include "third_party/googletest/src/include/gtest/gtest.h" #include "test/codec_factory.h" #include "test/encode_test_driver.h" +#include "test/i420_video_source.h" #include "test/util.h" #include "test/video_source.h" #include "vpx/vpx_codec.h" @@ -23,130 +24,89 @@ namespace { -const size_t kNumFrame = 850; +const size_t kNumFrames = 300; -struct FrameInfo { - friend std::istream &operator>>(std::istream &is, FrameInfo &info) { - is >> info.frame_id >> info.spatial_id >> info.temporal_id >> info.base_q >> - info.target_bandwidth >> info.buffer_level >> info.filter_level_ >> - info.bytes_used; - return is; - } - int frame_id; - int spatial_id; - int temporal_id; - // Base QP - int base_q; - size_t target_bandwidth; - size_t buffer_level; - // Loopfilter level - int filter_level_; - // Frame size for current frame, used for pose encode update - size_t bytes_used; -}; +const int kTemporalId[4] = { 0, 2, 1, 2 }; -// This test runs the rate control interface and compare against ground truth -// generated by encoders. -// Settings for the encoder: -// For 1 layer: -// -// examples/vpx_temporal_svc_encoder gipsrec_motion1.1280_720.yuv out vp9 -// 1280 720 1 30 7 0 0 1 0 1000 -// -// For SVC (3 temporal layers, 3 spatial layers): -// -// examples/vp9_spatial_svc_encoder -f 10000 -w 1280 -h 720 -t 1/30 -sl 3 -// -k 10000 -bl 100,140,200,250,350,500,450,630,900 -b 1600 --rc-end-usage=1 -// --lag-in-frames=0 --passes=1 --speed=7 --threads=1 -// --temporal-layering-mode=3 -aq 1 -rcstat 1 -// gipsrec_motion1.1280_720.yuv -o out.webm -// -// - AQ_Mode 0 -// - Disable golden refresh -// - Bitrate x 2 at frame/superframe 200 -// - Bitrate / 4 at frame/superframe 400 -// -// The generated file includes: -// frame number, spatial layer ID, temporal layer ID, base QP, target -// bandwidth, buffer level, loopfilter level, encoded frame size -// TODO(jianj): Remove golden files, and run actual encoding in this test. -class RcInterfaceTest : public ::testing::Test { +class RcInterfaceTest + : public ::libvpx_test::EncoderTest, + public ::libvpx_test::CodecTestWith2Params { public: - explicit RcInterfaceTest() {} + RcInterfaceTest() + : EncoderTest(GET_PARAM(0)), aq_mode_(GET_PARAM(1)), key_interval_(3000), + encoder_exit_(false) {} virtual ~RcInterfaceTest() {} protected: + virtual void SetUp() { + InitializeConfig(); + SetMode(::libvpx_test::kRealTime); + } + + virtual void PreEncodeFrameHook(libvpx_test::VideoSource *video, + libvpx_test::Encoder *encoder) { + if (video->frame() == 0) { + encoder->Control(VP8E_SET_CPUUSED, 7); + encoder->Control(VP9E_SET_AQ_MODE, aq_mode_); + encoder->Control(VP9E_SET_TUNE_CONTENT, 0); + encoder->Control(VP8E_SET_MAX_INTRA_BITRATE_PCT, 1000); + encoder->Control(VP9E_SET_RTC_EXTERNAL_RATECTRL, 1); + } + frame_params_.frame_type = + video->frame() % key_interval_ == 0 ? KEY_FRAME : INTER_FRAME; + if (rc_cfg_.rc_mode == VPX_CBR && frame_params_.frame_type == INTER_FRAME) { + // Disable golden frame update. + frame_flags_ |= VP8_EFLAG_NO_UPD_GF; + frame_flags_ |= VP8_EFLAG_NO_UPD_ARF; + } + encoder_exit_ = video->frame() == kNumFrames; + } + + virtual void PostEncodeFrameHook(::libvpx_test::Encoder *encoder) { + if (encoder_exit_) { + return; + } + int loopfilter_level, qp; + encoder->Control(VP9E_GET_LOOPFILTER_LEVEL, &loopfilter_level); + encoder->Control(VP8E_GET_LAST_QUANTIZER, &qp); + rc_api_->ComputeQP(frame_params_); + ASSERT_EQ(rc_api_->GetQP(), qp); + ASSERT_EQ(rc_api_->GetLoopfilterLevel(), loopfilter_level); + } + + virtual void FramePktHook(const vpx_codec_cx_pkt_t *pkt) { + rc_api_->PostEncodeUpdate(pkt->data.frame.sz); + } + void RunOneLayer() { - SetConfigOneLayer(); + SetConfig(GET_PARAM(2)); rc_api_ = libvpx::VP9RateControlRTC::Create(rc_cfg_); - FrameInfo frame_info; - libvpx::VP9FrameParamsQpRTC frame_params; - frame_params.frame_type = KEY_FRAME; - frame_params.spatial_layer_id = 0; - frame_params.temporal_layer_id = 0; - std::ifstream one_layer_file; - one_layer_file.open(libvpx_test::GetDataPath() + - "/rc_interface_test_one_layer"); - ASSERT_TRUE(one_layer_file.good()); - for (size_t i = 0; i < kNumFrame; i++) { - one_layer_file >> frame_info; - if (frame_info.frame_id > 0) frame_params.frame_type = INTER_FRAME; - if (frame_info.frame_id == 200) { - rc_cfg_.target_bandwidth = rc_cfg_.target_bandwidth * 2; - rc_api_->UpdateRateControl(rc_cfg_); - } else if (frame_info.frame_id == 400) { - rc_cfg_.target_bandwidth = rc_cfg_.target_bandwidth / 4; - rc_api_->UpdateRateControl(rc_cfg_); - } - ASSERT_EQ(frame_info.spatial_id, 0); - ASSERT_EQ(frame_info.temporal_id, 0); - rc_api_->ComputeQP(frame_params); - ASSERT_EQ(rc_api_->GetQP(), frame_info.base_q); - ASSERT_EQ(rc_api_->GetLoopfilterLevel(), frame_info.filter_level_); - rc_api_->PostEncodeUpdate(frame_info.bytes_used); - } + frame_params_.spatial_layer_id = 0; + frame_params_.temporal_layer_id = 0; + + ::libvpx_test::I420VideoSource video("desktop_office1.1280_720-020.yuv", + 1280, 720, 30, 1, 0, kNumFrames); + + ASSERT_NO_FATAL_FAILURE(RunLoop(&video)); } - void RunSVC() { - SetConfigSVC(); + void RunOneLayerVBRPeriodicKey() { + if (GET_PARAM(2) != VPX_VBR) return; + key_interval_ = 100; + SetConfig(VPX_VBR); rc_api_ = libvpx::VP9RateControlRTC::Create(rc_cfg_); - FrameInfo frame_info; - libvpx::VP9FrameParamsQpRTC frame_params; - frame_params.frame_type = KEY_FRAME; - std::ifstream svc_file; - svc_file.open(std::string(std::getenv("LIBVPX_TEST_DATA_PATH")) + - "/rc_interface_test_svc"); - ASSERT_TRUE(svc_file.good()); - for (size_t i = 0; i < kNumFrame * rc_cfg_.ss_number_layers; i++) { - svc_file >> frame_info; - if (frame_info.frame_id > 0) frame_params.frame_type = INTER_FRAME; - if (frame_info.frame_id == 200 * rc_cfg_.ss_number_layers) { - for (int layer = 0; - layer < rc_cfg_.ss_number_layers * rc_cfg_.ts_number_layers; - layer++) - rc_cfg_.layer_target_bitrate[layer] *= 2; - rc_cfg_.target_bandwidth *= 2; - rc_api_->UpdateRateControl(rc_cfg_); - } else if (frame_info.frame_id == 400 * rc_cfg_.ss_number_layers) { - for (int layer = 0; - layer < rc_cfg_.ss_number_layers * rc_cfg_.ts_number_layers; - layer++) - rc_cfg_.layer_target_bitrate[layer] /= 4; - rc_cfg_.target_bandwidth /= 4; - rc_api_->UpdateRateControl(rc_cfg_); - } - frame_params.spatial_layer_id = frame_info.spatial_id; - frame_params.temporal_layer_id = frame_info.temporal_id; - rc_api_->ComputeQP(frame_params); - ASSERT_EQ(rc_api_->GetQP(), frame_info.base_q); - ASSERT_EQ(rc_api_->GetLoopfilterLevel(), frame_info.filter_level_); - rc_api_->PostEncodeUpdate(frame_info.bytes_used); - } + frame_params_.spatial_layer_id = 0; + frame_params_.temporal_layer_id = 0; + + ::libvpx_test::I420VideoSource video("desktop_office1.1280_720-020.yuv", + 1280, 720, 30, 1, 0, kNumFrames); + + ASSERT_NO_FATAL_FAILURE(RunLoop(&video)); } private: - void SetConfigOneLayer() { + void SetConfig(vpx_rc_mode rc_mode) { rc_cfg_.width = 1280; rc_cfg_.height = 720; rc_cfg_.max_quantizer = 52; @@ -166,9 +126,183 @@ rc_cfg_.layer_target_bitrate[0] = 1000; rc_cfg_.max_quantizers[0] = 52; rc_cfg_.min_quantizers[0] = 2; + rc_cfg_.rc_mode = rc_mode; + rc_cfg_.aq_mode = aq_mode_; + + // Encoder settings for ground truth. + cfg_.g_w = 1280; + cfg_.g_h = 720; + cfg_.rc_undershoot_pct = 50; + cfg_.rc_overshoot_pct = 50; + cfg_.rc_buf_initial_sz = 600; + cfg_.rc_buf_optimal_sz = 600; + cfg_.rc_buf_sz = 1000; + cfg_.rc_dropframe_thresh = 0; + cfg_.rc_min_quantizer = 2; + cfg_.rc_max_quantizer = 52; + cfg_.rc_end_usage = rc_mode; + cfg_.g_lag_in_frames = 0; + cfg_.g_error_resilient = 0; + cfg_.rc_target_bitrate = 1000; + cfg_.kf_min_dist = key_interval_; + cfg_.kf_max_dist = key_interval_; } - void SetConfigSVC() { + std::unique_ptr rc_api_; + libvpx::VP9RateControlRtcConfig rc_cfg_; + int aq_mode_; + int key_interval_; + libvpx::VP9FrameParamsQpRTC frame_params_; + bool encoder_exit_; +}; + +class RcInterfaceSvcTest : public ::libvpx_test::EncoderTest, + public ::libvpx_test::CodecTestWithParam { + public: + RcInterfaceSvcTest() : EncoderTest(GET_PARAM(0)), aq_mode_(GET_PARAM(1)) {} + virtual ~RcInterfaceSvcTest() {} + + protected: + virtual void SetUp() { + InitializeConfig(); + SetMode(::libvpx_test::kRealTime); + } + + virtual void PreEncodeFrameHook(libvpx_test::VideoSource *video, + ::libvpx_test::Encoder *encoder) { + if (video->frame() == 0) { + encoder->Control(VP8E_SET_CPUUSED, 7); + encoder->Control(VP9E_SET_AQ_MODE, aq_mode_); + encoder->Control(VP9E_SET_TUNE_CONTENT, 0); + encoder->Control(VP8E_SET_MAX_INTRA_BITRATE_PCT, 900); + encoder->Control(VP9E_SET_RTC_EXTERNAL_RATECTRL, 1); + encoder->Control(VP9E_SET_SVC, 1); + encoder->Control(VP9E_SET_SVC_PARAMETERS, &svc_params_); + } + + frame_params_.frame_type = video->frame() == 0 ? KEY_FRAME : INTER_FRAME; + if (rc_cfg_.rc_mode == VPX_CBR && frame_params_.frame_type == INTER_FRAME) { + // Disable golden frame update. + frame_flags_ |= VP8_EFLAG_NO_UPD_GF; + frame_flags_ |= VP8_EFLAG_NO_UPD_ARF; + } + encoder_exit_ = video->frame() == kNumFrames; + current_superframe_ = video->frame(); + } + + virtual void PostEncodeFrameHook(::libvpx_test::Encoder *encoder) { + ::libvpx_test::CxDataIterator iter = encoder->GetCxData(); + while (const vpx_codec_cx_pkt_t *pkt = iter.Next()) { + ParseSuperframeSizes(static_cast(pkt->data.frame.buf), + pkt->data.frame.sz); + for (int sl = 0; sl < rc_cfg_.ss_number_layers; sl++) { + frame_params_.spatial_layer_id = sl; + frame_params_.temporal_layer_id = kTemporalId[current_superframe_ % 4]; + rc_api_->ComputeQP(frame_params_); + frame_params_.frame_type = INTER_FRAME; + rc_api_->PostEncodeUpdate(sizes_[sl]); + } + } + if (!encoder_exit_) { + int loopfilter_level, qp; + encoder->Control(VP9E_GET_LOOPFILTER_LEVEL, &loopfilter_level); + encoder->Control(VP8E_GET_LAST_QUANTIZER, &qp); + ASSERT_EQ(rc_api_->GetQP(), qp); + ASSERT_EQ(rc_api_->GetLoopfilterLevel(), loopfilter_level); + } + } + // This method needs to be overridden because non-reference frames are + // expected to be mismatched frames as the encoder will avoid loopfilter on + // these frames. + virtual void MismatchHook(const vpx_image_t * /*img1*/, + const vpx_image_t * /*img2*/) {} + + void RunSvc() { + SetConfigSvc(); + rc_api_ = libvpx::VP9RateControlRTC::Create(rc_cfg_); + SetEncoderSvc(); + + ::libvpx_test::I420VideoSource video("desktop_office1.1280_720-020.yuv", + 1280, 720, 30, 1, 0, kNumFrames); + + ASSERT_NO_FATAL_FAILURE(RunLoop(&video)); + } + + private: + vpx_codec_err_t ParseSuperframeSizes(const uint8_t *data, size_t data_sz) { + uint8_t marker = *(data + data_sz - 1); + if ((marker & 0xe0) == 0xc0) { + const uint32_t frames = (marker & 0x7) + 1; + const uint32_t mag = ((marker >> 3) & 0x3) + 1; + const size_t index_sz = 2 + mag * frames; + // This chunk is marked as having a superframe index but doesn't have + // enough data for it, thus it's an invalid superframe index. + if (data_sz < index_sz) return VPX_CODEC_CORRUPT_FRAME; + { + const uint8_t marker2 = *(data + data_sz - index_sz); + // This chunk is marked as having a superframe index but doesn't have + // the matching marker byte at the front of the index therefore it's an + // invalid chunk. + if (marker != marker2) return VPX_CODEC_CORRUPT_FRAME; + } + const uint8_t *x = &data[data_sz - index_sz + 1]; + for (uint32_t i = 0; i < frames; ++i) { + uint32_t this_sz = 0; + + for (uint32_t j = 0; j < mag; ++j) this_sz |= (*x++) << (j * 8); + sizes_[i] = this_sz; + } + } + return VPX_CODEC_OK; + } + + void SetEncoderSvc() { + cfg_.ss_number_layers = 3; + cfg_.ts_number_layers = 3; + cfg_.g_timebase.num = 1; + cfg_.g_timebase.den = 30; + svc_params_.scaling_factor_num[0] = 72; + svc_params_.scaling_factor_den[0] = 288; + svc_params_.scaling_factor_num[1] = 144; + svc_params_.scaling_factor_den[1] = 288; + svc_params_.scaling_factor_num[2] = 288; + svc_params_.scaling_factor_den[2] = 288; + for (int i = 0; i < VPX_MAX_LAYERS; ++i) { + svc_params_.max_quantizers[i] = 56; + svc_params_.min_quantizers[i] = 2; + } + cfg_.rc_end_usage = VPX_CBR; + cfg_.g_lag_in_frames = 0; + cfg_.g_error_resilient = 0; + // 3 temporal layers + cfg_.ts_rate_decimator[0] = 4; + cfg_.ts_rate_decimator[1] = 2; + cfg_.ts_rate_decimator[2] = 1; + cfg_.temporal_layering_mode = 3; + + cfg_.rc_buf_initial_sz = 500; + cfg_.rc_buf_optimal_sz = 600; + cfg_.rc_buf_sz = 1000; + cfg_.rc_min_quantizer = 2; + cfg_.rc_max_quantizer = 56; + cfg_.g_threads = 1; + cfg_.kf_max_dist = 9999; + cfg_.rc_target_bitrate = 1600; + cfg_.rc_overshoot_pct = 50; + cfg_.rc_undershoot_pct = 50; + + cfg_.layer_target_bitrate[0] = 100; + cfg_.layer_target_bitrate[1] = 140; + cfg_.layer_target_bitrate[2] = 200; + cfg_.layer_target_bitrate[3] = 250; + cfg_.layer_target_bitrate[4] = 350; + cfg_.layer_target_bitrate[5] = 500; + cfg_.layer_target_bitrate[6] = 450; + cfg_.layer_target_bitrate[7] = 630; + cfg_.layer_target_bitrate[8] = 900; + } + + void SetConfigSvc() { rc_cfg_.width = 1280; rc_cfg_.height = 720; rc_cfg_.max_quantizer = 56; @@ -183,6 +317,7 @@ rc_cfg_.framerate = 30.0; rc_cfg_.ss_number_layers = 3; rc_cfg_.ts_number_layers = 3; + rc_cfg_.rc_mode = VPX_CBR; rc_cfg_.scaling_factor_num[0] = 1; rc_cfg_.scaling_factor_den[0] = 4; @@ -214,13 +349,25 @@ } } + int aq_mode_; std::unique_ptr rc_api_; libvpx::VP9RateControlRtcConfig rc_cfg_; + vpx_svc_extra_cfg_t svc_params_; + libvpx::VP9FrameParamsQpRTC frame_params_; + bool encoder_exit_; + int current_superframe_; + uint32_t sizes_[8]; }; -TEST_F(RcInterfaceTest, OneLayer) { RunOneLayer(); } +TEST_P(RcInterfaceTest, OneLayer) { RunOneLayer(); } + +TEST_P(RcInterfaceTest, OneLayerVBRPeriodicKey) { RunOneLayerVBRPeriodicKey(); } + +TEST_P(RcInterfaceSvcTest, Svc) { RunSvc(); } -TEST_F(RcInterfaceTest, SVC) { RunSVC(); } +VP9_INSTANTIATE_TEST_SUITE(RcInterfaceTest, ::testing::Values(0, 3), + ::testing::Values(VPX_CBR, VPX_VBR)); +VP9_INSTANTIATE_TEST_SUITE(RcInterfaceSvcTest, ::testing::Values(0)); } // namespace int main(int argc, char **argv) { diff -Nru libvpx-1.10.0/test/simple_encode_test.cc libvpx-1.11.0/test/simple_encode_test.cc --- libvpx-1.10.0/test/simple_encode_test.cc 2021-03-18 19:59:46.000000000 +0000 +++ libvpx-1.11.0/test/simple_encode_test.cc 2021-10-06 17:41:19.000000000 +0000 @@ -13,6 +13,7 @@ #include #include #include "third_party/googletest/src/include/gtest/gtest.h" +#include "test/video_source.h" #include "vp9/simple_encode.h" namespace vp9 { @@ -36,7 +37,8 @@ const int frame_rate_den_ = 1; const int target_bitrate_ = 1000; const int num_frames_ = 17; - const std::string in_file_path_str_ = "bus_352x288_420_f20_b8.yuv"; + const std::string in_file_path_str_ = + libvpx_test::GetDataPath() + "/bus_352x288_420_f20_b8.yuv"; }; TEST_F(SimpleEncodeTest, ComputeFirstPassStats) { diff -Nru libvpx-1.10.0/test/test-data.mk libvpx-1.11.0/test/test-data.mk --- libvpx-1.10.0/test/test-data.mk 2021-03-18 19:59:46.000000000 +0000 +++ libvpx-1.11.0/test/test-data.mk 2021-10-06 17:41:19.000000000 +0000 @@ -27,8 +27,6 @@ LIBVPX_TEST_DATA-$(CONFIG_VP9_ENCODER) += rush_hour_444.y4m LIBVPX_TEST_DATA-$(CONFIG_VP9_ENCODER) += screendata.y4m LIBVPX_TEST_DATA-$(CONFIG_VP9_ENCODER) += niklas_640_480_30.yuv -LIBVPX_TEST_DATA-$(CONFIG_VP9_ENCODER) += rc_interface_test_one_layer -LIBVPX_TEST_DATA-$(CONFIG_VP9_ENCODER) += rc_interface_test_svc LIBVPX_TEST_DATA-$(CONFIG_VP9_ENCODER) += bus_352x288_420_f20_b8.yuv # Test vectors diff -Nru libvpx-1.10.0/test/test-data.sha1 libvpx-1.11.0/test/test-data.sha1 --- libvpx-1.10.0/test/test-data.sha1 2021-03-18 19:59:46.000000000 +0000 +++ libvpx-1.11.0/test/test-data.sha1 2021-10-06 17:41:19.000000000 +0000 @@ -869,5 +869,3 @@ 518a0be998afece76d3df76047d51e256c591ff2 *invalid-bug-148271109.ivf d3964f9dad9f60363c81b688324d95b4ec7c8038 *invalid-bug-148271109.ivf.res ad18ca16f0a249fb3b7c38de0d9b327fed273f96 *hantro_collage_w352h288_nv12.yuv -03f827c0e36ff9a6e23c5cc11936924e4f1827ab *rc_interface_test_one_layer -99e4f4c2961d46dc286db230090a39d78460b25d *rc_interface_test_svc diff -Nru libvpx-1.10.0/test/test.mk libvpx-1.11.0/test/test.mk --- libvpx-1.10.0/test/test.mk 2021-03-18 19:59:46.000000000 +0000 +++ libvpx-1.11.0/test/test.mk 2021-10-06 17:41:19.000000000 +0000 @@ -193,10 +193,8 @@ endif ifeq ($(CONFIG_VP9_ENCODER)$(CONFIG_VP9_TEMPORAL_DENOISING),yesyes) -ifneq (, $(filter yes, $(HAVE_SSE2) $(HAVE_AVX2))) LIBVPX_TEST_SRCS-yes += vp9_denoiser_test.cc endif -endif LIBVPX_TEST_SRCS-$(CONFIG_VP9_ENCODER) += vp9_arf_freq_test.cc ifeq ($(CONFIG_VP9_ENCODER),yes) @@ -216,6 +214,11 @@ TEST_INTRA_PRED_SPEED_SRCS-yes += ../md5_utils.h ../md5_utils.c RC_INTERFACE_TEST_SRCS-$(CONFIG_VP9_ENCODER) := ratectrl_rtc_test.cc +RC_INTERFACE_TEST_SRCS-$(CONFIG_VP9_ENCODER) += encode_test_driver.cc +RC_INTERFACE_TEST_SRCS-$(CONFIG_VP9_ENCODER) += encode_test_driver.h +RC_INTERFACE_TEST_SRCS-yes += decode_test_driver.cc +RC_INTERFACE_TEST_SRCS-yes += decode_test_driver.h +RC_INTERFACE_TEST_SRCS-yes += codec_factory.h endif # CONFIG_SHARED diff -Nru libvpx-1.10.0/test/vp9_end_to_end_test.cc libvpx-1.11.0/test/vp9_end_to_end_test.cc --- libvpx-1.10.0/test/vp9_end_to_end_test.cc 2021-03-18 19:59:46.000000000 +0000 +++ libvpx-1.11.0/test/vp9_end_to_end_test.cc 2021-10-06 17:41:19.000000000 +0000 @@ -31,7 +31,7 @@ { 36.0, 37.0, 37.0, 37.0, 37.0 }, { 35.0, 36.0, 36.0, 36.0, 36.0 }, { 34.0, 35.0, 35.0, 35.0, 35.0 }, { 33.0, 34.0, 34.0, 34.0, 34.0 }, { 32.0, 33.0, 33.0, 33.0, 33.0 }, { 28.0, 32.0, 32.0, 32.0, 32.0 }, - { 28.5, 31.0, 31.0, 31.0, 31.0 }, { 27.5, 30.0, 30.0, 30.0, 30.0 }, + { 28.4, 31.0, 31.0, 31.0, 31.0 }, { 27.5, 30.0, 30.0, 30.0, 30.0 }, }; typedef struct { diff -Nru libvpx-1.10.0/test/vp9_ext_ratectrl_test.cc libvpx-1.11.0/test/vp9_ext_ratectrl_test.cc --- libvpx-1.10.0/test/vp9_ext_ratectrl_test.cc 2021-03-18 19:59:46.000000000 +0000 +++ libvpx-1.11.0/test/vp9_ext_ratectrl_test.cc 2021-10-06 17:41:19.000000000 +0000 @@ -8,6 +8,7 @@ * be found in the AUTHORS file in the root of the source tree. */ +#include #include #include "test/codec_factory.h" @@ -20,7 +21,7 @@ namespace { constexpr int kModelMagicNumber = 51396; -constexpr unsigned int PrivMagicNumber = 5566; +constexpr uintptr_t PrivMagicNumber = 5566; constexpr int kFrameNum = 5; constexpr int kLosslessCodingIndex = 2; diff -Nru libvpx-1.10.0/tools_common.h libvpx-1.11.0/tools_common.h --- libvpx-1.10.0/tools_common.h 2021-03-18 19:59:46.000000000 +0000 +++ libvpx-1.11.0/tools_common.h 2021-10-06 17:41:19.000000000 +0000 @@ -110,6 +110,8 @@ #if defined(__GNUC__) #define VPX_NO_RETURN __attribute__((noreturn)) +#elif defined(_MSC_VER) +#define VPX_NO_RETURN __declspec(noreturn) #else #define VPX_NO_RETURN #endif @@ -117,14 +119,14 @@ /* Sets a stdio stream into binary mode */ FILE *set_binary_mode(FILE *stream); -void die(const char *fmt, ...) VPX_NO_RETURN; -void fatal(const char *fmt, ...) VPX_NO_RETURN; +VPX_NO_RETURN void die(const char *fmt, ...); +VPX_NO_RETURN void fatal(const char *fmt, ...); void warn(const char *fmt, ...); -void die_codec(vpx_codec_ctx_t *ctx, const char *s) VPX_NO_RETURN; +VPX_NO_RETURN void die_codec(vpx_codec_ctx_t *ctx, const char *s); /* The tool including this file must define usage_exit() */ -void usage_exit(void) VPX_NO_RETURN; +VPX_NO_RETURN void usage_exit(void); #undef VPX_NO_RETURN diff -Nru libvpx-1.10.0/vp8/encoder/bitstream.c libvpx-1.11.0/vp8/encoder/bitstream.c --- libvpx-1.10.0/vp8/encoder/bitstream.c 2021-03-18 19:59:46.000000000 +0000 +++ libvpx-1.11.0/vp8/encoder/bitstream.c 2021-10-06 17:41:19.000000000 +0000 @@ -866,7 +866,6 @@ #if !(CONFIG_REALTIME_ONLY & CONFIG_ONTHEFLY_BITPACKING) vp8_writer *const w = cpi->bc; #endif - int savings = 0; vpx_clear_system_state(); @@ -940,8 +939,6 @@ #if !(CONFIG_REALTIME_ONLY & CONFIG_ONTHEFLY_BITPACKING) vp8_write_literal(w, newp, 8); #endif - - savings += s; } } while (++t < ENTROPY_NODES); diff -Nru libvpx-1.10.0/vp8/encoder/onyx_if.c libvpx-1.11.0/vp8/encoder/onyx_if.c --- libvpx-1.10.0/vp8/encoder/onyx_if.c 2021-03-18 19:59:46.000000000 +0000 +++ libvpx-1.11.0/vp8/encoder/onyx_if.c 2021-10-06 17:41:19.000000000 +0000 @@ -301,9 +301,9 @@ /* Work out the average size of a frame within this layer */ if (layer > 0) { lc->avg_frame_size_for_layer = - (int)((cpi->oxcf.target_bitrate[layer] - - cpi->oxcf.target_bitrate[layer - 1]) * - 1000 / (lc->framerate - prev_layer_framerate)); + (int)round((cpi->oxcf.target_bitrate[layer] - + cpi->oxcf.target_bitrate[layer - 1]) * + 1000 / (lc->framerate - prev_layer_framerate)); } lc->active_worst_quality = cpi->oxcf.worst_allowed_q; @@ -4919,6 +4919,8 @@ this_duration = cpi->source->ts_end - cpi->last_end_time_stamp_seen; last_duration = cpi->last_end_time_stamp_seen - cpi->last_time_stamp_seen; + // Cap this to avoid overflow of (this_duration - last_duration) * 10 + this_duration = VPXMIN(this_duration, INT64_MAX / 10); /* do a step update if the duration changes by 10% */ if (last_duration) { step = (int)(((this_duration - last_duration) * 10 / last_duration)); @@ -5316,17 +5318,13 @@ return -1; } - // Range check the delta Q values and convert the external Q range values - // to internal ones. - if ((abs(delta_q[0]) > range) || (abs(delta_q[1]) > range) || - (abs(delta_q[2]) > range) || (abs(delta_q[3]) > range)) { - return -1; - } - - // Range check the delta lf values - if ((abs(delta_lf[0]) > range) || (abs(delta_lf[1]) > range) || - (abs(delta_lf[2]) > range) || (abs(delta_lf[3]) > range)) { - return -1; + for (i = 0; i < MAX_MB_SEGMENTS; ++i) { + // Note abs() alone can't be used as the behavior of abs(INT_MIN) is + // undefined. + if (delta_q[i] > range || delta_q[i] < -range || delta_lf[i] > range || + delta_lf[i] < -range) { + return -1; + } } // Also disable segmentation if no deltas are specified. diff -Nru libvpx-1.10.0/vp8/encoder/ratectrl.c libvpx-1.11.0/vp8/encoder/ratectrl.c --- libvpx-1.10.0/vp8/encoder/ratectrl.c 2021-03-18 19:59:46.000000000 +0000 +++ libvpx-1.11.0/vp8/encoder/ratectrl.c 2021-10-06 17:41:19.000000000 +0000 @@ -349,8 +349,12 @@ } if (cpi->oxcf.rc_max_intra_bitrate_pct) { - unsigned int max_rate = - cpi->per_frame_bandwidth * cpi->oxcf.rc_max_intra_bitrate_pct / 100; + unsigned int max_rate; + // This product may overflow unsigned int + uint64_t product = cpi->per_frame_bandwidth; + product *= cpi->oxcf.rc_max_intra_bitrate_pct; + product /= 100; + max_rate = (unsigned int)VPXMIN(INT_MAX, product); if (target > max_rate) target = max_rate; } diff -Nru libvpx-1.10.0/vp8/vp8_cx_iface.c libvpx-1.11.0/vp8/vp8_cx_iface.c --- libvpx-1.10.0/vp8/vp8_cx_iface.c 2021-03-18 19:59:46.000000000 +0000 +++ libvpx-1.11.0/vp8/vp8_cx_iface.c 2021-10-06 17:41:19.000000000 +0000 @@ -152,8 +152,8 @@ RANGE_CHECK_HI(cfg, g_lag_in_frames, 25); #endif RANGE_CHECK(cfg, rc_end_usage, VPX_VBR, VPX_Q); - RANGE_CHECK_HI(cfg, rc_undershoot_pct, 1000); - RANGE_CHECK_HI(cfg, rc_overshoot_pct, 1000); + RANGE_CHECK_HI(cfg, rc_undershoot_pct, 100); + RANGE_CHECK_HI(cfg, rc_overshoot_pct, 100); RANGE_CHECK_HI(cfg, rc_2pass_vbr_bias_pct, 100); RANGE_CHECK(cfg, kf_mode, VPX_KF_DISABLED, VPX_KF_AUTO); @@ -257,6 +257,23 @@ ERROR("g_threads cannot be bigger than number of token partitions"); #endif + // The range below shall be further tuned. + RANGE_CHECK(cfg, use_vizier_rc_params, 0, 1); + RANGE_CHECK(cfg, active_wq_factor.den, 1, 1000); + RANGE_CHECK(cfg, err_per_mb_factor.den, 1, 1000); + RANGE_CHECK(cfg, sr_default_decay_limit.den, 1, 1000); + RANGE_CHECK(cfg, sr_diff_factor.den, 1, 1000); + RANGE_CHECK(cfg, kf_err_per_mb_factor.den, 1, 1000); + RANGE_CHECK(cfg, kf_frame_min_boost_factor.den, 1, 1000); + RANGE_CHECK(cfg, kf_frame_max_boost_subs_factor.den, 1, 1000); + RANGE_CHECK(cfg, kf_max_total_boost_factor.den, 1, 1000); + RANGE_CHECK(cfg, gf_max_total_boost_factor.den, 1, 1000); + RANGE_CHECK(cfg, gf_frame_max_boost_factor.den, 1, 1000); + RANGE_CHECK(cfg, zm_factor.den, 1, 1000); + RANGE_CHECK(cfg, rd_mult_inter_qp_fac.den, 1, 1000); + RANGE_CHECK(cfg, rd_mult_arf_qp_fac.den, 1, 1000); + RANGE_CHECK(cfg, rd_mult_key_qp_fac.den, 1, 1000); + return VPX_CODEC_OK; } @@ -378,6 +395,9 @@ #endif oxcf->cpu_used = vp8_cfg.cpu_used; + if (cfg.g_pass == VPX_RC_FIRST_PASS) { + oxcf->cpu_used = VPXMAX(4, oxcf->cpu_used); + } oxcf->encode_breakout = vp8_cfg.static_thresh; oxcf->play_alternate = vp8_cfg.enable_auto_alt_ref; oxcf->noise_sensitivity = vp8_cfg.noise_sensitivity; @@ -1256,7 +1276,7 @@ VPX_VBR, /* rc_end_usage */ { NULL, 0 }, /* rc_twopass_stats_in */ { NULL, 0 }, /* rc_firstpass_mb_stats_in */ - 256, /* rc_target_bandwidth */ + 256, /* rc_target_bitrate */ 4, /* rc_min_quantizer */ 63, /* rc_max_quantizer */ 100, /* rc_undershoot_pct */ @@ -1278,14 +1298,30 @@ VPX_SS_DEFAULT_LAYERS, /* ss_number_layers */ { 0 }, - { 0 }, /* ss_target_bitrate */ - 1, /* ts_number_layers */ - { 0 }, /* ts_target_bitrate */ - { 0 }, /* ts_rate_decimator */ - 0, /* ts_periodicity */ - { 0 }, /* ts_layer_id */ - { 0 }, /* layer_target_bitrate */ - 0 /* temporal_layering_mode */ + { 0 }, /* ss_target_bitrate */ + 1, /* ts_number_layers */ + { 0 }, /* ts_target_bitrate */ + { 0 }, /* ts_rate_decimator */ + 0, /* ts_periodicity */ + { 0 }, /* ts_layer_id */ + { 0 }, /* layer_target_bitrate */ + 0, /* temporal_layering_mode */ + 0, /* use_vizier_rc_params */ + { 1, 1 }, /* active_wq_factor */ + { 1, 1 }, /* err_per_mb_factor */ + { 1, 1 }, /* sr_default_decay_limit */ + { 1, 1 }, /* sr_diff_factor */ + { 1, 1 }, /* kf_err_per_mb_factor */ + { 1, 1 }, /* kf_frame_min_boost_factor */ + { 1, 1 }, /* kf_frame_max_boost_first_factor */ + { 1, 1 }, /* kf_frame_max_boost_subs_factor */ + { 1, 1 }, /* kf_max_total_boost_factor */ + { 1, 1 }, /* gf_max_total_boost_factor */ + { 1, 1 }, /* gf_frame_max_boost_factor */ + { 1, 1 }, /* zm_factor */ + { 1, 1 }, /* rd_mult_inter_qp_fac */ + { 1, 1 }, /* rd_mult_arf_qp_fac */ + { 1, 1 }, /* rd_mult_key_qp_fac */ } }, }; diff -Nru libvpx-1.10.0/vp9/encoder/arm/neon/vp9_denoiser_neon.c libvpx-1.11.0/vp9/encoder/arm/neon/vp9_denoiser_neon.c --- libvpx-1.10.0/vp9/encoder/arm/neon/vp9_denoiser_neon.c 2021-03-18 19:59:46.000000000 +0000 +++ libvpx-1.11.0/vp9/encoder/arm/neon/vp9_denoiser_neon.c 2021-10-06 17:41:19.000000000 +0000 @@ -21,6 +21,9 @@ // Compute the sum of all pixel differences of this MB. static INLINE int horizontal_add_s8x16(const int8x16_t v_sum_diff_total) { +#if defined(__aarch64__) + return vaddlvq_s8(v_sum_diff_total); +#else const int16x8_t fe_dc_ba_98_76_54_32_10 = vpaddlq_s8(v_sum_diff_total); const int32x4_t fedc_ba98_7654_3210 = vpaddlq_s16(fe_dc_ba_98_76_54_32_10); const int64x2_t fedcba98_76543210 = vpaddlq_s32(fedc_ba98_7654_3210); @@ -28,6 +31,7 @@ vget_low_s64(fedcba98_76543210)); const int sum_diff = vget_lane_s32(vreinterpret_s32_s64(x), 0); return sum_diff; +#endif } // Denoise a 16x1 vector. diff -Nru libvpx-1.10.0/vp9/encoder/vp9_aq_cyclicrefresh.c libvpx-1.11.0/vp9/encoder/vp9_aq_cyclicrefresh.c --- libvpx-1.10.0/vp9/encoder/vp9_aq_cyclicrefresh.c 2021-03-18 19:59:46.000000000 +0000 +++ libvpx-1.11.0/vp9/encoder/vp9_aq_cyclicrefresh.c 2021-10-06 17:41:19.000000000 +0000 @@ -48,6 +48,7 @@ assert(MAXQ <= 255); memset(cr->last_coded_q_map, MAXQ, last_coded_q_map_size); cr->counter_encode_maxq_scene_change = 0; + cr->content_mode = 1; return cr; } @@ -326,7 +327,8 @@ else rc->baseline_gf_interval = 40; if (cpi->oxcf.rc_mode == VPX_VBR) rc->baseline_gf_interval = 20; - if (rc->avg_frame_low_motion < 50 && rc->frames_since_key > 40) + if (rc->avg_frame_low_motion < 50 && rc->frames_since_key > 40 && + cr->content_mode) rc->baseline_gf_interval = 10; } @@ -388,7 +390,8 @@ ? vp9_get_qindex(&cm->seg, CR_SEGMENT_ID_BOOST2, cm->base_qindex) : vp9_get_qindex(&cm->seg, CR_SEGMENT_ID_BOOST1, cm->base_qindex); // More aggressive settings for noisy content. - if (cpi->noise_estimate.enabled && cpi->noise_estimate.level >= kMedium) { + if (cpi->noise_estimate.enabled && cpi->noise_estimate.level >= kMedium && + cr->content_mode) { consec_zero_mv_thresh = 60; qindex_thresh = VPXMAX(vp9_get_qindex(&cm->seg, CR_SEGMENT_ID_BOOST1, cm->base_qindex), @@ -409,7 +412,7 @@ #if CONFIG_VP9_HIGHBITDEPTH if (cpi->common.use_highbitdepth) compute_content = 0; #endif - if (cpi->Last_Source == NULL || + if (cr->content_mode == 0 || cpi->Last_Source == NULL || cpi->Last_Source->y_width != cpi->Source->y_width || cpi->Last_Source->y_height != cpi->Source->y_height) compute_content = 0; @@ -430,7 +433,8 @@ // reset to 0 later depending on the coding mode. if (cr->map[bl_index2] == 0) { count_tot++; - if (cr->last_coded_q_map[bl_index2] > qindex_thresh || + if (cr->content_mode == 0 || + cr->last_coded_q_map[bl_index2] > qindex_thresh || cpi->consec_zero_mv[bl_index2] < consec_zero_mv_thresh_block) { sum_map++; count_sel++; @@ -489,7 +493,8 @@ rc->avg_frame_qindex[INTER_FRAME] < qp_thresh || (cpi->use_svc && cpi->svc.layer_context[cpi->svc.temporal_layer_id].is_key_frame) || - (!cpi->use_svc && rc->avg_frame_low_motion < thresh_low_motion && + (!cpi->use_svc && cr->content_mode && + rc->avg_frame_low_motion < thresh_low_motion && rc->frames_since_key > 40) || (!cpi->use_svc && rc->avg_frame_qindex[INTER_FRAME] > qp_max_thresh && rc->frames_since_key > 20)) { @@ -511,7 +516,8 @@ cr->rate_ratio_qdelta = 3.0; } else { cr->rate_ratio_qdelta = 2.0; - if (cpi->noise_estimate.enabled && cpi->noise_estimate.level >= kMedium) { + if (cr->content_mode && cpi->noise_estimate.enabled && + cpi->noise_estimate.level >= kMedium) { // Reduce the delta-qp if the estimated source noise is above threshold. cr->rate_ratio_qdelta = 1.7; cr->rate_boost_fac = 13; @@ -528,7 +534,7 @@ cr->percent_refresh = (cr->skip_flat_static_blocks) ? 5 : 10; // Increase the amount of refresh on scene change that is encoded at max Q, // increase for a few cycles of the refresh period (~100 / percent_refresh). - if (cr->counter_encode_maxq_scene_change < 30) + if (cr->content_mode && cr->counter_encode_maxq_scene_change < 30) cr->percent_refresh = (cr->skip_flat_static_blocks) ? 10 : 15; cr->rate_ratio_qdelta = 2.0; cr->rate_boost_fac = 10; @@ -575,6 +581,12 @@ (double)(cr->actual_num_seg1_blocks + cr->actual_num_seg2_blocks) / num8x8bl; cr->weight_segment = weight_segment; + if (cr->content_mode == 0) { + cr->actual_num_seg1_blocks = + cr->percent_refresh * cm->mi_rows * cm->mi_cols / 100; + cr->actual_num_seg2_blocks = 0; + cr->weight_segment = (double)(cr->actual_num_seg1_blocks) / num8x8bl; + } } // Setup cyclic background refresh: set delta q and segmentation map. diff -Nru libvpx-1.10.0/vp9/encoder/vp9_aq_cyclicrefresh.h libvpx-1.11.0/vp9/encoder/vp9_aq_cyclicrefresh.h --- libvpx-1.10.0/vp9/encoder/vp9_aq_cyclicrefresh.h 2021-03-18 19:59:46.000000000 +0000 +++ libvpx-1.11.0/vp9/encoder/vp9_aq_cyclicrefresh.h 2021-10-06 17:41:19.000000000 +0000 @@ -70,6 +70,7 @@ int apply_cyclic_refresh; int counter_encode_maxq_scene_change; int skip_flat_static_blocks; + int content_mode; }; struct VP9_COMP; diff -Nru libvpx-1.10.0/vp9/encoder/vp9_block.h libvpx-1.11.0/vp9/encoder/vp9_block.h --- libvpx-1.10.0/vp9/encoder/vp9_block.h 2021-03-18 19:59:46.000000000 +0000 +++ libvpx-1.11.0/vp9/encoder/vp9_block.h 2021-10-06 17:41:19.000000000 +0000 @@ -157,6 +157,9 @@ // skip forward transform and quantization uint8_t skip_txfm[MAX_MB_PLANE << 2]; #define SKIP_TXFM_NONE 0 +// TODO(chengchen): consider remove SKIP_TXFM_AC_DC from vp9 completely +// since it increases risks of bad perceptual quality. +// https://crbug.com/webm/1729 #define SKIP_TXFM_AC_DC 1 #define SKIP_TXFM_AC_ONLY 2 diff -Nru libvpx-1.10.0/vp9/encoder/vp9_encodeframe.c libvpx-1.11.0/vp9/encoder/vp9_encodeframe.c --- libvpx-1.10.0/vp9/encoder/vp9_encodeframe.c 2021-03-18 19:59:46.000000000 +0000 +++ libvpx-1.11.0/vp9/encoder/vp9_encodeframe.c 2021-10-06 17:41:19.000000000 +0000 @@ -159,37 +159,6 @@ } #endif // CONFIG_VP9_HIGHBITDEPTH -#if !CONFIG_REALTIME_ONLY -static unsigned int get_sby_perpixel_diff_variance(VP9_COMP *cpi, - const struct buf_2d *ref, - int mi_row, int mi_col, - BLOCK_SIZE bs) { - unsigned int sse, var; - uint8_t *last_y; - const YV12_BUFFER_CONFIG *last = get_ref_frame_buffer(cpi, LAST_FRAME); - - assert(last != NULL); - last_y = - &last->y_buffer[mi_row * MI_SIZE * last->y_stride + mi_col * MI_SIZE]; - var = cpi->fn_ptr[bs].vf(ref->buf, ref->stride, last_y, last->y_stride, &sse); - return ROUND_POWER_OF_TWO(var, num_pels_log2_lookup[bs]); -} - -static BLOCK_SIZE get_rd_var_based_fixed_partition(VP9_COMP *cpi, MACROBLOCK *x, - int mi_row, int mi_col) { - unsigned int var = get_sby_perpixel_diff_variance( - cpi, &x->plane[0].src, mi_row, mi_col, BLOCK_64X64); - if (var < 8) - return BLOCK_64X64; - else if (var < 128) - return BLOCK_32X32; - else if (var < 2048) - return BLOCK_16X16; - else - return BLOCK_8X8; -} -#endif // !CONFIG_REALTIME_ONLY - static void set_segment_index(VP9_COMP *cpi, MACROBLOCK *const x, int mi_row, int mi_col, BLOCK_SIZE bsize, int segment_index) { VP9_COMMON *const cm = &cpi->common; @@ -815,8 +784,8 @@ // Check if most of the superblock is skin content, and if so, force split to // 32x32, and set x->sb_is_skin for use in mode selection. -static int skin_sb_split(VP9_COMP *cpi, MACROBLOCK *x, const int low_res, - int mi_row, int mi_col, int *force_split) { +static int skin_sb_split(VP9_COMP *cpi, const int low_res, int mi_row, + int mi_col, int *force_split) { VP9_COMMON *const cm = &cpi->common; #if CONFIG_VP9_HIGHBITDEPTH if (cm->use_highbitdepth) return 0; @@ -828,11 +797,6 @@ mi_row + 8 < cm->mi_rows)) { int num_16x16_skin = 0; int num_16x16_nonskin = 0; - uint8_t *ysignal = x->plane[0].src.buf; - uint8_t *usignal = x->plane[1].src.buf; - uint8_t *vsignal = x->plane[2].src.buf; - int sp = x->plane[0].src.stride; - int spuv = x->plane[1].src.stride; const int block_index = mi_row * cm->mi_cols + mi_col; const int bw = num_8x8_blocks_wide_lookup[BLOCK_64X64]; const int bh = num_8x8_blocks_high_lookup[BLOCK_64X64]; @@ -851,13 +815,7 @@ i = ymis; break; } - ysignal += 16; - usignal += 8; - vsignal += 8; - } - ysignal += (sp << 4) - 64; - usignal += (spuv << 3) - 32; - vsignal += (spuv << 3) - 32; + } } if (num_16x16_skin > 12) { *force_split = 1; @@ -1534,8 +1492,7 @@ vp9_build_inter_predictors_sb(xd, mi_row, mi_col, BLOCK_64X64); if (cpi->use_skin_detection) - x->sb_is_skin = - skin_sb_split(cpi, x, low_res, mi_row, mi_col, force_split); + x->sb_is_skin = skin_sb_split(cpi, low_res, mi_row, mi_col, force_split); d = xd->plane[0].dst.buf; dp = xd->plane[0].dst.stride; @@ -1842,7 +1799,8 @@ } // Else for cyclic refresh mode update the segment map, set the segment id // and then update the quantizer. - if (cpi->oxcf.aq_mode == CYCLIC_REFRESH_AQ) { + if (cpi->oxcf.aq_mode == CYCLIC_REFRESH_AQ && + cpi->cyclic_refresh->content_mode) { vp9_cyclic_refresh_update_segment(cpi, xd->mi[0], mi_row, mi_col, bsize, ctx->rate, ctx->dist, x->skip, p); } @@ -2539,7 +2497,8 @@ if (seg->enabled && (cpi->oxcf.aq_mode != NO_AQ || cpi->roi.enabled)) { // Setting segmentation map for cyclic_refresh. - if (cpi->oxcf.aq_mode == CYCLIC_REFRESH_AQ) { + if (cpi->oxcf.aq_mode == CYCLIC_REFRESH_AQ && + cpi->cyclic_refresh->content_mode) { vp9_cyclic_refresh_update_segment(cpi, mi, mi_row, mi_col, bsize, ctx->rate, ctx->dist, x->skip, p); } else { @@ -3119,54 +3078,6 @@ memcpy(x->pred_mv, ctx->pred_mv, sizeof(x->pred_mv)); } -#if CONFIG_FP_MB_STATS -const int num_16x16_blocks_wide_lookup[BLOCK_SIZES] = { 1, 1, 1, 1, 1, 1, 1, - 1, 2, 2, 2, 4, 4 }; -const int num_16x16_blocks_high_lookup[BLOCK_SIZES] = { 1, 1, 1, 1, 1, 1, 1, - 2, 1, 2, 4, 2, 4 }; -const int qindex_skip_threshold_lookup[BLOCK_SIZES] = { 0, 10, 10, 30, 40, - 40, 60, 80, 80, 90, - 100, 100, 120 }; -const int qindex_split_threshold_lookup[BLOCK_SIZES] = { 0, 3, 3, 7, 15, - 15, 30, 40, 40, 60, - 80, 80, 120 }; -const int complexity_16x16_blocks_threshold[BLOCK_SIZES] = { 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, - 4, 4, 6 }; - -typedef enum { - MV_ZERO = 0, - MV_LEFT = 1, - MV_UP = 2, - MV_RIGHT = 3, - MV_DOWN = 4, - MV_INVALID -} MOTION_DIRECTION; - -static INLINE MOTION_DIRECTION get_motion_direction_fp(uint8_t fp_byte) { - if (fp_byte & FPMB_MOTION_ZERO_MASK) { - return MV_ZERO; - } else if (fp_byte & FPMB_MOTION_LEFT_MASK) { - return MV_LEFT; - } else if (fp_byte & FPMB_MOTION_RIGHT_MASK) { - return MV_RIGHT; - } else if (fp_byte & FPMB_MOTION_UP_MASK) { - return MV_UP; - } else { - return MV_DOWN; - } -} - -static INLINE int get_motion_inconsistency(MOTION_DIRECTION this_mv, - MOTION_DIRECTION that_mv) { - if (this_mv == that_mv) { - return 0; - } else { - return abs(this_mv - that_mv) == 2 ? 2 : 1; - } -} -#endif - // Calculate prediction based on the given input features and neural net config. // Assume there are no more than NN_MAX_NODES_PER_LAYER nodes in each hidden // layer. @@ -4064,11 +3975,6 @@ BLOCK_SIZE min_size = x->min_partition_size; BLOCK_SIZE max_size = x->max_partition_size; -#if CONFIG_FP_MB_STATS - unsigned int src_diff_var = UINT_MAX; - int none_complexity = 0; -#endif - int partition_none_allowed = !force_horz_split && !force_vert_split; int partition_horz_allowed = !force_vert_split && yss <= xss && bsize >= BLOCK_8X8; @@ -4155,65 +4061,6 @@ save_context(x, mi_row, mi_col, a, l, sa, sl, bsize); -#if CONFIG_FP_MB_STATS - if (cpi->use_fp_mb_stats) { - set_offsets(cpi, tile_info, x, mi_row, mi_col, bsize); - src_diff_var = get_sby_perpixel_diff_variance(cpi, &x->plane[0].src, mi_row, - mi_col, bsize); - } -#endif - -#if CONFIG_FP_MB_STATS - // Decide whether we shall split directly and skip searching NONE by using - // the first pass block statistics - if (cpi->use_fp_mb_stats && bsize >= BLOCK_32X32 && do_split && - partition_none_allowed && src_diff_var > 4 && - cm->base_qindex < qindex_split_threshold_lookup[bsize]) { - int mb_row = mi_row >> 1; - int mb_col = mi_col >> 1; - int mb_row_end = - VPXMIN(mb_row + num_16x16_blocks_high_lookup[bsize], cm->mb_rows); - int mb_col_end = - VPXMIN(mb_col + num_16x16_blocks_wide_lookup[bsize], cm->mb_cols); - int r, c; - - // compute a complexity measure, basically measure inconsistency of motion - // vectors obtained from the first pass in the current block - for (r = mb_row; r < mb_row_end; r++) { - for (c = mb_col; c < mb_col_end; c++) { - const int mb_index = r * cm->mb_cols + c; - - MOTION_DIRECTION this_mv; - MOTION_DIRECTION right_mv; - MOTION_DIRECTION bottom_mv; - - this_mv = - get_motion_direction_fp(cpi->twopass.this_frame_mb_stats[mb_index]); - - // to its right - if (c != mb_col_end - 1) { - right_mv = get_motion_direction_fp( - cpi->twopass.this_frame_mb_stats[mb_index + 1]); - none_complexity += get_motion_inconsistency(this_mv, right_mv); - } - - // to its bottom - if (r != mb_row_end - 1) { - bottom_mv = get_motion_direction_fp( - cpi->twopass.this_frame_mb_stats[mb_index + cm->mb_cols]); - none_complexity += get_motion_inconsistency(this_mv, bottom_mv); - } - - // do not count its left and top neighbors to avoid double counting - } - } - - if (none_complexity > complexity_16x16_blocks_threshold[bsize]) { - partition_none_allowed = 0; - } - } -#endif - pc_tree->partitioning = PARTITION_NONE; if (cpi->sf.rd_ml_partition.var_pruning && !frame_is_intra_only(cm)) { @@ -4291,53 +4138,6 @@ } } } - -#if CONFIG_FP_MB_STATS - // Check if every 16x16 first pass block statistics has zero - // motion and the corresponding first pass residue is small enough. - // If that is the case, check the difference variance between the - // current frame and the last frame. If the variance is small enough, - // stop further splitting in RD optimization - if (cpi->use_fp_mb_stats && do_split != 0 && - cm->base_qindex > qindex_skip_threshold_lookup[bsize]) { - int mb_row = mi_row >> 1; - int mb_col = mi_col >> 1; - int mb_row_end = - VPXMIN(mb_row + num_16x16_blocks_high_lookup[bsize], cm->mb_rows); - int mb_col_end = - VPXMIN(mb_col + num_16x16_blocks_wide_lookup[bsize], cm->mb_cols); - int r, c; - - int skip = 1; - for (r = mb_row; r < mb_row_end; r++) { - for (c = mb_col; c < mb_col_end; c++) { - const int mb_index = r * cm->mb_cols + c; - if (!(cpi->twopass.this_frame_mb_stats[mb_index] & - FPMB_MOTION_ZERO_MASK) || - !(cpi->twopass.this_frame_mb_stats[mb_index] & - FPMB_ERROR_SMALL_MASK)) { - skip = 0; - break; - } - } - if (skip == 0) { - break; - } - } - - if (skip) { - if (src_diff_var == UINT_MAX) { - set_offsets(cpi, tile_info, x, mi_row, mi_col, bsize); - src_diff_var = get_sby_perpixel_diff_variance( - cpi, &x->plane[0].src, mi_row, mi_col, bsize); - } - if (src_diff_var < 8) { - do_split = 0; - do_rect = 0; - } - } - } -#endif } } restore_context(x, mi_row, mi_col, a, l, sa, sl, bsize); @@ -4603,15 +4403,18 @@ encode_sb(cpi, td, tile_info, tp, mi_row, mi_col, output_enabled, bsize, pc_tree); #if CONFIG_RATE_CTRL - // Store partition, motion vector of the superblock. - if (output_enabled) { - const int num_unit_rows = get_num_unit_4x4(cpi->frame_info.frame_height); - const int num_unit_cols = get_num_unit_4x4(cpi->frame_info.frame_width); - store_superblock_info(pc_tree, cm->mi_grid_visible, cm->mi_stride, - num_4x4_blocks_wide_lookup[BLOCK_64X64], - num_unit_rows, num_unit_cols, mi_row << 1, - mi_col << 1, cpi->partition_info, - cpi->motion_vector_info); + if (oxcf->use_simple_encode_api) { + // Store partition, motion vector of the superblock. + if (output_enabled) { + const int num_unit_rows = + get_num_unit_4x4(cpi->frame_info.frame_height); + const int num_unit_cols = get_num_unit_4x4(cpi->frame_info.frame_width); + store_superblock_info(pc_tree, cm->mi_grid_visible, cm->mi_stride, + num_4x4_blocks_wide_lookup[BLOCK_64X64], + num_unit_rows, num_unit_cols, mi_row << 1, + mi_col << 1, cpi->partition_info, + cpi->motion_vector_info); + } } #endif // CONFIG_RATE_CTRL } @@ -4700,13 +4503,6 @@ set_fixed_partitioning(cpi, tile_info, mi, mi_row, mi_col, bsize); rd_use_partition(cpi, td, tile_data, mi, tp, mi_row, mi_col, BLOCK_64X64, &dummy_rate, &dummy_dist, 1, td->pc_root); - } else if (cpi->partition_search_skippable_frame) { - BLOCK_SIZE bsize; - set_offsets(cpi, tile_info, x, mi_row, mi_col, BLOCK_64X64); - bsize = get_rd_var_based_fixed_partition(cpi, x, mi_row, mi_col); - set_fixed_partitioning(cpi, tile_info, mi, mi_row, mi_col, bsize); - rd_use_partition(cpi, td, tile_data, mi, tp, mi_row, mi_col, BLOCK_64X64, - &dummy_rate, &dummy_dist, 1, td->pc_root); } else if (sf->partition_search_type == VAR_BASED_PARTITION && cm->frame_type != KEY_FRAME) { choose_partitioning(cpi, tile_info, x, mi_row, mi_col); @@ -5981,9 +5777,14 @@ for (i = 0; i < BLOCK_SIZES; ++i) { for (j = 0; j < MAX_MODES; ++j) { tile_data->thresh_freq_fact[i][j] = RD_THRESH_INIT_FACT; -#if CONFIG_CONSISTENT_RECODE || CONFIG_RATE_CTRL +#if CONFIG_RATE_CTRL + if (cpi->oxcf.use_simple_encode_api) { + tile_data->thresh_freq_fact_prev[i][j] = RD_THRESH_INIT_FACT; + } +#endif // CONFIG_RATE_CTRL +#if CONFIG_CONSISTENT_RECODE tile_data->thresh_freq_fact_prev[i][j] = RD_THRESH_INIT_FACT; -#endif // CONFIG_CONSISTENT_RECODE || CONFIG_RATE_CTRL +#endif // CONFIG_CONSISTENT_RECODE tile_data->mode_map[i][j] = j; } } @@ -6072,20 +5873,6 @@ vp9_encode_tile(cpi, &cpi->td, tile_row, tile_col); } -#if CONFIG_FP_MB_STATS -static int input_fpmb_stats(FIRSTPASS_MB_STATS *firstpass_mb_stats, - VP9_COMMON *cm, uint8_t **this_frame_mb_stats) { - uint8_t *mb_stats_in = firstpass_mb_stats->mb_stats_start + - cm->current_video_frame * cm->MBs * sizeof(uint8_t); - - if (mb_stats_in > firstpass_mb_stats->mb_stats_end) return EOF; - - *this_frame_mb_stats = mb_stats_in; - - return 1; -} -#endif - static int compare_kmeans_data(const void *a, const void *b) { if (((const KMEANS_DATA *)a)->value > ((const KMEANS_DATA *)b)->value) { return 1; @@ -6292,13 +6079,6 @@ struct vpx_usec_timer emr_timer; vpx_usec_timer_start(&emr_timer); -#if CONFIG_FP_MB_STATS - if (cpi->use_fp_mb_stats) { - input_fpmb_stats(&cpi->twopass.firstpass_mb_stats, cm, - &cpi->twopass.this_frame_mb_stats); - } -#endif - if (!cpi->row_mt) { cpi->row_mt_sync_read_ptr = vp9_row_mt_sync_read_dummy; cpi->row_mt_sync_write_ptr = vp9_row_mt_sync_write_dummy; @@ -6406,7 +6186,12 @@ void vp9_encode_frame(VP9_COMP *cpi) { VP9_COMMON *const cm = &cpi->common; -#if CONFIG_CONSISTENT_RECODE || CONFIG_RATE_CTRL +#if CONFIG_RATE_CTRL + if (cpi->oxcf.use_simple_encode_api) { + restore_encode_params(cpi); + } +#endif // CONFIG_RATE_CTRL +#if CONFIG_CONSISTENT_RECODE restore_encode_params(cpi); #endif @@ -6703,7 +6488,8 @@ ++td->counts->tx.tx_totals[mi->tx_size]; ++td->counts->tx.tx_totals[get_uv_tx_size(mi, &xd->plane[1])]; - if (cm->seg.enabled && cpi->oxcf.aq_mode == CYCLIC_REFRESH_AQ) + if (cm->seg.enabled && cpi->oxcf.aq_mode == CYCLIC_REFRESH_AQ && + cpi->cyclic_refresh->content_mode) vp9_cyclic_refresh_update_sb_postencode(cpi, mi, mi_row, mi_col, bsize); if (cpi->oxcf.pass == 0 && cpi->svc.temporal_layer_id == 0 && (!cpi->use_svc || diff -Nru libvpx-1.10.0/vp9/encoder/vp9_encoder.c libvpx-1.11.0/vp9/encoder/vp9_encoder.c --- libvpx-1.10.0/vp9/encoder/vp9_encoder.c 2021-03-18 19:59:46.000000000 +0000 +++ libvpx-1.11.0/vp9/encoder/vp9_encoder.c 2021-10-06 17:41:19.000000000 +0000 @@ -654,10 +654,15 @@ } static int check_seg_range(int seg_data[8], int range) { - return !(abs(seg_data[0]) > range || abs(seg_data[1]) > range || - abs(seg_data[2]) > range || abs(seg_data[3]) > range || - abs(seg_data[4]) > range || abs(seg_data[5]) > range || - abs(seg_data[6]) > range || abs(seg_data[7]) > range); + int i; + for (i = 0; i < 8; ++i) { + // Note abs() alone can't be used as the behavior of abs(INT_MIN) is + // undefined. + if (seg_data[i] > range || seg_data[i] < -range) { + return 0; + } + } + return 1; } VP9_LEVEL vp9_get_level(const Vp9LevelSpec *const level_spec) { @@ -1022,10 +1027,12 @@ cpi->mi_ssim_rdmult_scaling_factors = NULL; #if CONFIG_RATE_CTRL - free_partition_info(cpi); - free_motion_vector_info(cpi); - free_fp_motion_vector_info(cpi); - free_tpl_stats_info(cpi); + if (cpi->oxcf.use_simple_encode_api) { + free_partition_info(cpi); + free_motion_vector_info(cpi); + free_fp_motion_vector_info(cpi); + free_tpl_stats_info(cpi); + } #endif vp9_free_ref_frame_buffers(cm->buffer_pool); @@ -2302,6 +2309,7 @@ cm, cm->frame_contexts, (FRAME_CONTEXT *)vpx_calloc(FRAME_CONTEXTS, sizeof(*cm->frame_contexts))); + cpi->compute_frame_low_motion_onepass = 1; cpi->use_svc = 0; cpi->resize_state = ORIG; cpi->external_resize = 0; @@ -2320,7 +2328,6 @@ vp9_init_rd_parameters(cpi); init_frame_indexes(cm); - cpi->partition_search_skippable_frame = 0; cpi->tile_data = NULL; realloc_segmentation_maps(cpi); @@ -2361,17 +2368,6 @@ vpx_calloc(cm->MBs * sizeof(*cpi->mbgraph_stats[i].mb_stats), 1)); } -#if CONFIG_FP_MB_STATS - cpi->use_fp_mb_stats = 0; - if (cpi->use_fp_mb_stats) { - // a place holder used to store the first pass mb stats in the first pass - CHECK_MEM_ERROR(cm, cpi->twopass.frame_mb_stats_buf, - vpx_calloc(cm->MBs * sizeof(uint8_t), 1)); - } else { - cpi->twopass.frame_mb_stats_buf = NULL; - } -#endif - cpi->refresh_alt_ref_frame = 0; cpi->b_calculate_psnr = CONFIG_INTERNAL_STATS; @@ -2524,18 +2520,6 @@ vp9_init_second_pass_spatial_svc(cpi); } else { int num_frames; -#if CONFIG_FP_MB_STATS - if (cpi->use_fp_mb_stats) { - const size_t psz = cpi->common.MBs * sizeof(uint8_t); - const int ps = (int)(oxcf->firstpass_mb_stats_in.sz / psz); - - cpi->twopass.firstpass_mb_stats.mb_stats_start = - oxcf->firstpass_mb_stats_in.buf; - cpi->twopass.firstpass_mb_stats.mb_stats_end = - cpi->twopass.firstpass_mb_stats.mb_stats_start + - (ps - 1) * cpi->common.MBs * sizeof(uint8_t); - } -#endif cpi->twopass.stats_in_start = oxcf->two_pass_stats_in.buf; cpi->twopass.stats_in = cpi->twopass.stats_in_start; @@ -2669,10 +2653,12 @@ #if CONFIG_RATE_CTRL encode_command_init(&cpi->encode_command); - partition_info_init(cpi); - motion_vector_info_init(cpi); - fp_motion_vector_info_init(cpi); - tpl_stats_info_init(cpi); + if (oxcf->use_simple_encode_api) { + partition_info_init(cpi); + motion_vector_info_init(cpi); + fp_motion_vector_info_init(cpi); + tpl_stats_info_init(cpi); + } #endif return cpi; @@ -2837,13 +2823,6 @@ vpx_free(cpi->mbgraph_stats[i].mb_stats); } -#if CONFIG_FP_MB_STATS - if (cpi->use_fp_mb_stats) { - vpx_free(cpi->twopass.frame_mb_stats_buf); - cpi->twopass.frame_mb_stats_buf = NULL; - } -#endif - vp9_extrc_delete(&cpi->ext_ratectrl); vp9_remove_common(cm); @@ -3704,6 +3683,10 @@ cpi->rc.force_max_q = 0; } + if (cpi->use_svc) { + cpi->svc.base_qindex[cpi->svc.spatial_layer_id] = *q; + } + if (!frame_is_intra_only(cm)) { vp9_set_high_precision_mv(cpi, (*q) < HIGH_PRECISION_MV_QTHRESH); } @@ -4204,7 +4187,7 @@ // Update some stats from cyclic refresh, and check for golden frame update. if (cpi->oxcf.aq_mode == CYCLIC_REFRESH_AQ && cm->seg.enabled && - !frame_is_intra_only(cm)) + !frame_is_intra_only(cm) && cpi->cyclic_refresh->content_mode) vp9_cyclic_refresh_postencode(cpi); // Update the skip mb flag probabilities based on the distribution @@ -4398,6 +4381,7 @@ int frame_over_shoot_limit; int frame_under_shoot_limit; int q = 0, q_low = 0, q_high = 0; + int last_q_attempt = 0; int enable_acl; #ifdef AGGRESSIVE_VBR int qrange_adj = 1; @@ -4413,6 +4397,7 @@ // passed in by the external rate control model. // case: -1, we take VP9's decision for the max frame size. int ext_rc_max_frame_size = 0; + const int orig_rc_max_frame_bandwidth = rc->max_frame_bandwidth; #if CONFIG_RATE_CTRL const FRAME_UPDATE_TYPE update_type = @@ -4468,11 +4453,6 @@ loop_at_this_size = 0; } -#if CONFIG_RATE_CTRL - if (cpi->encode_command.use_external_target_frame_bits) { - q = rq_model_predict_q_index(rq_model, rq_history, rc->this_frame_target); - } -#endif // CONFIG_RATE_CTRL // Decide frame size bounds first time through. if (loop_count == 0) { vp9_rc_compute_frame_size_bounds(cpi, rc->this_frame_target, @@ -4515,10 +4495,16 @@ #if CONFIG_RATE_CTRL // TODO(angiebird): This is a hack for making sure the encoder use the // external_quantize_index exactly. Avoid this kind of hack later. - if (cpi->encode_command.use_external_quantize_index) { - q = cpi->encode_command.external_quantize_index; + if (cpi->oxcf.use_simple_encode_api) { + if (cpi->encode_command.use_external_target_frame_bits) { + q = rq_model_predict_q_index(rq_model, rq_history, + rc->this_frame_target); + } + if (cpi->encode_command.use_external_quantize_index) { + q = cpi->encode_command.external_quantize_index; + } } -#endif +#endif // CONFIG_RATE_CTRL if (cpi->ext_ratectrl.ready && !ext_rc_recode) { vpx_codec_err_t codec_status; const GF_GROUP *gf_group = &cpi->twopass.gf_group; @@ -4580,6 +4566,7 @@ } if (cpi->ext_ratectrl.ready) { + last_q_attempt = q; // In general, for the external rate control, we take the qindex provided // as input and encode the frame with this qindex faithfully. However, // in some extreme scenarios, the provided qindex leads to a massive @@ -4597,36 +4584,43 @@ break; } } + rc->max_frame_bandwidth = ext_rc_max_frame_size; + // If the current frame size exceeds the ext_rc_max_frame_size, + // we adjust the worst qindex to meet the frame size constraint. + q_high = 255; ext_rc_recode = 1; } #if CONFIG_RATE_CTRL - // This part needs to be after save_coding_context() because - // restore_coding_context will be called in the end of this function. - // TODO(angiebird): This is a hack for making sure the encoder use the - // external_quantize_index exactly. Avoid this kind of hack later. - if (cpi->encode_command.use_external_quantize_index) { - break; - } - - if (cpi->encode_command.use_external_target_frame_bits) { - const double percent_diff = get_bits_percent_diff( - rc->this_frame_target, rc->projected_frame_size); - update_rq_history(rq_history, rc->this_frame_target, - rc->projected_frame_size, q); - loop_count += 1; - - rq_model_update(rq_history, rc->this_frame_target, rq_model); - - // Check if we hit the target bitrate. - if (percent_diff <= cpi->encode_command.target_frame_bits_error_percent || - rq_history->recode_count >= RATE_CTRL_MAX_RECODE_NUM || - rq_history->q_index_low >= rq_history->q_index_high) { + if (cpi->oxcf.use_simple_encode_api) { + // This part needs to be after save_coding_context() because + // restore_coding_context will be called in the end of this function. + // TODO(angiebird): This is a hack for making sure the encoder use the + // external_quantize_index exactly. Avoid this kind of hack later. + if (cpi->encode_command.use_external_quantize_index) { break; } - loop = 1; - restore_coding_context(cpi); - continue; + if (cpi->encode_command.use_external_target_frame_bits) { + const double percent_diff = get_bits_percent_diff( + rc->this_frame_target, rc->projected_frame_size); + update_rq_history(rq_history, rc->this_frame_target, + rc->projected_frame_size, q); + loop_count += 1; + + rq_model_update(rq_history, rc->this_frame_target, rq_model); + + // Check if we hit the target bitrate. + if (percent_diff <= + cpi->encode_command.target_frame_bits_error_percent || + rq_history->recode_count >= RATE_CTRL_MAX_RECODE_NUM || + rq_history->q_index_low >= rq_history->q_index_high) { + break; + } + + loop = 1; + restore_coding_context(cpi); + continue; + } } #endif // CONFIG_RATE_CTRL @@ -4796,6 +4790,23 @@ rc->projected_frame_size < rc->max_frame_bandwidth) loop = 0; + // Special handling of external max frame size constraint + if (ext_rc_recode) { + // If the largest q is not able to meet the max frame size limit, + // do nothing. + if (rc->projected_frame_size > ext_rc_max_frame_size && + last_q_attempt == 255) { + break; + } + // If VP9's q selection leads to a smaller q, we force it to use + // a larger q to better approximate the external max frame size + // constraint. + if (rc->projected_frame_size > ext_rc_max_frame_size && + q <= last_q_attempt) { + q = VPXMIN(255, last_q_attempt + 1); + } + } + if (loop) { ++loop_count; ++loop_at_this_size; @@ -4809,6 +4820,8 @@ if (loop) restore_coding_context(cpi); } while (loop); + rc->max_frame_bandwidth = orig_rc_max_frame_bandwidth; + #ifdef AGGRESSIVE_VBR if (two_pass_first_group_inter(cpi)) { cpi->twopass.active_worst_quality = @@ -5342,17 +5355,81 @@ } #if !CONFIG_REALTIME_ONLY -static void update_encode_frame_result( +static void update_encode_frame_result_basic( + FRAME_UPDATE_TYPE update_type, int show_idx, int quantize_index, + ENCODE_FRAME_RESULT *encode_frame_result) { + encode_frame_result->show_idx = show_idx; + encode_frame_result->update_type = update_type; + encode_frame_result->quantize_index = quantize_index; +} + +#if CONFIG_RATE_CTRL +static void yv12_buffer_to_image_buffer(const YV12_BUFFER_CONFIG *yv12_buffer, + IMAGE_BUFFER *image_buffer) { + const uint8_t *src_buf_ls[3] = { yv12_buffer->y_buffer, yv12_buffer->u_buffer, + yv12_buffer->v_buffer }; + const int src_stride_ls[3] = { yv12_buffer->y_stride, yv12_buffer->uv_stride, + yv12_buffer->uv_stride }; + const int w_ls[3] = { yv12_buffer->y_crop_width, yv12_buffer->uv_crop_width, + yv12_buffer->uv_crop_width }; + const int h_ls[3] = { yv12_buffer->y_crop_height, yv12_buffer->uv_crop_height, + yv12_buffer->uv_crop_height }; + int plane; + for (plane = 0; plane < 3; ++plane) { + const int src_stride = src_stride_ls[plane]; + const int w = w_ls[plane]; + const int h = h_ls[plane]; + const uint8_t *src_buf = src_buf_ls[plane]; + uint8_t *dst_buf = image_buffer->plane_buffer[plane]; + int r; + assert(image_buffer->plane_width[plane] == w); + assert(image_buffer->plane_height[plane] == h); + for (r = 0; r < h; ++r) { + memcpy(dst_buf, src_buf, sizeof(*src_buf) * w); + src_buf += src_stride; + dst_buf += w; + } + } +} +// This function will update extra information specific for simple_encode APIs +static void update_encode_frame_result_simple_encode( int ref_frame_flags, FRAME_UPDATE_TYPE update_type, const YV12_BUFFER_CONFIG *source_frame, const RefCntBuffer *coded_frame_buf, - RefCntBuffer *ref_frame_buf[MAX_INTER_REF_FRAMES], int quantize_index, + RefCntBuffer *ref_frame_bufs[MAX_INTER_REF_FRAMES], int quantize_index, uint32_t bit_depth, uint32_t input_bit_depth, const FRAME_COUNTS *counts, -#if CONFIG_RATE_CTRL const PARTITION_INFO *partition_info, const MOTION_VECTOR_INFO *motion_vector_info, const TplDepStats *tpl_stats_info, + ENCODE_FRAME_RESULT *encode_frame_result) { + PSNR_STATS psnr; + update_encode_frame_result_basic(update_type, coded_frame_buf->frame_index, + quantize_index, encode_frame_result); +#if CONFIG_VP9_HIGHBITDEPTH + vpx_calc_highbd_psnr(source_frame, &coded_frame_buf->buf, &psnr, bit_depth, + input_bit_depth); +#else // CONFIG_VP9_HIGHBITDEPTH + (void)bit_depth; + (void)input_bit_depth; + vpx_calc_psnr(source_frame, &coded_frame_buf->buf, &psnr); +#endif // CONFIG_VP9_HIGHBITDEPTH + encode_frame_result->frame_coding_index = coded_frame_buf->frame_coding_index; + + vp9_get_ref_frame_info(update_type, ref_frame_flags, ref_frame_bufs, + encode_frame_result->ref_frame_coding_indexes, + encode_frame_result->ref_frame_valid_list); + + encode_frame_result->psnr = psnr.psnr[0]; + encode_frame_result->sse = psnr.sse[0]; + encode_frame_result->frame_counts = *counts; + encode_frame_result->partition_info = partition_info; + encode_frame_result->motion_vector_info = motion_vector_info; + encode_frame_result->tpl_stats_info = tpl_stats_info; + if (encode_frame_result->coded_frame.allocated) { + yv12_buffer_to_image_buffer(&coded_frame_buf->buf, + &encode_frame_result->coded_frame); + } +} #endif // CONFIG_RATE_CTRL - ENCODE_FRAME_RESULT *encode_frame_result); #endif // !CONFIG_REALTIME_ONLY static void encode_frame_to_data_rate( @@ -5447,10 +5524,14 @@ memset(cpi->mode_chosen_counts, 0, MAX_MODES * sizeof(*cpi->mode_chosen_counts)); #endif -#if CONFIG_CONSISTENT_RECODE || CONFIG_RATE_CTRL +#if CONFIG_CONSISTENT_RECODE // Backup to ensure consistency between recodes save_encode_params(cpi); -#endif // CONFIG_CONSISTENT_RECODE || CONFIG_RATE_CTRL +#elif CONFIG_RATE_CTRL + if (cpi->oxcf.use_simple_encode_api) { + save_encode_params(cpi); + } +#endif if (cpi->sf.recode_loop == DISALLOW_RECODE) { if (!encode_without_recode_loop(cpi, size, dest)) return; @@ -5542,10 +5623,12 @@ assert(encode_frame_result == NULL); #else // CONFIG_REALTIME_ONLY if (encode_frame_result != NULL) { - const int ref_frame_flags = get_ref_frame_flags(cpi); const RefCntBuffer *coded_frame_buf = get_ref_cnt_buffer(cm, cm->new_fb_idx); RefCntBuffer *ref_frame_bufs[MAX_INTER_REF_FRAMES]; + FRAME_UPDATE_TYPE update_type = + cpi->twopass.gf_group.update_type[cpi->twopass.gf_group.index]; + int quantize_index = vp9_get_quantizer(cpi); get_ref_frame_bufs(cpi, ref_frame_bufs); // update_encode_frame_result() depends on twopass.gf_group.index and // cm->new_fb_idx, cpi->Source, cpi->lst_fb_idx, cpi->gld_fb_idx and @@ -5563,15 +5646,21 @@ // This function needs to be called before vp9_update_reference_frames(). // TODO(angiebird): Improve the codebase to make the update of frame // dependent variables more robust. - update_encode_frame_result( - ref_frame_flags, - cpi->twopass.gf_group.update_type[cpi->twopass.gf_group.index], - cpi->Source, coded_frame_buf, ref_frame_bufs, vp9_get_quantizer(cpi), - cm->bit_depth, cpi->oxcf.input_bit_depth, cpi->td.counts, + + update_encode_frame_result_basic(update_type, coded_frame_buf->frame_index, + quantize_index, encode_frame_result); #if CONFIG_RATE_CTRL - cpi->partition_info, cpi->motion_vector_info, cpi->tpl_stats_info, + if (cpi->oxcf.use_simple_encode_api) { + const int ref_frame_flags = get_ref_frame_flags(cpi); + update_encode_frame_result_simple_encode( + ref_frame_flags, + cpi->twopass.gf_group.update_type[cpi->twopass.gf_group.index], + cpi->Source, coded_frame_buf, ref_frame_bufs, quantize_index, + cm->bit_depth, cpi->oxcf.input_bit_depth, cpi->td.counts, + cpi->partition_info, cpi->motion_vector_info, cpi->tpl_stats_info, + encode_frame_result); + } #endif // CONFIG_RATE_CTRL - encode_frame_result); } #endif // CONFIG_REALTIME_ONLY @@ -5637,7 +5726,8 @@ vp9_rc_postencode_update(cpi, *size); - if (oxcf->pass == 0 && !frame_is_intra_only(cm) && + if (cpi->compute_frame_low_motion_onepass && oxcf->pass == 0 && + !frame_is_intra_only(cm) && (!cpi->use_svc || (cpi->use_svc && !cpi->svc.layer_context[cpi->svc.temporal_layer_id].is_key_frame && @@ -7491,7 +7581,9 @@ #endif // CONFIG_NON_GREEDY_MV #if CONFIG_RATE_CTRL - accumulate_frame_tpl_stats(cpi); + if (cpi->oxcf.use_simple_encode_api) { + accumulate_frame_tpl_stats(cpi); + } #endif // CONFIG_RATE_CTRL } @@ -7519,206 +7611,6 @@ } } -#if !CONFIG_REALTIME_ONLY -#if CONFIG_RATE_CTRL -static void copy_frame_counts(const FRAME_COUNTS *input_counts, - FRAME_COUNTS *output_counts) { - int i, j, k, l, m, n; - for (i = 0; i < BLOCK_SIZE_GROUPS; ++i) { - for (j = 0; j < INTRA_MODES; ++j) { - output_counts->y_mode[i][j] = input_counts->y_mode[i][j]; - } - } - for (i = 0; i < INTRA_MODES; ++i) { - for (j = 0; j < INTRA_MODES; ++j) { - output_counts->uv_mode[i][j] = input_counts->uv_mode[i][j]; - } - } - for (i = 0; i < PARTITION_CONTEXTS; ++i) { - for (j = 0; j < PARTITION_TYPES; ++j) { - output_counts->partition[i][j] = input_counts->partition[i][j]; - } - } - for (i = 0; i < TX_SIZES; ++i) { - for (j = 0; j < PLANE_TYPES; ++j) { - for (k = 0; k < REF_TYPES; ++k) { - for (l = 0; l < COEF_BANDS; ++l) { - for (m = 0; m < COEFF_CONTEXTS; ++m) { - output_counts->eob_branch[i][j][k][l][m] = - input_counts->eob_branch[i][j][k][l][m]; - for (n = 0; n < UNCONSTRAINED_NODES + 1; ++n) { - output_counts->coef[i][j][k][l][m][n] = - input_counts->coef[i][j][k][l][m][n]; - } - } - } - } - } - } - for (i = 0; i < SWITCHABLE_FILTER_CONTEXTS; ++i) { - for (j = 0; j < SWITCHABLE_FILTERS; ++j) { - output_counts->switchable_interp[i][j] = - input_counts->switchable_interp[i][j]; - } - } - for (i = 0; i < INTER_MODE_CONTEXTS; ++i) { - for (j = 0; j < INTER_MODES; ++j) { - output_counts->inter_mode[i][j] = input_counts->inter_mode[i][j]; - } - } - for (i = 0; i < INTRA_INTER_CONTEXTS; ++i) { - for (j = 0; j < 2; ++j) { - output_counts->intra_inter[i][j] = input_counts->intra_inter[i][j]; - } - } - for (i = 0; i < COMP_INTER_CONTEXTS; ++i) { - for (j = 0; j < 2; ++j) { - output_counts->comp_inter[i][j] = input_counts->comp_inter[i][j]; - } - } - for (i = 0; i < REF_CONTEXTS; ++i) { - for (j = 0; j < 2; ++j) { - for (k = 0; k < 2; ++k) { - output_counts->single_ref[i][j][k] = input_counts->single_ref[i][j][k]; - } - } - } - for (i = 0; i < REF_CONTEXTS; ++i) { - for (j = 0; j < 2; ++j) { - output_counts->comp_ref[i][j] = input_counts->comp_ref[i][j]; - } - } - for (i = 0; i < SKIP_CONTEXTS; ++i) { - for (j = 0; j < 2; ++j) { - output_counts->skip[i][j] = input_counts->skip[i][j]; - } - } - for (i = 0; i < TX_SIZE_CONTEXTS; i++) { - for (j = 0; j < TX_SIZES; j++) { - output_counts->tx.p32x32[i][j] = input_counts->tx.p32x32[i][j]; - } - for (j = 0; j < TX_SIZES - 1; j++) { - output_counts->tx.p16x16[i][j] = input_counts->tx.p16x16[i][j]; - } - for (j = 0; j < TX_SIZES - 2; j++) { - output_counts->tx.p8x8[i][j] = input_counts->tx.p8x8[i][j]; - } - } - for (i = 0; i < TX_SIZES; i++) { - output_counts->tx.tx_totals[i] = input_counts->tx.tx_totals[i]; - } - for (i = 0; i < MV_JOINTS; i++) { - output_counts->mv.joints[i] = input_counts->mv.joints[i]; - } - for (k = 0; k < 2; k++) { - nmv_component_counts *const comps = &output_counts->mv.comps[k]; - const nmv_component_counts *const comps_t = &input_counts->mv.comps[k]; - for (i = 0; i < 2; i++) { - comps->sign[i] = comps_t->sign[i]; - comps->class0_hp[i] = comps_t->class0_hp[i]; - comps->hp[i] = comps_t->hp[i]; - } - for (i = 0; i < MV_CLASSES; i++) { - comps->classes[i] = comps_t->classes[i]; - } - for (i = 0; i < CLASS0_SIZE; i++) { - comps->class0[i] = comps_t->class0[i]; - for (j = 0; j < MV_FP_SIZE; j++) { - comps->class0_fp[i][j] = comps_t->class0_fp[i][j]; - } - } - for (i = 0; i < MV_OFFSET_BITS; i++) { - for (j = 0; j < 2; j++) { - comps->bits[i][j] = comps_t->bits[i][j]; - } - } - for (i = 0; i < MV_FP_SIZE; i++) { - comps->fp[i] = comps_t->fp[i]; - } - } -} - -static void yv12_buffer_to_image_buffer(const YV12_BUFFER_CONFIG *yv12_buffer, - IMAGE_BUFFER *image_buffer) { - const uint8_t *src_buf_ls[3] = { yv12_buffer->y_buffer, yv12_buffer->u_buffer, - yv12_buffer->v_buffer }; - const int src_stride_ls[3] = { yv12_buffer->y_stride, yv12_buffer->uv_stride, - yv12_buffer->uv_stride }; - const int w_ls[3] = { yv12_buffer->y_crop_width, yv12_buffer->uv_crop_width, - yv12_buffer->uv_crop_width }; - const int h_ls[3] = { yv12_buffer->y_crop_height, yv12_buffer->uv_crop_height, - yv12_buffer->uv_crop_height }; - int plane; - for (plane = 0; plane < 3; ++plane) { - const int src_stride = src_stride_ls[plane]; - const int w = w_ls[plane]; - const int h = h_ls[plane]; - const uint8_t *src_buf = src_buf_ls[plane]; - uint8_t *dst_buf = image_buffer->plane_buffer[plane]; - int r; - assert(image_buffer->plane_width[plane] == w); - assert(image_buffer->plane_height[plane] == h); - for (r = 0; r < h; ++r) { - memcpy(dst_buf, src_buf, sizeof(*src_buf) * w); - src_buf += src_stride; - dst_buf += w; - } - } -} -#endif // CONFIG_RATE_CTRL - -static void update_encode_frame_result( - int ref_frame_flags, FRAME_UPDATE_TYPE update_type, - const YV12_BUFFER_CONFIG *source_frame, const RefCntBuffer *coded_frame_buf, - RefCntBuffer *ref_frame_bufs[MAX_INTER_REF_FRAMES], int quantize_index, - uint32_t bit_depth, uint32_t input_bit_depth, const FRAME_COUNTS *counts, -#if CONFIG_RATE_CTRL - const PARTITION_INFO *partition_info, - const MOTION_VECTOR_INFO *motion_vector_info, - const TplDepStats *tpl_stats_info, -#endif // CONFIG_RATE_CTRL - ENCODE_FRAME_RESULT *encode_frame_result) { -#if CONFIG_RATE_CTRL - PSNR_STATS psnr; -#if CONFIG_VP9_HIGHBITDEPTH - vpx_calc_highbd_psnr(source_frame, &coded_frame_buf->buf, &psnr, bit_depth, - input_bit_depth); -#else // CONFIG_VP9_HIGHBITDEPTH - (void)bit_depth; - (void)input_bit_depth; - vpx_calc_psnr(source_frame, &coded_frame_buf->buf, &psnr); -#endif // CONFIG_VP9_HIGHBITDEPTH - encode_frame_result->frame_coding_index = coded_frame_buf->frame_coding_index; - - vp9_get_ref_frame_info(update_type, ref_frame_flags, ref_frame_bufs, - encode_frame_result->ref_frame_coding_indexes, - encode_frame_result->ref_frame_valid_list); - - encode_frame_result->psnr = psnr.psnr[0]; - encode_frame_result->sse = psnr.sse[0]; - copy_frame_counts(counts, &encode_frame_result->frame_counts); - encode_frame_result->partition_info = partition_info; - encode_frame_result->motion_vector_info = motion_vector_info; - encode_frame_result->tpl_stats_info = tpl_stats_info; - if (encode_frame_result->coded_frame.allocated) { - yv12_buffer_to_image_buffer(&coded_frame_buf->buf, - &encode_frame_result->coded_frame); - } -#else // CONFIG_RATE_CTRL - (void)ref_frame_flags; - (void)bit_depth; - (void)input_bit_depth; - (void)source_frame; - (void)coded_frame_buf; - (void)ref_frame_bufs; - (void)counts; -#endif // CONFIG_RATE_CTRL - encode_frame_result->show_idx = coded_frame_buf->frame_index; - encode_frame_result->update_type = update_type; - encode_frame_result->quantize_index = quantize_index; -} -#endif // !CONFIG_REALTIME_ONLY - void vp9_init_encode_frame_result(ENCODE_FRAME_RESULT *encode_frame_result) { encode_frame_result->show_idx = -1; // Actual encoding doesn't happen. #if CONFIG_RATE_CTRL diff -Nru libvpx-1.10.0/vp9/encoder/vp9_encoder.h libvpx-1.11.0/vp9/encoder/vp9_encoder.h --- libvpx-1.10.0/vp9/encoder/vp9_encoder.h 2021-03-18 19:59:46.000000000 +0000 +++ libvpx-1.11.0/vp9/encoder/vp9_encoder.h 2021-10-06 17:41:19.000000000 +0000 @@ -273,10 +273,6 @@ vpx_fixed_buf_t two_pass_stats_in; -#if CONFIG_FP_MB_STATS - vpx_fixed_buf_t firstpass_mb_stats_in; -#endif - vp8e_tuning tuning; vp9e_tune_content content; #if CONFIG_VP9_HIGHBITDEPTH @@ -291,6 +287,7 @@ int row_mt; unsigned int motion_vector_unit_test; int delta_q_uv; + int use_simple_encode_api; // Use SimpleEncode APIs or not } VP9EncoderConfig; static INLINE int is_lossless_requested(const VP9EncoderConfig *cfg) { @@ -710,9 +707,6 @@ TileDataEnc *tile_data; int allocated_tiles; // Keep track of memory allocated for tiles. - // For a still frame, this flag is set to 1 to skip partition search. - int partition_search_skippable_frame; - int scaled_ref_idx[REFS_PER_FRAME]; int lst_fb_idx; int gld_fb_idx; @@ -805,10 +799,6 @@ uint64_t time_pick_lpf; uint64_t time_encode_sb_row; -#if CONFIG_FP_MB_STATS - int use_fp_mb_stats; -#endif - TWO_PASS twopass; // Force recalculation of segment_ids for each mode info @@ -960,6 +950,8 @@ int compute_source_sad_onepass; + int compute_frame_low_motion_onepass; + LevelConstraint level_constraint; uint8_t *count_arf_frame_usage; diff -Nru libvpx-1.10.0/vp9/encoder/vp9_firstpass.c libvpx-1.11.0/vp9/encoder/vp9_firstpass.c --- libvpx-1.10.0/vp9/encoder/vp9_firstpass.c 2021-03-18 19:59:46.000000000 +0000 +++ libvpx-1.11.0/vp9/encoder/vp9_firstpass.c 2021-10-06 17:41:19.000000000 +0000 @@ -54,30 +54,29 @@ #define NCOUNT_INTRA_THRESH 8192 #define NCOUNT_INTRA_FACTOR 3 -#define SR_DIFF_PART 0.0015 #define INTRA_PART 0.005 #define DEFAULT_DECAY_LIMIT 0.75 #define LOW_SR_DIFF_TRHESH 0.1 -#define SR_DIFF_MAX 128.0 #define LOW_CODED_ERR_PER_MB 10.0 #define NCOUNT_FRAME_II_THRESH 6.0 #define BASELINE_ERR_PER_MB 12500.0 #define GF_MAX_FRAME_BOOST 96.0 #ifdef AGGRESSIVE_VBR +#define KF_MIN_FRAME_BOOST 40.0 #define KF_MAX_FRAME_BOOST 80.0 #define MAX_KF_TOT_BOOST 4800 #else +#define KF_MIN_FRAME_BOOST 40.0 #define KF_MAX_FRAME_BOOST 96.0 #define MAX_KF_TOT_BOOST 5400 #endif -#define ZM_POWER_FACTOR 0.75 +#define DEFAULT_ZM_FACTOR 0.5 #define MINQ_ADJ_LIMIT 48 #define MINQ_ADJ_LIMIT_CQ 20 #define HIGH_UNDERSHOOT_RATIO 2 #define AV_WQ_FACTOR 4.0 -#define DEF_EPMB_LOW 2000.0 #define DOUBLE_DIVIDE_CHECK(x) ((x) < 0 ? (x)-0.000001 : (x) + 0.000001) @@ -136,17 +135,6 @@ #endif } -#if CONFIG_FP_MB_STATS -static void output_fpmb_stats(uint8_t *this_frame_mb_stats, VP9_COMMON *cm, - struct vpx_codec_pkt_list *pktlist) { - struct vpx_codec_cx_pkt pkt; - pkt.kind = VPX_CODEC_FPMB_STATS_PKT; - pkt.data.firstpass_mb_stats.buf = this_frame_mb_stats; - pkt.data.firstpass_mb_stats.sz = cm->initial_mbs * sizeof(uint8_t); - vpx_codec_pkt_list_add(pktlist, &pkt); -} -#endif - static void zero_stats(FIRSTPASS_STATS *section) { section->frame = 0.0; section->weight = 0.0; @@ -954,10 +942,6 @@ int level_sample; const int mb_index = mb_row * cm->mb_cols + mb_col; -#if CONFIG_FP_MB_STATS - const int mb_index = mb_row * cm->mb_cols + mb_col; -#endif - (*(cpi->row_mt_sync_read_ptr))(&tile_data->row_mt_sync, mb_row, c); // Adjust to the next column of MBs. @@ -1093,13 +1077,6 @@ // Accumulate the intra error. fp_acc_data->intra_error += (int64_t)this_error; -#if CONFIG_FP_MB_STATS - if (cpi->use_fp_mb_stats) { - // initialization - cpi->twopass.frame_mb_stats_buf[mb_index] = 0; - } -#endif - // Set up limit values for motion vectors to prevent them extending // outside the UMV borders. x->mv_limits.col_min = -((mb_col * 16) + BORDER_MV_PIXELS_B16); @@ -1115,8 +1092,10 @@ vp9_variance_fn_ptr_t v_fn_ptr = cpi->fn_ptr[bsize]; #if CONFIG_RATE_CTRL - // Store zero mv as default - store_fp_motion_vector(cpi, &mv, mb_row, mb_col, LAST_FRAME, 0); + if (cpi->oxcf.use_simple_encode_api) { + // Store zero mv as default + store_fp_motion_vector(cpi, &mv, mb_row, mb_col, LAST_FRAME, 0); + } #endif // CONFIG_RAGE_CTRL xd->plane[0].pre[0].buf = first_ref_buf->y_buffer + recon_yoffset; @@ -1184,7 +1163,9 @@ } } #if CONFIG_RATE_CTRL - store_fp_motion_vector(cpi, &mv, mb_row, mb_col, LAST_FRAME, 0); + if (cpi->oxcf.use_simple_encode_api) { + store_fp_motion_vector(cpi, &mv, mb_row, mb_col, LAST_FRAME, 0); + } #endif // CONFIG_RAGE_CTRL // Search in an older reference frame. @@ -1208,7 +1189,10 @@ first_pass_motion_search(cpi, x, &zero_mv, &tmp_mv, &gf_motion_error); #if CONFIG_RATE_CTRL - store_fp_motion_vector(cpi, &tmp_mv, mb_row, mb_col, GOLDEN_FRAME, 1); + if (cpi->oxcf.use_simple_encode_api) { + store_fp_motion_vector(cpi, &tmp_mv, mb_row, mb_col, GOLDEN_FRAME, + 1); + } #endif // CONFIG_RAGE_CTRL if (gf_motion_error < motion_error && gf_motion_error < this_error) @@ -1238,20 +1222,6 @@ best_ref_mv->row = 0; best_ref_mv->col = 0; -#if CONFIG_FP_MB_STATS - if (cpi->use_fp_mb_stats) { - // intra prediction statistics - cpi->twopass.frame_mb_stats_buf[mb_index] = 0; - cpi->twopass.frame_mb_stats_buf[mb_index] |= FPMB_DCINTRA_MASK; - cpi->twopass.frame_mb_stats_buf[mb_index] |= FPMB_MOTION_ZERO_MASK; - if (this_error > FPMB_ERROR_LARGE_TH) { - cpi->twopass.frame_mb_stats_buf[mb_index] |= FPMB_ERROR_LARGE_MASK; - } else if (this_error < FPMB_ERROR_SMALL_TH) { - cpi->twopass.frame_mb_stats_buf[mb_index] |= FPMB_ERROR_SMALL_MASK; - } - } -#endif - if (motion_error <= this_error) { vpx_clear_system_state(); @@ -1296,47 +1266,9 @@ *best_ref_mv = mv; -#if CONFIG_FP_MB_STATS - if (cpi->use_fp_mb_stats) { - // inter prediction statistics - cpi->twopass.frame_mb_stats_buf[mb_index] = 0; - cpi->twopass.frame_mb_stats_buf[mb_index] &= ~FPMB_DCINTRA_MASK; - cpi->twopass.frame_mb_stats_buf[mb_index] |= FPMB_MOTION_ZERO_MASK; - if (this_error > FPMB_ERROR_LARGE_TH) { - cpi->twopass.frame_mb_stats_buf[mb_index] |= FPMB_ERROR_LARGE_MASK; - } else if (this_error < FPMB_ERROR_SMALL_TH) { - cpi->twopass.frame_mb_stats_buf[mb_index] |= FPMB_ERROR_SMALL_MASK; - } - } -#endif - if (!is_zero_mv(&mv)) { ++(fp_acc_data->mvcount); -#if CONFIG_FP_MB_STATS - if (cpi->use_fp_mb_stats) { - cpi->twopass.frame_mb_stats_buf[mb_index] &= ~FPMB_MOTION_ZERO_MASK; - // check estimated motion direction - if (mv.as_mv.col > 0 && mv.as_mv.col >= abs(mv.as_mv.row)) { - // right direction - cpi->twopass.frame_mb_stats_buf[mb_index] |= - FPMB_MOTION_RIGHT_MASK; - } else if (mv.as_mv.row < 0 && - abs(mv.as_mv.row) >= abs(mv.as_mv.col)) { - // up direction - cpi->twopass.frame_mb_stats_buf[mb_index] |= FPMB_MOTION_UP_MASK; - } else if (mv.as_mv.col < 0 && - abs(mv.as_mv.col) >= abs(mv.as_mv.row)) { - // left direction - cpi->twopass.frame_mb_stats_buf[mb_index] |= - FPMB_MOTION_LEFT_MASK; - } else { - // down direction - cpi->twopass.frame_mb_stats_buf[mb_index] |= - FPMB_MOTION_DOWN_MASK; - } - } -#endif // Does the row vector point inwards or outwards? if (mb_row < cm->mb_rows / 2) { if (mv.row > 0) @@ -1384,7 +1316,9 @@ } else { fp_acc_data->sr_coded_error += (int64_t)this_error; #if CONFIG_RATE_CTRL - store_fp_motion_vector(cpi, NULL, mb_row, mb_col, INTRA_FRAME, 0); + if (cpi->oxcf.use_simple_encode_api) { + store_fp_motion_vector(cpi, NULL, mb_row, mb_col, INTRA_FRAME, 0); + } #endif // CONFIG_RAGE_CTRL } fp_acc_data->coded_error += (int64_t)this_error; @@ -1413,9 +1347,11 @@ vp9_tile_init(tile, cm, 0, 0); #if CONFIG_RATE_CTRL - fp_motion_vector_info_reset(cpi->frame_info.frame_width, - cpi->frame_info.frame_height, - cpi->fp_motion_vector_info); + if (cpi->oxcf.use_simple_encode_api) { + fp_motion_vector_info_reset(cpi->frame_info.frame_width, + cpi->frame_info.frame_height, + cpi->fp_motion_vector_info); + } #endif for (mb_row = 0; mb_row < cm->mb_rows; ++mb_row) { @@ -1449,12 +1385,6 @@ assert(new_yv12 != NULL); assert(frame_is_intra_only(cm) || (lst_yv12 != NULL)); -#if CONFIG_FP_MB_STATS - if (cpi->use_fp_mb_stats) { - vp9_zero_array(cpi->twopass.frame_mb_stats_buf, cm->initial_mbs); - } -#endif - set_first_pass_params(cpi); vp9_set_quantizer(cpi, find_fp_qindex(cm->bit_depth)); @@ -1515,12 +1445,6 @@ twopass->this_frame_stats = fps; output_stats(&twopass->this_frame_stats); accumulate_stats(&twopass->total_stats, &fps); - -#if CONFIG_FP_MB_STATS - if (cpi->use_fp_mb_stats) { - output_fpmb_stats(twopass->frame_mb_stats_buf, cm, cpi->output_pkt_list); - } -#endif } // Copy the previous Last Frame back into gf and and arf buffers if @@ -1832,50 +1756,59 @@ twopass->arnr_strength_adjustment = 0; } -static double get_sr_decay_rate(const FRAME_INFO *frame_info, +/* This function considers how the quality of prediction may be deteriorating + * with distance. It compares the coded error for the last frame and the + * second reference frame (usually two frames old) and also applies a factor + * based on the extent of INTRA coding. + * + * The decay factor is then used to reduce the contribution of frames further + * from the alt-ref or golden frame, to the bitrate boost calculation for that + * alt-ref or golden frame. + */ +static double get_sr_decay_rate(const TWO_PASS *const twopass, const FIRSTPASS_STATS *frame) { double sr_diff = (frame->sr_coded_error - frame->coded_error); double sr_decay = 1.0; - double modified_pct_inter; - double modified_pcnt_intra; - const double motion_amplitude_part = - frame->pcnt_motion * - ((frame->mvc_abs + frame->mvr_abs) / - (frame_info->frame_height + frame_info->frame_width)); - - modified_pct_inter = frame->pcnt_inter; - if ((frame->coded_error > LOW_CODED_ERR_PER_MB) && - ((frame->intra_error / DOUBLE_DIVIDE_CHECK(frame->coded_error)) < - (double)NCOUNT_FRAME_II_THRESH)) { - modified_pct_inter = - frame->pcnt_inter + frame->pcnt_intra_low - frame->pcnt_neutral; - } - modified_pcnt_intra = 100 * (1.0 - modified_pct_inter); + // Do nothing if the second ref to last frame error difference is + // very small or even negative. if ((sr_diff > LOW_SR_DIFF_TRHESH)) { - sr_diff = VPXMIN(sr_diff, SR_DIFF_MAX); - sr_decay = 1.0 - (SR_DIFF_PART * sr_diff) - motion_amplitude_part - - (INTRA_PART * modified_pcnt_intra); + const double sr_diff_part = + twopass->sr_diff_factor * ((sr_diff * 0.25) / frame->intra_error); + double modified_pct_inter = frame->pcnt_inter; + double modified_pcnt_intra; + + if ((frame->coded_error > LOW_CODED_ERR_PER_MB) && + ((frame->intra_error / DOUBLE_DIVIDE_CHECK(frame->coded_error)) < + (double)NCOUNT_FRAME_II_THRESH)) { + modified_pct_inter = + frame->pcnt_inter + frame->pcnt_intra_low - frame->pcnt_neutral; + } + modified_pcnt_intra = 100 * (1.0 - modified_pct_inter); + + sr_decay = 1.0 - sr_diff_part - (INTRA_PART * modified_pcnt_intra); } - return VPXMAX(sr_decay, DEFAULT_DECAY_LIMIT); + return VPXMAX(sr_decay, twopass->sr_default_decay_limit); } // This function gives an estimate of how badly we believe the prediction // quality is decaying from frame to frame. -static double get_zero_motion_factor(const FRAME_INFO *frame_info, +static double get_zero_motion_factor(const TWO_PASS *const twopass, const FIRSTPASS_STATS *frame_stats) { const double zero_motion_pct = frame_stats->pcnt_inter - frame_stats->pcnt_motion; - double sr_decay = get_sr_decay_rate(frame_info, frame_stats); + double sr_decay = get_sr_decay_rate(twopass, frame_stats); return VPXMIN(sr_decay, zero_motion_pct); } -static double get_prediction_decay_rate(const FRAME_INFO *frame_info, +static double get_prediction_decay_rate(const TWO_PASS *const twopass, const FIRSTPASS_STATS *frame_stats) { - const double sr_decay_rate = get_sr_decay_rate(frame_info, frame_stats); - const double zero_motion_factor = - (0.95 * pow((frame_stats->pcnt_inter - frame_stats->pcnt_motion), - ZM_POWER_FACTOR)); + const double sr_decay_rate = get_sr_decay_rate(twopass, frame_stats); + double zero_motion_factor = + twopass->zm_factor * (frame_stats->pcnt_inter - frame_stats->pcnt_motion); + + // Check that the zero motion factor is valid + assert(zero_motion_factor >= 0.0 && zero_motion_factor <= 1.0); return VPXMAX(zero_motion_factor, (sr_decay_rate + ((1.0 - sr_decay_rate) * zero_motion_factor))); @@ -1959,6 +1892,7 @@ static double calc_frame_boost(const FRAME_INFO *frame_info, const FIRSTPASS_STATS *this_frame, + const TWO_PASS *const twopass, int avg_frame_qindex, double this_frame_mv_in_out) { double frame_boost; @@ -1967,8 +1901,8 @@ const double boost_q_correction = VPXMIN((0.5 + (lq * 0.015)), 1.5); const double active_area = calculate_active_area(frame_info, this_frame); - // Underlying boost factor is based on inter error ratio. - frame_boost = (BASELINE_ERR_PER_MB * active_area) / + // Frame booost is based on inter error. + frame_boost = (twopass->err_per_mb * active_area) / DOUBLE_DIVIDE_CHECK(this_frame->coded_error); // Small adjustment for cases where there is a zoom out @@ -1978,37 +1912,25 @@ // Q correction and scalling frame_boost = frame_boost * boost_q_correction; - return VPXMIN(frame_boost, GF_MAX_FRAME_BOOST * boost_q_correction); -} - -static double kf_err_per_mb(VP9_COMP *cpi) { - const VP9_COMMON *const cm = &cpi->common; - unsigned int screen_area = (cm->width * cm->height); - - // Use a different error per mb factor for calculating boost for - // different formats. - if (screen_area < 1280 * 720) { - return 2000.0; - } else if (screen_area < 1920 * 1080) { - return 500.0; - } - return 250.0; + return VPXMIN(frame_boost, twopass->gf_frame_max_boost * boost_q_correction); } static double calc_kf_frame_boost(VP9_COMP *cpi, const FIRSTPASS_STATS *this_frame, double *sr_accumulator, double this_frame_mv_in_out, - double max_boost) { + double zm_factor) { + TWO_PASS *const twopass = &cpi->twopass; double frame_boost; const double lq = vp9_convert_qindex_to_q( cpi->rc.avg_frame_qindex[INTER_FRAME], cpi->common.bit_depth); const double boost_q_correction = VPXMIN((0.50 + (lq * 0.015)), 2.00); const double active_area = calculate_active_area(&cpi->frame_info, this_frame); + double max_boost; - // Underlying boost factor is based on inter error ratio. - frame_boost = (kf_err_per_mb(cpi) * active_area) / + // Frame booost is based on inter error. + frame_boost = (twopass->kf_err_per_mb * active_area) / DOUBLE_DIVIDE_CHECK(this_frame->coded_error + *sr_accumulator); // Update the accumulator for second ref error difference. @@ -2025,15 +1947,23 @@ // The 40.0 value here is an experimentally derived baseline minimum. // This value is in line with the minimum per frame boost in the alt_ref // boost calculation. - frame_boost = ((frame_boost + 40.0) * boost_q_correction); + frame_boost = + (frame_boost + twopass->kf_frame_min_boost) * boost_q_correction; - return VPXMIN(frame_boost, max_boost * boost_q_correction); + // Maximum allowed boost this frame. May be different for first vs subsequent + // key frames. + max_boost = (cpi->common.current_video_frame == 0) + ? twopass->kf_frame_max_boost_first + : twopass->kf_frame_max_boost_subs; + max_boost *= zm_factor * boost_q_correction; + + return VPXMIN(frame_boost, max_boost); } static int compute_arf_boost(const FRAME_INFO *frame_info, - const FIRST_PASS_INFO *first_pass_info, - int arf_show_idx, int f_frames, int b_frames, - int avg_frame_qindex) { + TWO_PASS *const twopass, int arf_show_idx, + int f_frames, int b_frames, int avg_frame_qindex) { + const FIRST_PASS_INFO *first_pass_info = &twopass->first_pass_info; int i; double boost_score = 0.0; double mv_ratio_accumulator = 0.0; @@ -2064,14 +1994,14 @@ // Accumulate the effect of prediction quality decay. if (!flash_detected) { - decay_accumulator *= get_prediction_decay_rate(frame_info, this_frame); + decay_accumulator *= get_prediction_decay_rate(twopass, this_frame); decay_accumulator = decay_accumulator < MIN_DECAY_FACTOR ? MIN_DECAY_FACTOR : decay_accumulator; } - boost_score += decay_accumulator * calc_frame_boost(frame_info, this_frame, - avg_frame_qindex, - this_frame_mv_in_out); + boost_score += decay_accumulator * + calc_frame_boost(frame_info, this_frame, twopass, + avg_frame_qindex, this_frame_mv_in_out); } arf_boost = (int)boost_score; @@ -2104,14 +2034,14 @@ // Cumulative effect of prediction quality decay. if (!flash_detected) { - decay_accumulator *= get_prediction_decay_rate(frame_info, this_frame); + decay_accumulator *= get_prediction_decay_rate(twopass, this_frame); decay_accumulator = decay_accumulator < MIN_DECAY_FACTOR ? MIN_DECAY_FACTOR : decay_accumulator; } - boost_score += decay_accumulator * calc_frame_boost(frame_info, this_frame, - avg_frame_qindex, - this_frame_mv_in_out); + boost_score += decay_accumulator * + calc_frame_boost(frame_info, this_frame, twopass, + avg_frame_qindex, this_frame_mv_in_out); } arf_boost += (int)boost_score; @@ -2127,8 +2057,8 @@ TWO_PASS *const twopass = &cpi->twopass; const int avg_inter_frame_qindex = cpi->rc.avg_frame_qindex[INTER_FRAME]; int arf_show_idx = get_show_idx(twopass); - return compute_arf_boost(frame_info, &twopass->first_pass_info, arf_show_idx, - f_frames, b_frames, avg_inter_frame_qindex); + return compute_arf_boost(frame_info, twopass, arf_show_idx, f_frames, + b_frames, avg_inter_frame_qindex); } // Calculate a section intra ratio used in setting max loop filter. @@ -2543,6 +2473,9 @@ * (The following fields will remain unchanged after initialization of encoder.) * rc->static_scene_max_gf_interval * rc->min_gf_interval + * twopass->sr_diff_factor + * twopass->sr_default_decay_limit + * twopass->zm_factor * * Dynamic fields: * (The following fields will be updated before or after coding each frame.) @@ -2558,9 +2491,10 @@ */ static int get_gop_coding_frame_num( int *use_alt_ref, const FRAME_INFO *frame_info, - const FIRST_PASS_INFO *first_pass_info, const RATE_CONTROL *rc, + const TWO_PASS *const twopass, const RATE_CONTROL *rc, int gf_start_show_idx, const RANGE *active_gf_interval, double gop_intra_factor, int lag_in_frames) { + const FIRST_PASS_INFO *first_pass_info = &twopass->first_pass_info; double loop_decay_rate = 1.00; double mv_ratio_accumulator = 0.0; double this_frame_mv_in_out = 0.0; @@ -2601,15 +2535,14 @@ // Monitor for static sections. if ((rc->frames_since_key + gop_coding_frames - 1) > 1) { - zero_motion_accumulator = - VPXMIN(zero_motion_accumulator, - get_zero_motion_factor(frame_info, next_frame)); + zero_motion_accumulator = VPXMIN( + zero_motion_accumulator, get_zero_motion_factor(twopass, next_frame)); } // Accumulate the effect of prediction quality decay. if (!flash_detected) { double last_loop_decay_rate = loop_decay_rate; - loop_decay_rate = get_prediction_decay_rate(frame_info, next_frame); + loop_decay_rate = get_prediction_decay_rate(twopass, next_frame); // Break clause to detect very still sections after motion. For example, // a static image after a fade or other transition. @@ -2669,25 +2602,25 @@ return gop_coding_frames; } -static RANGE get_active_gf_inverval_range( - const FRAME_INFO *frame_info, const RATE_CONTROL *rc, int arf_active_or_kf, - int gf_start_show_idx, int active_worst_quality, int last_boosted_qindex) { +static RANGE get_active_gf_inverval_range_simple(int min_gf_interval, + int arf_active_or_kf, + int frames_to_key) { RANGE active_gf_interval; -#if CONFIG_RATE_CTRL - (void)frame_info; - (void)gf_start_show_idx; - (void)active_worst_quality; - (void)last_boosted_qindex; - active_gf_interval.min = rc->min_gf_interval + arf_active_or_kf + 2; - + active_gf_interval.min = min_gf_interval + arf_active_or_kf + 2; active_gf_interval.max = 16 + arf_active_or_kf; - if ((active_gf_interval.max <= rc->frames_to_key) && - (active_gf_interval.max >= (rc->frames_to_key - rc->min_gf_interval))) { - active_gf_interval.min = rc->frames_to_key / 2; - active_gf_interval.max = rc->frames_to_key / 2; + if ((active_gf_interval.max <= frames_to_key) && + (active_gf_interval.max >= (frames_to_key - min_gf_interval))) { + active_gf_interval.min = frames_to_key / 2; + active_gf_interval.max = frames_to_key / 2; } -#else + return active_gf_interval; +} + +static RANGE get_active_gf_inverval_range( + const FRAME_INFO *frame_info, const RATE_CONTROL *rc, int arf_active_or_kf, + int gf_start_show_idx, int active_worst_quality, int last_boosted_qindex) { + RANGE active_gf_interval; int int_max_q = (int)(vp9_convert_qindex_to_q(active_worst_quality, frame_info->bit_depth)); int q_term = (gf_start_show_idx == 0) @@ -2725,7 +2658,6 @@ } active_gf_interval.max = VPXMAX(active_gf_interval.max, active_gf_interval.min); -#endif return active_gf_interval; } @@ -2786,9 +2718,14 @@ vpx_clear_system_state(); - active_gf_interval = get_active_gf_inverval_range( - frame_info, rc, arf_active_or_kf, gf_start_show_idx, - twopass->active_worst_quality, rc->last_boosted_qindex); + if (oxcf->use_simple_encode_api) { + active_gf_interval = get_active_gf_inverval_range_simple( + rc->min_gf_interval, arf_active_or_kf, rc->frames_to_key); + } else { + active_gf_interval = get_active_gf_inverval_range( + frame_info, rc, arf_active_or_kf, gf_start_show_idx, + twopass->active_worst_quality, rc->last_boosted_qindex); + } if (cpi->multi_layer_arf) { int arf_layers = get_arf_layers(cpi->multi_layer_arf, oxcf->enable_auto_arf, @@ -2798,25 +2735,21 @@ gop_intra_factor = 1.0; } + gop_coding_frames = get_gop_coding_frame_num( + &use_alt_ref, frame_info, twopass, rc, gf_start_show_idx, + &active_gf_interval, gop_intra_factor, cpi->oxcf.lag_in_frames); + use_alt_ref &= allow_alt_ref; #if CONFIG_RATE_CTRL - { + // If the external gop_command is on, we will override the decisions + // of gop_coding_frames and use_alt_ref. + if (cpi->oxcf.use_simple_encode_api) { const GOP_COMMAND *gop_command = &cpi->encode_command.gop_command; assert(allow_alt_ref == 1); if (gop_command->use) { gop_coding_frames = gop_command_coding_frame_count(gop_command); use_alt_ref = gop_command->use_alt_ref; - } else { - gop_coding_frames = get_gop_coding_frame_num( - &use_alt_ref, frame_info, first_pass_info, rc, gf_start_show_idx, - &active_gf_interval, gop_intra_factor, cpi->oxcf.lag_in_frames); - use_alt_ref &= allow_alt_ref; } } -#else - gop_coding_frames = get_gop_coding_frame_num( - &use_alt_ref, frame_info, first_pass_info, rc, gf_start_show_idx, - &active_gf_interval, gop_intra_factor, cpi->oxcf.lag_in_frames); - use_alt_ref &= allow_alt_ref; #endif // Was the group length constrained by the requirement for a new KF? @@ -2836,8 +2769,8 @@ // Calculate the boost for alt ref. rc->gfu_boost = - compute_arf_boost(frame_info, first_pass_info, arf_show_idx, f_frames, - b_frames, avg_inter_frame_qindex); + compute_arf_boost(frame_info, twopass, arf_show_idx, f_frames, b_frames, + avg_inter_frame_qindex); rc->source_alt_ref_pending = 1; } else { const int f_frames = gop_coding_frames - 1; @@ -2847,9 +2780,9 @@ const int gld_show_idx = VPXMIN(gf_start_show_idx + 1, fps_get_num_frames(first_pass_info)); const int arf_boost = - compute_arf_boost(frame_info, first_pass_info, gld_show_idx, f_frames, - b_frames, avg_inter_frame_qindex); - rc->gfu_boost = VPXMIN(MAX_GF_BOOST, arf_boost); + compute_arf_boost(frame_info, twopass, gld_show_idx, f_frames, b_frames, + avg_inter_frame_qindex); + rc->gfu_boost = VPXMIN((int)twopass->gf_max_total_boost, arf_boost); rc->source_alt_ref_pending = 0; } @@ -2952,7 +2885,9 @@ cpi, group_av_err, (group_av_skip_pct + group_av_inactive_zone), group_av_noise, vbr_group_bits_per_frame); twopass->active_worst_quality = - (tmp_q + (twopass->active_worst_quality * 3)) >> 2; + (int)((tmp_q + (twopass->active_worst_quality * + (twopass->active_wq_factor - 1))) / + twopass->active_wq_factor); #if CONFIG_ALWAYS_ADJUST_BPM // Reset rolling actual and target bits counters for ARF groups. @@ -3173,9 +3108,9 @@ #define KF_ABS_ZOOM_THRESH 6.0 int vp9_get_frames_to_next_key(const VP9EncoderConfig *oxcf, - const FRAME_INFO *frame_info, - const FIRST_PASS_INFO *first_pass_info, - int kf_show_idx, int min_gf_interval) { + const TWO_PASS *const twopass, int kf_show_idx, + int min_gf_interval) { + const FIRST_PASS_INFO *first_pass_info = &twopass->first_pass_info; double recent_loop_decay[FRAMES_TO_CHECK_DECAY]; int j; int frames_to_key; @@ -3202,7 +3137,7 @@ break; // How fast is the prediction quality decaying? - loop_decay_rate = get_prediction_decay_rate(frame_info, next_frame); + loop_decay_rate = get_prediction_decay_rate(twopass, next_frame); // We want to know something about the recent past... rather than // as used elsewhere where we are concerned with decay in prediction @@ -3288,8 +3223,8 @@ kf_mod_err = calc_norm_frame_score(oxcf, frame_info, keyframe_stats, mean_mod_score, av_err); - rc->frames_to_key = vp9_get_frames_to_next_key( - oxcf, frame_info, first_pass_info, kf_show_idx, rc->min_gf_interval); + rc->frames_to_key = vp9_get_frames_to_next_key(oxcf, twopass, kf_show_idx, + rc->min_gf_interval); // If there is a max kf interval set by the user we must obey it. // We already breakout of the loop above at 2x max. @@ -3371,7 +3306,7 @@ if (i > 0) { zero_motion_accumulator = VPXMIN(zero_motion_accumulator, - get_zero_motion_factor(&cpi->frame_info, &next_frame)); + get_zero_motion_factor(twopass, &next_frame)); } else { zero_motion_accumulator = next_frame.pcnt_inter - next_frame.pcnt_motion; @@ -3385,8 +3320,8 @@ // the first key frame or it points to a refernce before the new key // frame. if (i < 2) sr_accumulator = 0.0; - frame_boost = calc_kf_frame_boost(cpi, &next_frame, &sr_accumulator, 0, - KF_MAX_FRAME_BOOST * zm_factor); + frame_boost = + calc_kf_frame_boost(cpi, &next_frame, &sr_accumulator, 0, zm_factor); boost_score += frame_boost; @@ -3415,12 +3350,12 @@ // Special case for static / slide show content but dont apply // if the kf group is very short. if ((zero_motion_accumulator > 0.99) && (rc->frames_to_key > 8)) { - rc->kf_boost = MAX_KF_TOT_BOOST; + rc->kf_boost = (int)(twopass->kf_max_total_boost); } else { - // Apply various clamps for min and max boost + // Apply various clamps for min and max oost rc->kf_boost = VPXMAX((int)boost_score, (rc->frames_to_key * 3)); rc->kf_boost = VPXMAX(rc->kf_boost, MIN_KF_TOT_BOOST); - rc->kf_boost = VPXMIN(rc->kf_boost, MAX_KF_TOT_BOOST); + rc->kf_boost = VPXMIN(rc->kf_boost, (int)(twopass->kf_max_total_boost)); } // Work out how many bits to allocate for the key frame itself. @@ -3456,128 +3391,64 @@ } } -static int is_skippable_frame(const VP9_COMP *cpi) { - // If the current frame does not have non-zero motion vector detected in the - // first pass, and so do its previous and forward frames, then this frame - // can be skipped for partition check, and the partition size is assigned - // according to the variance - const TWO_PASS *const twopass = &cpi->twopass; - - return (!frame_is_intra_only(&cpi->common) && - twopass->stats_in - 2 > twopass->stats_in_start && - twopass->stats_in < twopass->stats_in_end && - (twopass->stats_in - 1)->pcnt_inter - - (twopass->stats_in - 1)->pcnt_motion == - 1 && - (twopass->stats_in - 2)->pcnt_inter - - (twopass->stats_in - 2)->pcnt_motion == - 1 && - twopass->stats_in->pcnt_inter - twopass->stats_in->pcnt_motion == 1); -} - // Configure image size specific vizier parameters. // Later these will be set via additional command line options -static void init_vizier_params(RATE_CONTROL *const rc, int screen_area) { - if (1) { - // Force defaults for now - rc->active_wq_factor = AV_WQ_FACTOR; - rc->base_err_per_mb = BASELINE_ERR_PER_MB; - rc->sr_default_decay_limit = DEFAULT_DECAY_LIMIT; - rc->sr_diff_part = SR_DIFF_PART; - rc->gf_frame_max_boost = GF_MAX_FRAME_BOOST; - rc->gf_max_total_boost = MAX_GF_BOOST; - rc->kf_err_per_mb = DEF_EPMB_LOW; - rc->kf_frame_max_boost_first = KF_MAX_FRAME_BOOST; // Max for first kf. - rc->kf_frame_max_boost_subs = KF_MAX_FRAME_BOOST / 2; // Max for other kfs. - rc->kf_max_total_boost = MAX_KF_TOT_BOOST; - rc->zm_power_factor = ZM_POWER_FACTOR; - } else { - // Vizer experimental parameters from training. - // Later these will be set via the command line. - if (screen_area <= 176 * 144) { - rc->active_wq_factor = 46.0; - rc->base_err_per_mb = 37597.399760969536; - rc->sr_default_decay_limit = 0.3905639800962774; - rc->sr_diff_part = 0.009599023654146284; - rc->gf_frame_max_boost = 87.27362648627846; - rc->gf_max_total_boost = MAX_GF_BOOST; - rc->kf_err_per_mb = 1854.8255436877148; - rc->kf_frame_max_boost_first = KF_MAX_FRAME_BOOST; - rc->kf_frame_max_boost_subs = rc->kf_frame_max_boost_first / 2; - rc->kf_max_total_boost = MAX_KF_TOT_BOOST; - rc->zm_power_factor = 2.93715229184991; - } else if (screen_area <= 320 * 240) { - rc->active_wq_factor = 55.0; - rc->base_err_per_mb = 34525.33177195309; - rc->sr_default_decay_limit = 0.23901360046804604; - rc->sr_diff_part = 0.008581014394766773; - rc->gf_frame_max_boost = 127.34978204980285; - rc->gf_max_total_boost = MAX_GF_BOOST; - rc->kf_err_per_mb = 723.8337508755031; - rc->kf_frame_max_boost_first = KF_MAX_FRAME_BOOST; - rc->kf_frame_max_boost_subs = rc->kf_frame_max_boost_first / 2; - rc->kf_max_total_boost = MAX_KF_TOT_BOOST; - rc->zm_power_factor = 3.5299221493593413; - } else if (screen_area <= 640 * 360) { - rc->active_wq_factor = 12.5; - rc->base_err_per_mb = 18823.978018028298; - rc->sr_default_decay_limit = 0.6043527690301296; - rc->sr_diff_part = 0.00343296783885544; - rc->gf_frame_max_boost = 75.17672317013668; - rc->gf_max_total_boost = MAX_GF_BOOST; - rc->kf_err_per_mb = 422.2871502380377; - rc->kf_frame_max_boost_first = KF_MAX_FRAME_BOOST; - rc->kf_frame_max_boost_subs = rc->kf_frame_max_boost_first / 2; - rc->kf_max_total_boost = MAX_KF_TOT_BOOST; - rc->zm_power_factor = 2.265742666649307; - } else if (screen_area <= 854 * 480) { - rc->active_wq_factor = 51.5; - rc->base_err_per_mb = 33718.98307662595; - rc->sr_default_decay_limit = 0.33633414970713393; - rc->sr_diff_part = 0.00868988716928333; - rc->gf_frame_max_boost = 85.2868528581522; - rc->gf_max_total_boost = MAX_GF_BOOST; - rc->kf_err_per_mb = 1513.4883914008383; - rc->kf_frame_max_boost_first = KF_MAX_FRAME_BOOST; - rc->kf_frame_max_boost_subs = rc->kf_frame_max_boost_first / 2; - rc->kf_max_total_boost = MAX_KF_TOT_BOOST; - rc->zm_power_factor = 3.552278528517416; - } else if (screen_area <= 1280 * 720) { - rc->active_wq_factor = 41.5; - rc->base_err_per_mb = 29527.46375825401; - rc->sr_default_decay_limit = 0.5009117586299728; - rc->sr_diff_part = 0.005007364627260114; - rc->gf_frame_max_boost = GF_MAX_FRAME_BOOST; - rc->gf_max_total_boost = MAX_GF_BOOST; - rc->kf_err_per_mb = 998.6342911785146; - rc->kf_frame_max_boost_first = KF_MAX_FRAME_BOOST; - rc->kf_frame_max_boost_subs = rc->kf_frame_max_boost_first / 2; - rc->kf_max_total_boost = MAX_KF_TOT_BOOST; - rc->zm_power_factor = 2.568627575572356; - } else if (screen_area <= 1920 * 1080) { - rc->active_wq_factor = 31.0; - rc->base_err_per_mb = 34474.723463367416; - rc->sr_default_decay_limit = 0.23346886902707745; - rc->sr_diff_part = 0.011431716637966029; - rc->gf_frame_max_boost = 81.00472969483079; - rc->gf_max_total_boost = MAX_GF_BOOST; - rc->kf_err_per_mb = 35931.25734431429; - rc->kf_frame_max_boost_first = KF_MAX_FRAME_BOOST; - rc->kf_frame_max_boost_subs = rc->kf_frame_max_boost_first / 2; - rc->kf_max_total_boost = MAX_KF_TOT_BOOST; - rc->zm_power_factor = 5.5776463538431935; +void vp9_init_vizier_params(TWO_PASS *const twopass, int screen_area) { + // When |use_vizier_rc_params| is 1, we expect the rc parameters below to + // have been initialised on the command line as adjustment factors such + // that a factor of 1.0 will match the default behavior when + // |use_vizier_rc_params| is 0 + if (twopass->use_vizier_rc_params) { + twopass->active_wq_factor *= AV_WQ_FACTOR; + twopass->err_per_mb *= BASELINE_ERR_PER_MB; + twopass->sr_default_decay_limit *= DEFAULT_DECAY_LIMIT; + if (twopass->sr_default_decay_limit > 1.0) // > 1.0 here makes no sense + twopass->sr_default_decay_limit = 1.0; + twopass->sr_diff_factor *= 1.0; + twopass->gf_frame_max_boost *= GF_MAX_FRAME_BOOST; + twopass->gf_max_total_boost *= MAX_GF_BOOST; + // NOTE: In use max boost has precedence over min boost. So even if min is + // somehow set higher than max the final boost value will be clamped to the + // appropriate maximum. + twopass->kf_frame_min_boost *= KF_MIN_FRAME_BOOST; + twopass->kf_frame_max_boost_first *= KF_MAX_FRAME_BOOST; + twopass->kf_frame_max_boost_subs *= KF_MAX_FRAME_BOOST; + twopass->kf_max_total_boost *= MAX_KF_TOT_BOOST; + twopass->zm_factor *= DEFAULT_ZM_FACTOR; + if (twopass->zm_factor > 1.0) // > 1.0 here makes no sense + twopass->zm_factor = 1.0; + + // Correction for the fact that the kf_err_per_mb_factor default is + // already different for different video formats and ensures that a passed + // in value of 1.0 on the vizier command line will still match the current + // default. + if (screen_area < 1280 * 720) { + twopass->kf_err_per_mb *= 2000.0; + } else if (screen_area < 1920 * 1080) { + twopass->kf_err_per_mb *= 500.0; } else { - rc->active_wq_factor = AV_WQ_FACTOR; - rc->base_err_per_mb = BASELINE_ERR_PER_MB; - rc->sr_default_decay_limit = DEFAULT_DECAY_LIMIT; - rc->sr_diff_part = SR_DIFF_PART; - rc->gf_frame_max_boost = GF_MAX_FRAME_BOOST; - rc->gf_max_total_boost = MAX_GF_BOOST; - rc->kf_err_per_mb = DEF_EPMB_LOW; - rc->kf_frame_max_boost_first = KF_MAX_FRAME_BOOST; - rc->kf_frame_max_boost_subs = rc->kf_frame_max_boost_first / 2; - rc->kf_max_total_boost = MAX_KF_TOT_BOOST; - rc->zm_power_factor = ZM_POWER_FACTOR; + twopass->kf_err_per_mb *= 250.0; + } + } else { + // When |use_vizier_rc_params| is 0, use defaults. + twopass->active_wq_factor = AV_WQ_FACTOR; + twopass->err_per_mb = BASELINE_ERR_PER_MB; + twopass->sr_default_decay_limit = DEFAULT_DECAY_LIMIT; + twopass->sr_diff_factor = 1.0; + twopass->gf_frame_max_boost = GF_MAX_FRAME_BOOST; + twopass->gf_max_total_boost = MAX_GF_BOOST; + twopass->kf_frame_min_boost = KF_MIN_FRAME_BOOST; + twopass->kf_frame_max_boost_first = KF_MAX_FRAME_BOOST; + twopass->kf_frame_max_boost_subs = KF_MAX_FRAME_BOOST; + twopass->kf_max_total_boost = MAX_KF_TOT_BOOST; + twopass->zm_factor = DEFAULT_ZM_FACTOR; + + if (screen_area < 1280 * 720) { + twopass->kf_err_per_mb = 2000.0; + } else if (screen_area < 1920 * 1080) { + twopass->kf_err_per_mb = 500.0; + } else { + twopass->kf_err_per_mb = 250.0; } } } @@ -3596,7 +3467,7 @@ if (cm->current_video_frame == 0) { unsigned int screen_area = (cm->width * cm->height); - init_vizier_params(rc, screen_area); + vp9_init_vizier_params(twopass, screen_area); } // If this is an arf frame then we dont want to read the stats file or @@ -3617,13 +3488,6 @@ cm->frame_type = INTER_FRAME; - // Do the firstpass stats indicate that this frame is skippable for the - // partition search? - if (cpi->sf.allow_partition_search_skip && cpi->oxcf.pass == 2 && - !cpi->use_svc) { - cpi->partition_search_skippable_frame = is_skippable_frame(cpi); - } - // The multiplication by 256 reverses a scaling factor of (>> 8) // applied when combining MB error values for the frame. twopass->mb_av_energy = log((this_frame.intra_error * 256.0) + 1.0); @@ -3706,13 +3570,6 @@ vp9_configure_buffer_updates(cpi, gf_group->index); - // Do the firstpass stats indicate that this frame is skippable for the - // partition search? - if (cpi->sf.allow_partition_search_skip && cpi->oxcf.pass == 2 && - !cpi->use_svc) { - cpi->partition_search_skippable_frame = is_skippable_frame(cpi); - } - rc->base_frame_target = gf_group->bit_allocation[gf_group->index]; // The multiplication by 256 reverses a scaling factor of (>> 8) @@ -3863,8 +3720,7 @@ *first_is_key_frame = 0; if (rc.frames_to_key == 0) { rc.frames_to_key = vp9_get_frames_to_next_key( - &cpi->oxcf, &cpi->frame_info, &cpi->twopass.first_pass_info, - *first_show_idx, rc.min_gf_interval); + &cpi->oxcf, &cpi->twopass, *first_show_idx, rc.min_gf_interval); rc.frames_since_key = 0; *first_is_key_frame = 1; } @@ -3872,18 +3728,18 @@ if (gop_command->use) { *coding_frame_count = gop_command_coding_frame_count(gop_command); *use_alt_ref = gop_command->use_alt_ref; - assert(*coding_frame_count < rc.frames_to_key); + assert(gop_command->show_frame_count <= rc.frames_to_key); } else { *coding_frame_count = vp9_get_gop_coding_frame_count( - &cpi->oxcf, &cpi->frame_info, &cpi->twopass.first_pass_info, &rc, - *first_show_idx, multi_layer_arf, allow_alt_ref, *first_is_key_frame, + &cpi->oxcf, &cpi->twopass, &cpi->frame_info, &rc, *first_show_idx, + multi_layer_arf, allow_alt_ref, *first_is_key_frame, *last_gop_use_alt_ref, use_alt_ref); } } int vp9_get_gop_coding_frame_count(const VP9EncoderConfig *oxcf, + const TWO_PASS *const twopass, const FRAME_INFO *frame_info, - const FIRST_PASS_INFO *first_pass_info, const RATE_CONTROL *rc, int show_idx, int multi_layer_arf, int allow_alt_ref, int first_is_key_frame, @@ -3891,21 +3747,28 @@ int frame_count; double gop_intra_factor; const int arf_active_or_kf = last_gop_use_alt_ref || first_is_key_frame; - RANGE active_gf_interval = get_active_gf_inverval_range( - frame_info, rc, arf_active_or_kf, show_idx, /*active_worst_quality=*/0, - /*last_boosted_qindex=*/0); + RANGE active_gf_interval; + int arf_layers; + if (oxcf->use_simple_encode_api) { + active_gf_interval = get_active_gf_inverval_range_simple( + rc->min_gf_interval, arf_active_or_kf, rc->frames_to_key); + } else { + active_gf_interval = get_active_gf_inverval_range( + frame_info, rc, arf_active_or_kf, show_idx, /*active_worst_quality=*/0, + /*last_boosted_qindex=*/0); + } - const int arf_layers = get_arf_layers(multi_layer_arf, oxcf->enable_auto_arf, - active_gf_interval.max); + arf_layers = get_arf_layers(multi_layer_arf, oxcf->enable_auto_arf, + active_gf_interval.max); if (multi_layer_arf) { gop_intra_factor = 1.0 + 0.25 * arf_layers; } else { gop_intra_factor = 1.0; } - frame_count = get_gop_coding_frame_num( - use_alt_ref, frame_info, first_pass_info, rc, show_idx, - &active_gf_interval, gop_intra_factor, oxcf->lag_in_frames); + frame_count = get_gop_coding_frame_num(use_alt_ref, frame_info, twopass, rc, + show_idx, &active_gf_interval, + gop_intra_factor, oxcf->lag_in_frames); *use_alt_ref &= allow_alt_ref; return frame_count; } @@ -3913,9 +3776,10 @@ // Under CONFIG_RATE_CTRL, once the first_pass_info is ready, the number of // coding frames (including show frame and alt ref) can be determined. int vp9_get_coding_frame_num(const VP9EncoderConfig *oxcf, - const FRAME_INFO *frame_info, - const FIRST_PASS_INFO *first_pass_info, - int multi_layer_arf, int allow_alt_ref) { + const TWO_PASS *const twopass, + const FRAME_INFO *frame_info, int multi_layer_arf, + int allow_alt_ref) { + const FIRST_PASS_INFO *first_pass_info = &twopass->first_pass_info; int coding_frame_num = 0; RATE_CONTROL rc; int gop_coding_frame_count; @@ -3928,14 +3792,14 @@ int use_alt_ref; int first_is_key_frame = 0; if (rc.frames_to_key == 0) { - rc.frames_to_key = vp9_get_frames_to_next_key( - oxcf, frame_info, first_pass_info, show_idx, rc.min_gf_interval); + rc.frames_to_key = vp9_get_frames_to_next_key(oxcf, twopass, show_idx, + rc.min_gf_interval); rc.frames_since_key = 0; first_is_key_frame = 1; } gop_coding_frame_count = vp9_get_gop_coding_frame_count( - oxcf, frame_info, first_pass_info, &rc, show_idx, multi_layer_arf, + oxcf, twopass, frame_info, &rc, show_idx, multi_layer_arf, allow_alt_ref, first_is_key_frame, last_gop_use_alt_ref, &use_alt_ref); rc.source_alt_ref_active = use_alt_ref; @@ -3950,9 +3814,8 @@ } void vp9_get_key_frame_map(const VP9EncoderConfig *oxcf, - const FRAME_INFO *frame_info, - const FIRST_PASS_INFO *first_pass_info, - int *key_frame_map) { + const TWO_PASS *const twopass, int *key_frame_map) { + const FIRST_PASS_INFO *first_pass_info = &twopass->first_pass_info; int show_idx = 0; RATE_CONTROL rc; vp9_rc_init(oxcf, 1, &rc); @@ -3965,8 +3828,8 @@ while (show_idx < first_pass_info->num_frames) { int key_frame_group_size; key_frame_map[show_idx] = 1; - key_frame_group_size = vp9_get_frames_to_next_key( - oxcf, frame_info, first_pass_info, show_idx, rc.min_gf_interval); + key_frame_group_size = + vp9_get_frames_to_next_key(oxcf, twopass, show_idx, rc.min_gf_interval); assert(key_frame_group_size > 0); show_idx += key_frame_group_size; } diff -Nru libvpx-1.10.0/vp9/encoder/vp9_firstpass.h libvpx-1.11.0/vp9/encoder/vp9_firstpass.h --- libvpx-1.10.0/vp9/encoder/vp9_firstpass.h 2021-03-18 19:59:46.000000000 +0000 +++ libvpx-1.11.0/vp9/encoder/vp9_firstpass.h 2021-10-06 17:41:19.000000000 +0000 @@ -21,27 +21,6 @@ extern "C" { #endif -#if CONFIG_FP_MB_STATS - -#define FPMB_DCINTRA_MASK 0x01 - -#define FPMB_MOTION_ZERO_MASK 0x02 -#define FPMB_MOTION_LEFT_MASK 0x04 -#define FPMB_MOTION_RIGHT_MASK 0x08 -#define FPMB_MOTION_UP_MASK 0x10 -#define FPMB_MOTION_DOWN_MASK 0x20 - -#define FPMB_ERROR_SMALL_MASK 0x40 -#define FPMB_ERROR_LARGE_MASK 0x80 -#define FPMB_ERROR_SMALL_TH 2000 -#define FPMB_ERROR_LARGE_TH 48000 - -typedef struct { - uint8_t *mb_stats_start; - uint8_t *mb_stats_end; -} FIRSTPASS_MB_STATS; -#endif - #define INVALID_ROW (-1) #define MAX_ARF_LAYERS 6 @@ -188,12 +167,6 @@ double mb_av_energy; double mb_smooth_pct; -#if CONFIG_FP_MB_STATS - uint8_t *frame_mb_stats_buf; - uint8_t *this_frame_mb_stats; - FIRSTPASS_MB_STATS firstpass_mb_stats; -#endif - FP_MB_FLOAT_STATS *fp_mb_float_stats; // An indication of the content type of the current frame @@ -221,6 +194,24 @@ int last_qindex_of_arf_layer[MAX_ARF_LAYERS]; GF_GROUP gf_group; + + // Vizeir project experimental two pass rate control parameters. + // When |use_vizier_rc_params| is 1, the following parameters will + // be overwritten by pass in values. Otherwise, they are initialized + // by default values. + int use_vizier_rc_params; + double active_wq_factor; + double err_per_mb; + double sr_default_decay_limit; + double sr_diff_factor; + double kf_err_per_mb; + double kf_frame_min_boost; + double kf_frame_max_boost_first; // Max for first kf in a chunk. + double kf_frame_max_boost_subs; // Max for subsequent mid chunk kfs. + double kf_max_total_boost; + double gf_max_total_boost; + double gf_frame_max_boost; + double zm_factor; } TWO_PASS; struct VP9_COMP; @@ -239,6 +230,7 @@ void vp9_init_second_pass(struct VP9_COMP *cpi); void vp9_rc_get_second_pass_params(struct VP9_COMP *cpi); +void vp9_init_vizier_params(TWO_PASS *const twopass, int screen_area); // Post encode update of the rate control parameters for 2-pass void vp9_twopass_postencode_update(struct VP9_COMP *cpi); @@ -248,9 +240,8 @@ struct VP9EncoderConfig; int vp9_get_frames_to_next_key(const struct VP9EncoderConfig *oxcf, - const FRAME_INFO *frame_info, - const FIRST_PASS_INFO *first_pass_info, - int kf_show_idx, int min_gf_interval); + const TWO_PASS *const twopass, int kf_show_idx, + int min_gf_interval); #if CONFIG_RATE_CTRL /* Call this function to get info about the next group of pictures. * This function should be called after vp9_create_compressor() when encoding @@ -265,8 +256,8 @@ /*!\brief Call this function before coding a new group of pictures to get * information about it. * \param[in] oxcf Encoder config + * \param[in] twopass Twopass info * \param[in] frame_info Frame info - * \param[in] first_pass_info First pass stats * \param[in] rc Rate control state * \param[in] show_idx Show index of the first frame in the group * \param[in] multi_layer_arf Is multi-layer alternate reference used @@ -279,27 +270,25 @@ * \return Returns coding frame count */ int vp9_get_gop_coding_frame_count(const struct VP9EncoderConfig *oxcf, + const TWO_PASS *const twopass, const FRAME_INFO *frame_info, - const FIRST_PASS_INFO *first_pass_info, const RATE_CONTROL *rc, int show_idx, int multi_layer_arf, int allow_alt_ref, int first_is_key_frame, int last_gop_use_alt_ref, int *use_alt_ref); int vp9_get_coding_frame_num(const struct VP9EncoderConfig *oxcf, - const FRAME_INFO *frame_info, - const FIRST_PASS_INFO *first_pass_info, - int multi_layer_arf, int allow_alt_ref); + const TWO_PASS *const twopass, + const FRAME_INFO *frame_info, int multi_layer_arf, + int allow_alt_ref); /*!\brief Compute a key frame binary map indicates whether key frames appear * in the corresponding positions. The passed in key_frame_map must point to an - * integer array with length equal to first_pass_info->num_frames, which is the - * number of show frames in the video. + * integer array with length equal to twopass->first_pass_info.num_frames, + * which is the number of show frames in the video. */ void vp9_get_key_frame_map(const struct VP9EncoderConfig *oxcf, - const FRAME_INFO *frame_info, - const FIRST_PASS_INFO *first_pass_info, - int *key_frame_map); + const TWO_PASS *const twopass, int *key_frame_map); #endif // CONFIG_RATE_CTRL FIRSTPASS_STATS vp9_get_frame_stats(const TWO_PASS *twopass); diff -Nru libvpx-1.10.0/vp9/encoder/vp9_ratectrl.c libvpx-1.11.0/vp9/encoder/vp9_ratectrl.c --- libvpx-1.10.0/vp9/encoder/vp9_ratectrl.c 2021-03-18 19:59:46.000000000 +0000 +++ libvpx-1.11.0/vp9/encoder/vp9_ratectrl.c 2021-10-06 17:41:19.000000000 +0000 @@ -39,9 +39,6 @@ #define MAX_MB_RATE 250 #define MAXRATE_1080P 4000000 -#define DEFAULT_KF_BOOST 2000 -#define DEFAULT_GF_BOOST 2000 - #define LIMIT_QRANGE_FOR_ALTREF_AND_KEY 1 #define MIN_BPB_FACTOR 0.005 @@ -410,6 +407,7 @@ rc->source_alt_ref_active = 0; rc->frames_till_gf_update_due = 0; + rc->constrain_gf_key_freq_onepass_vbr = 1; rc->ni_av_qi = oxcf->worst_allowed_q; rc->ni_tot_qi = 0; rc->ni_frames = 0; @@ -1720,10 +1718,12 @@ } #if CONFIG_RATE_CTRL - if (cpi->encode_command.use_external_target_frame_bits) { - rc->this_frame_target = cpi->encode_command.target_frame_bits; + if (cpi->oxcf.use_simple_encode_api) { + if (cpi->encode_command.use_external_target_frame_bits) { + rc->this_frame_target = cpi->encode_command.target_frame_bits; + } } -#endif +#endif // CONFIG_RATE_CTRL // Target rate per SB64 (including partial SB64s. rc->sb64_target_rate = (int)(((int64_t)rc->this_frame_target * 64 * 64) / @@ -2009,7 +2009,7 @@ } } -static int calc_pframe_target_size_one_pass_vbr(const VP9_COMP *const cpi) { +int vp9_calc_pframe_target_size_one_pass_vbr(const VP9_COMP *cpi) { const RATE_CONTROL *const rc = &cpi->rc; const int af_ratio = rc->af_ratio_onepass_vbr; int64_t target = @@ -2024,7 +2024,7 @@ return vp9_rc_clamp_pframe_target_size(cpi, (int)target); } -static int calc_iframe_target_size_one_pass_vbr(const VP9_COMP *const cpi) { +int vp9_calc_iframe_target_size_one_pass_vbr(const VP9_COMP *cpi) { static const int kf_ratio = 25; const RATE_CONTROL *rc = &cpi->rc; const int target = rc->avg_frame_bandwidth * kf_ratio; @@ -2050,22 +2050,9 @@ } } -void vp9_rc_get_one_pass_vbr_params(VP9_COMP *cpi) { - VP9_COMMON *const cm = &cpi->common; +void vp9_set_gf_update_one_pass_vbr(VP9_COMP *const cpi) { RATE_CONTROL *const rc = &cpi->rc; - int target; - if (!cpi->refresh_alt_ref_frame && - (cm->current_video_frame == 0 || (cpi->frame_flags & FRAMEFLAGS_KEY) || - rc->frames_to_key == 0)) { - cm->frame_type = KEY_FRAME; - rc->this_key_frame_forced = - cm->current_video_frame != 0 && rc->frames_to_key == 0; - rc->frames_to_key = cpi->oxcf.key_freq; - rc->kf_boost = DEFAULT_KF_BOOST; - rc->source_alt_ref_active = 0; - } else { - cm->frame_type = INTER_FRAME; - } + VP9_COMMON *const cm = &cpi->common; if (rc->frames_till_gf_update_due == 0) { double rate_err = 1.0; rc->gfu_boost = DEFAULT_GF_BOOST; @@ -2084,18 +2071,23 @@ rate_err > 3.5) { rc->baseline_gf_interval = VPXMIN(15, (3 * rc->baseline_gf_interval) >> 1); - } else if (rc->avg_frame_low_motion < 20) { + } else if (rc->avg_frame_low_motion > 0 && + rc->avg_frame_low_motion < 20) { // Decrease gf interval for high motion case. rc->baseline_gf_interval = VPXMAX(6, rc->baseline_gf_interval >> 1); } - // Adjust boost and af_ratio based on avg_frame_low_motion, which varies - // between 0 and 100 (stationary, 100% zero/small motion). - rc->gfu_boost = - VPXMAX(500, DEFAULT_GF_BOOST * (rc->avg_frame_low_motion << 1) / - (rc->avg_frame_low_motion + 100)); + // Adjust boost and af_ratio based on avg_frame_low_motion, which + // varies between 0 and 100 (stationary, 100% zero/small motion). + if (rc->avg_frame_low_motion > 0) + rc->gfu_boost = + VPXMAX(500, DEFAULT_GF_BOOST * (rc->avg_frame_low_motion << 1) / + (rc->avg_frame_low_motion + 100)); + else if (rc->avg_frame_low_motion == 0 && rate_err > 1.0) + rc->gfu_boost = DEFAULT_GF_BOOST >> 1; rc->af_ratio_onepass_vbr = VPXMIN(15, VPXMAX(5, 3 * rc->gfu_boost / 400)); } - adjust_gfint_frame_constraint(cpi, rc->frames_to_key); + if (rc->constrain_gf_key_freq_onepass_vbr) + adjust_gfint_frame_constraint(cpi, rc->frames_to_key); rc->frames_till_gf_update_due = rc->baseline_gf_interval; cpi->refresh_golden_frame = 1; rc->source_alt_ref_pending = 0; @@ -2105,10 +2097,29 @@ rc->alt_ref_gf_group = 1; } } +} + +void vp9_rc_get_one_pass_vbr_params(VP9_COMP *cpi) { + VP9_COMMON *const cm = &cpi->common; + RATE_CONTROL *const rc = &cpi->rc; + int target; + if (!cpi->refresh_alt_ref_frame && + (cm->current_video_frame == 0 || (cpi->frame_flags & FRAMEFLAGS_KEY) || + rc->frames_to_key == 0)) { + cm->frame_type = KEY_FRAME; + rc->this_key_frame_forced = + cm->current_video_frame != 0 && rc->frames_to_key == 0; + rc->frames_to_key = cpi->oxcf.key_freq; + rc->kf_boost = DEFAULT_KF_BOOST; + rc->source_alt_ref_active = 0; + } else { + cm->frame_type = INTER_FRAME; + } + vp9_set_gf_update_one_pass_vbr(cpi); if (cm->frame_type == KEY_FRAME) - target = calc_iframe_target_size_one_pass_vbr(cpi); + target = vp9_calc_iframe_target_size_one_pass_vbr(cpi); else - target = calc_pframe_target_size_one_pass_vbr(cpi); + target = vp9_calc_pframe_target_size_one_pass_vbr(cpi); vp9_rc_set_frame_target(cpi, target); if (cpi->oxcf.aq_mode == CYCLIC_REFRESH_AQ && cpi->oxcf.pass == 0) vp9_cyclic_refresh_update_parameters(cpi); @@ -2526,26 +2537,25 @@ rc->min_gf_interval = FIXED_GF_INTERVAL; rc->static_scene_max_gf_interval = FIXED_GF_INTERVAL; } else { + double framerate = cpi->framerate; // Set Maximum gf/arf interval rc->max_gf_interval = oxcf->max_gf_interval; rc->min_gf_interval = oxcf->min_gf_interval; #if CONFIG_RATE_CTRL + if (oxcf->use_simple_encode_api) { + // In this experiment, we avoid framerate being changed dynamically during + // encoding. + framerate = oxcf->init_framerate; + } +#endif // CONFIG_RATE_CTRL if (rc->min_gf_interval == 0) { rc->min_gf_interval = vp9_rc_get_default_min_gf_interval( - oxcf->width, oxcf->height, oxcf->init_framerate); + oxcf->width, oxcf->height, framerate); } if (rc->max_gf_interval == 0) { - rc->max_gf_interval = vp9_rc_get_default_max_gf_interval( - oxcf->init_framerate, rc->min_gf_interval); + rc->max_gf_interval = + vp9_rc_get_default_max_gf_interval(framerate, rc->min_gf_interval); } -#else - if (rc->min_gf_interval == 0) - rc->min_gf_interval = vp9_rc_get_default_min_gf_interval( - oxcf->width, oxcf->height, cpi->framerate); - if (rc->max_gf_interval == 0) - rc->max_gf_interval = vp9_rc_get_default_max_gf_interval( - cpi->framerate, rc->min_gf_interval); -#endif // Extended max interval for genuinely static scenes like slide shows. rc->static_scene_max_gf_interval = MAX_STATIC_GF_GROUP_LENGTH; @@ -2953,7 +2963,7 @@ } } } - target = calc_pframe_target_size_one_pass_vbr(cpi); + target = vp9_calc_pframe_target_size_one_pass_vbr(cpi); vp9_rc_set_frame_target(cpi, target); } rc->prev_avg_source_sad_lag = avg_source_sad_lag; @@ -3163,7 +3173,7 @@ VPXMIN(20, VPXMAX(10, rc->baseline_gf_interval)); adjust_gfint_frame_constraint(cpi, rc->frames_to_key); rc->frames_till_gf_update_due = rc->baseline_gf_interval; - target = calc_pframe_target_size_one_pass_vbr(cpi); + target = vp9_calc_pframe_target_size_one_pass_vbr(cpi); vp9_rc_set_frame_target(cpi, target); rc->count_last_scene_change = 0; } else { diff -Nru libvpx-1.10.0/vp9/encoder/vp9_ratectrl.h libvpx-1.11.0/vp9/encoder/vp9_ratectrl.h --- libvpx-1.10.0/vp9/encoder/vp9_ratectrl.h 2021-03-18 19:59:46.000000000 +0000 +++ libvpx-1.11.0/vp9/encoder/vp9_ratectrl.h 2021-10-06 17:41:19.000000000 +0000 @@ -27,6 +27,9 @@ // Bits Per MB at different Q (Multiplied by 512) #define BPER_MB_NORMBITS 9 +#define DEFAULT_KF_BOOST 2000 +#define DEFAULT_GF_BOOST 2000 + #define MIN_GF_INTERVAL 4 #define MAX_GF_INTERVAL 16 #define FIXED_GF_INTERVAL 8 // Used in some testing modes only @@ -205,18 +208,9 @@ int preserve_next_arf_as_gld; int show_arf_as_gld; - // Vizeir project experimental rate control parameters. - double active_wq_factor; - double base_err_per_mb; - double sr_default_decay_limit; - double sr_diff_part; - double kf_frame_max_boost_first; // Max for first kf in a chunk. - double kf_frame_max_boost_subs; // Max for subsequent mid chunk kfs. - double kf_max_total_boost; - double kf_err_per_mb; - double gf_frame_max_boost; - double gf_max_total_boost; - double zm_power_factor; + // Flag to constrain golden frame interval on key frame frequency for 1 pass + // VBR. + int constrain_gf_key_freq_onepass_vbr; } RATE_CONTROL; struct VP9_COMP; @@ -268,6 +262,9 @@ void vp9_rc_get_one_pass_cbr_params(struct VP9_COMP *cpi); int vp9_calc_pframe_target_size_one_pass_cbr(const struct VP9_COMP *cpi); int vp9_calc_iframe_target_size_one_pass_cbr(const struct VP9_COMP *cpi); +int vp9_calc_pframe_target_size_one_pass_vbr(const struct VP9_COMP *cpi); +int vp9_calc_iframe_target_size_one_pass_vbr(const struct VP9_COMP *cpi); +void vp9_set_gf_update_one_pass_vbr(struct VP9_COMP *const cpi); void vp9_update_buffer_level_preencode(struct VP9_COMP *cpi); void vp9_rc_get_svc_params(struct VP9_COMP *cpi); diff -Nru libvpx-1.10.0/vp9/encoder/vp9_rd.c libvpx-1.11.0/vp9/encoder/vp9_rd.c --- libvpx-1.10.0/vp9/encoder/vp9_rd.c 2021-03-18 19:59:46.000000000 +0000 +++ libvpx-1.11.0/vp9/encoder/vp9_rd.c 2021-10-06 17:41:19.000000000 +0000 @@ -201,63 +201,41 @@ // Later this function will use passed in command line values. void vp9_init_rd_parameters(VP9_COMP *cpi) { RD_CONTROL *const rdc = &cpi->rd_ctrl; - unsigned int screen_area = (cpi->common.width * cpi->common.height); + + // When |use_vizier_rc_params| is 1, we expect the rd parameters have been + // initialized by the pass in values. + // Be careful that parameters below are only initialized to 1, if we do not + // pass values to them. It is desired to take care of each parameter when + // using |use_vizier_rc_params|. + if (cpi->twopass.use_vizier_rc_params) return; // Make sure this function is floating point safe. vpx_clear_system_state(); - if (1) { - // Non/pre-Vizer defaults - rdc->rd_mult_q_sq_inter_low_qp = 4.0; - rdc->rd_mult_q_sq_inter_mid_qp = 4.5; - rdc->rd_mult_q_sq_inter_high_qp = 3.0; - rdc->rd_mult_q_sq_key_ultralow_qp = 4.0; - rdc->rd_mult_q_sq_key_low_qp = 3.5; - rdc->rd_mult_q_sq_key_mid_qp = 4.5; - rdc->rd_mult_q_sq_key_high_qp = 7.5; - } else if (screen_area <= 176 * 144) { - rdc->rd_mult_q_sq_inter_high_qp = 4.295745965132044; - rdc->rd_mult_q_sq_inter_low_qp = 4.0718581295922025; - rdc->rd_mult_q_sq_inter_mid_qp = 4.031435609256739; - rdc->rd_mult_q_sq_key_low_qp = 5.7037775720838155; - rdc->rd_mult_q_sq_key_mid_qp = 4.72424015517201; - rdc->rd_mult_q_sq_key_ultralow_qp = 4.290774097327333; - } else if (screen_area <= 320 * 240) { - rdc->rd_mult_q_sq_inter_high_qp = 4.388244213131458; - rdc->rd_mult_q_sq_inter_low_qp = 4.506676356706102; - rdc->rd_mult_q_sq_inter_mid_qp = 4.489349899621181; - rdc->rd_mult_q_sq_key_low_qp = 4.497000582319771; - rdc->rd_mult_q_sq_key_mid_qp = 4.2825894884789735; - rdc->rd_mult_q_sq_key_ultralow_qp = 4.217074424696166; - } else if (screen_area <= 640 * 360) { - rdc->rd_mult_q_sq_inter_high_qp = 4.3702861603380025; - rdc->rd_mult_q_sq_inter_low_qp = 4.730644123689013; - rdc->rd_mult_q_sq_inter_mid_qp = 4.314589509578551; - rdc->rd_mult_q_sq_key_low_qp = 6.068652999601526; - rdc->rd_mult_q_sq_key_mid_qp = 4.817707474077241; - rdc->rd_mult_q_sq_key_ultralow_qp = 4.576902541873747; - } else if (screen_area <= 854 * 480) { - rdc->rd_mult_q_sq_inter_high_qp = 3.969083125219539; - rdc->rd_mult_q_sq_inter_low_qp = 4.811470143416073; - rdc->rd_mult_q_sq_inter_mid_qp = 4.621618127750201; - rdc->rd_mult_q_sq_key_low_qp = 5.073157238799473; - rdc->rd_mult_q_sq_key_mid_qp = 5.7587672849242635; - rdc->rd_mult_q_sq_key_ultralow_qp = 4.9854544277222566; - } else if (screen_area <= 1280 * 720) { - rdc->rd_mult_q_sq_inter_high_qp = 4.410712348825541; - rdc->rd_mult_q_sq_inter_low_qp = 5.119381136011107; - rdc->rd_mult_q_sq_inter_mid_qp = 4.518613675766538; - rdc->rd_mult_q_sq_key_low_qp = 5.848703119971484; - rdc->rd_mult_q_sq_key_mid_qp = 5.368947246228739; - rdc->rd_mult_q_sq_key_ultralow_qp = 3.9468491666607326; - } else if (screen_area <= 1920 * 1080) { - rdc->rd_mult_q_sq_inter_high_qp = 3.2141187537667797; - rdc->rd_mult_q_sq_inter_low_qp = 6.00569815296199; - rdc->rd_mult_q_sq_inter_mid_qp = 3.932565684947023; - rdc->rd_mult_q_sq_key_low_qp = 10.582906599488298; - rdc->rd_mult_q_sq_key_mid_qp = 6.274162346360692; - rdc->rd_mult_q_sq_key_ultralow_qp = 4.399795006320089; - } + rdc->rd_mult_inter_qp_fac = 1.0; + rdc->rd_mult_arf_qp_fac = 1.0; + rdc->rd_mult_key_qp_fac = 1.0; +} + +// Returns the default rd multiplier for inter frames for a given qindex. +// The function here is a first pass estimate based on data from +// a previous Vizer run +static double def_inter_rd_multiplier(int qindex) { + return 4.15 + (0.001 * (double)qindex); +} + +// Returns the default rd multiplier for ARF/Golden Frames for a given qindex. +// The function here is a first pass estimate based on data from +// a previous Vizer run +static double def_arf_rd_multiplier(int qindex) { + return 4.25 + (0.001 * (double)qindex); +} + +// Returns the default rd multiplier for key frames for a given qindex. +// The function here is a first pass estimate based on data from +// a previous Vizer run +static double def_kf_rd_multiplier(int qindex) { + return 4.35 + (0.001 * (double)qindex); } int vp9_compute_rd_mult_based_on_qindex(const VP9_COMP *cpi, int qindex) { @@ -269,25 +247,16 @@ // Make sure this function is floating point safe. vpx_clear_system_state(); - if (cpi->common.frame_type != KEY_FRAME) { - if (qindex < 128) { - rdmult = (int)((double)rdmult * rdc->rd_mult_q_sq_inter_low_qp); - } else if (qindex < 190) { - rdmult = (int)((double)rdmult * rdc->rd_mult_q_sq_inter_mid_qp); - } else { - rdmult = (int)((double)rdmult * rdc->rd_mult_q_sq_inter_high_qp); - } + if (cpi->common.frame_type == KEY_FRAME) { + double def_rd_q_mult = def_kf_rd_multiplier(qindex); + rdmult = (int)((double)rdmult * def_rd_q_mult * rdc->rd_mult_key_qp_fac); + } else if (!cpi->rc.is_src_frame_alt_ref && + (cpi->refresh_golden_frame || cpi->refresh_alt_ref_frame)) { + double def_rd_q_mult = def_arf_rd_multiplier(qindex); + rdmult = (int)((double)rdmult * def_rd_q_mult * rdc->rd_mult_arf_qp_fac); } else { - if (qindex < 64) { - rdmult = (int)((double)rdmult * rdc->rd_mult_q_sq_key_ultralow_qp); - } else if (qindex <= 128) { - rdmult = (int)((double)rdmult * rdc->rd_mult_q_sq_key_low_qp); - } else if (qindex < 190) { - rdmult = (int)((double)rdmult * rdc->rd_mult_q_sq_key_mid_qp); - - } else { - rdmult = (int)((double)rdmult * rdc->rd_mult_q_sq_key_high_qp); - } + double def_rd_q_mult = def_inter_rd_multiplier(qindex); + rdmult = (int)((double)rdmult * def_rd_q_mult * rdc->rd_mult_inter_qp_fac); } #if CONFIG_VP9_HIGHBITDEPTH diff -Nru libvpx-1.10.0/vp9/encoder/vp9_rd.h libvpx-1.11.0/vp9/encoder/vp9_rd.h --- libvpx-1.10.0/vp9/encoder/vp9_rd.h 2021-03-18 19:59:46.000000000 +0000 +++ libvpx-1.11.0/vp9/encoder/vp9_rd.h 2021-10-06 17:41:19.000000000 +0000 @@ -102,15 +102,10 @@ } THR_MODES_SUB8X8; typedef struct { - // RD control parameters - // Added for Vizier project. - double rd_mult_q_sq_inter_low_qp; - double rd_mult_q_sq_inter_mid_qp; - double rd_mult_q_sq_inter_high_qp; - double rd_mult_q_sq_key_ultralow_qp; - double rd_mult_q_sq_key_low_qp; - double rd_mult_q_sq_key_mid_qp; - double rd_mult_q_sq_key_high_qp; + // RD multiplier control factors added for Vizier project. + double rd_mult_inter_qp_fac; + double rd_mult_arf_qp_fac; + double rd_mult_key_qp_fac; } RD_CONTROL; typedef struct RD_OPT { diff -Nru libvpx-1.10.0/vp9/encoder/vp9_rdopt.c libvpx-1.11.0/vp9/encoder/vp9_rdopt.c --- libvpx-1.10.0/vp9/encoder/vp9_rdopt.c 2021-03-18 19:59:46.000000000 +0000 +++ libvpx-1.11.0/vp9/encoder/vp9_rdopt.c 2021-10-06 17:41:19.000000000 +0000 @@ -745,8 +745,8 @@ MODE_INFO *const mi = xd->mi[0]; int64_t rd1, rd2, rd; int rate; - int64_t dist; - int64_t sse; + int64_t dist = INT64_MAX; + int64_t sse = INT64_MAX; const int coeff_ctx = combine_entropy_contexts(args->t_left[blk_row], args->t_above[blk_col]); struct buf_2d *recon = args->this_recon; @@ -799,6 +799,13 @@ if (max_txsize_lookup[plane_bsize] == tx_size) skip_txfm_flag = x->skip_txfm[(plane << 2) + (block >> (tx_size << 1))]; + // This reduces the risk of bad perceptual quality due to bad prediction. + // We always force the encoder to perform transform and quantization. + if (!args->cpi->sf.allow_skip_txfm_ac_dc && + skip_txfm_flag == SKIP_TXFM_AC_DC) { + skip_txfm_flag = SKIP_TXFM_NONE; + } + if (skip_txfm_flag == SKIP_TXFM_NONE || (recon && skip_txfm_flag == SKIP_TXFM_AC_ONLY)) { // full forward transform and quantization @@ -827,17 +834,7 @@ dist = VPXMAX(0, sse - dc_correct); } } else { - // SKIP_TXFM_AC_DC - // skip forward transform. Because this is handled here, the quantization - // does not need to do it. - x->plane[plane].eobs[block] = 0; - sse = x->bsse[(plane << 2) + (block >> (tx_size << 1))] << 4; - dist = sse; - if (recon) { - uint8_t *rec_ptr = &recon->buf[4 * (blk_row * recon->stride + blk_col)]; - copy_block_visible(xd, pd, dst, dst_stride, rec_ptr, recon->stride, - blk_row, blk_col, plane_bsize, tx_bsize); - } + assert(0 && "allow_skip_txfm_ac_dc does not allow SKIP_TXFM_AC_DC."); } } diff -Nru libvpx-1.10.0/vp9/encoder/vp9_speed_features.c libvpx-1.11.0/vp9/encoder/vp9_speed_features.c --- libvpx-1.10.0/vp9/encoder/vp9_speed_features.c 2021-03-18 19:59:46.000000000 +0000 +++ libvpx-1.11.0/vp9/encoder/vp9_speed_features.c 2021-10-06 17:41:19.000000000 +0000 @@ -345,7 +345,6 @@ sf->intra_y_mode_mask[TX_32X32] = INTRA_DC; sf->intra_uv_mode_mask[TX_32X32] = INTRA_DC; sf->adaptive_interp_filter_search = 1; - sf->allow_partition_search_skip = 1; if (cpi->twopass.fr_content_type == FC_GRAPHICS_ANIMATION) { for (i = 0; i < MAX_MESH_STEP; ++i) { @@ -931,7 +930,6 @@ sf->max_delta_qindex = 0; sf->disable_filter_search_var_thresh = 0; sf->adaptive_interp_filter_search = 0; - sf->allow_partition_search_skip = 0; sf->allow_txfm_domain_distortion = 0; sf->tx_domain_thresh = 99.0; sf->allow_quant_coeff_opt = sf->optimize_coefficients; @@ -940,6 +938,7 @@ sf->enable_tpl_model = oxcf->enable_tpl_model; sf->prune_ref_frame_for_rect_partitions = 0; sf->temporal_filter_search_method = MESH; + sf->allow_skip_txfm_ac_dc = 0; for (i = 0; i < TX_SIZES; i++) { sf->intra_y_mode_mask[i] = INTRA_ALL; diff -Nru libvpx-1.10.0/vp9/encoder/vp9_speed_features.h libvpx-1.11.0/vp9/encoder/vp9_speed_features.h --- libvpx-1.10.0/vp9/encoder/vp9_speed_features.h 2021-03-18 19:59:46.000000000 +0000 +++ libvpx-1.11.0/vp9/encoder/vp9_speed_features.h 2021-10-06 17:41:19.000000000 +0000 @@ -525,9 +525,6 @@ int prune_rect_thresh[4]; } rd_ml_partition; - // Allow skipping partition search for still image frame - int allow_partition_search_skip; - // Fast approximation of vp9_model_rd_from_var_lapndz int simple_model_rd_from_var; @@ -612,6 +609,12 @@ // For real-time mode: force DC only under intra search when content // does not have high souce SAD. int rt_intra_dc_only_low_content; + + // The encoder has a feature that skips forward transform and quantization + // based on a model rd estimation to reduce encoding time. + // However, this feature is dangerous since it could lead to bad perceptual + // quality. This flag is added to guard the feature. + int allow_skip_txfm_ac_dc; } SPEED_FEATURES; struct VP9_COMP; diff -Nru libvpx-1.10.0/vp9/encoder/vp9_svc_layercontext.c libvpx-1.11.0/vp9/encoder/vp9_svc_layercontext.c --- libvpx-1.10.0/vp9/encoder/vp9_svc_layercontext.c 2021-03-18 19:59:46.000000000 +0000 +++ libvpx-1.11.0/vp9/encoder/vp9_svc_layercontext.c 2021-10-06 17:41:19.000000000 +0000 @@ -322,8 +322,8 @@ const int prev_layer_target_bandwidth = oxcf->layer_target_bitrate[st_idx - 1]; lc->avg_frame_size = - (int)((lc->target_bandwidth - prev_layer_target_bandwidth) / - (lc->framerate - prev_layer_framerate)); + (int)round((lc->target_bandwidth - prev_layer_target_bandwidth) / + (lc->framerate - prev_layer_framerate)); } } diff -Nru libvpx-1.10.0/vp9/encoder/vp9_svc_layercontext.h libvpx-1.11.0/vp9/encoder/vp9_svc_layercontext.h --- libvpx-1.10.0/vp9/encoder/vp9_svc_layercontext.h 2021-03-18 19:59:46.000000000 +0000 +++ libvpx-1.11.0/vp9/encoder/vp9_svc_layercontext.h 2021-10-06 17:41:19.000000000 +0000 @@ -173,6 +173,8 @@ uint8_t fb_idx_temporal_layer_id[REF_FRAMES]; int spatial_layer_sync[VPX_SS_MAX_LAYERS]; + // Quantizer for each spatial layer. + int base_qindex[VPX_SS_MAX_LAYERS]; uint8_t set_intra_only_frame; uint8_t previous_frame_is_intra_only; uint8_t superframe_has_layer_sync; diff -Nru libvpx-1.10.0/vp9/ratectrl_rtc.cc libvpx-1.11.0/vp9/ratectrl_rtc.cc --- libvpx-1.10.0/vp9/ratectrl_rtc.cc 2021-03-18 19:59:46.000000000 +0000 +++ libvpx-1.11.0/vp9/ratectrl_rtc.cc 2021-10-06 17:41:19.000000000 +0000 @@ -11,6 +11,7 @@ #include +#include "vp9/common/vp9_common.h" #include "vp9/encoder/vp9_encoder.h" #include "vp9/encoder/vp9_picklpf.h" #include "vpx/vp8cx.h" @@ -24,10 +25,19 @@ VP9RateControlRTC()); if (!rc_api) return nullptr; rc_api->cpi_ = static_cast(vpx_memalign(32, sizeof(*cpi_))); - if (rc_api->cpi_ == nullptr) { - return nullptr; - } + if (!rc_api->cpi_) return nullptr; + vp9_zero(*rc_api->cpi_); + rc_api->InitRateControl(cfg); + if (cfg.aq_mode) { + VP9_COMP *const cpi = rc_api->cpi_; + cpi->segmentation_map = static_cast( + vpx_calloc(cpi->common.mi_rows * cpi->common.mi_cols, + sizeof(*cpi->segmentation_map))); + cpi->cyclic_refresh = + vp9_cyclic_refresh_alloc(cpi->common.mi_rows, cpi->common.mi_cols); + cpi->cyclic_refresh->content_mode = 0; + } return rc_api; } @@ -38,13 +48,18 @@ cm->profile = PROFILE_0; cm->bit_depth = VPX_BITS_8; cm->show_frame = 1; - oxcf->rc_mode = VPX_CBR; + oxcf->profile = cm->profile; + oxcf->bit_depth = cm->bit_depth; + oxcf->rc_mode = rc_cfg.rc_mode; oxcf->pass = 0; - oxcf->aq_mode = NO_AQ; + oxcf->aq_mode = rc_cfg.aq_mode ? CYCLIC_REFRESH_AQ : NO_AQ; oxcf->content = VP9E_CONTENT_DEFAULT; oxcf->drop_frames_water_mark = 0; + cm->current_video_frame = 0; + rc->kf_boost = DEFAULT_KF_BOOST; UpdateRateControl(rc_cfg); + vp9_set_mb_mi(cm, cm->width, cm->height); cpi_->use_svc = (cpi_->svc.number_spatial_layers > 1 || cpi_->svc.number_temporal_layers > 1) @@ -55,8 +70,8 @@ rc->rc_2_frame = 0; vp9_rc_init_minq_luts(); vp9_rc_init(oxcf, 0, rc); + rc->constrain_gf_key_freq_onepass_vbr = 0; cpi_->sf.use_nonrd_pick_mode = 1; - cm->current_video_frame = 0; } void VP9RateControlRTC::UpdateRateControl( @@ -73,6 +88,7 @@ oxcf->best_allowed_q = vp9_quantizer_to_qindex(rc_cfg.min_quantizer); rc->worst_quality = oxcf->worst_allowed_q; rc->best_quality = oxcf->best_allowed_q; + oxcf->init_framerate = rc_cfg.framerate; oxcf->target_bandwidth = 1000 * rc_cfg.target_bandwidth; oxcf->starting_buffer_level_ms = rc_cfg.buf_initial_sz; oxcf->optimal_buffer_level_ms = rc_cfg.buf_optimal_sz; @@ -85,6 +101,7 @@ (rc_cfg.ts_number_layers > 1) ? rc_cfg.ts_number_layers : 0); cpi_->oxcf.rc_max_intra_bitrate_pct = rc_cfg.max_intra_bitrate_pct; + cpi_->oxcf.rc_max_inter_bitrate_pct = rc_cfg.max_inter_bitrate_pct; cpi_->framerate = rc_cfg.framerate; cpi_->svc.number_spatial_layers = rc_cfg.ss_number_layers; cpi_->svc.number_temporal_layers = rc_cfg.ts_number_layers; @@ -138,11 +155,27 @@ cpi_->sf.use_nonrd_pick_mode = 1; if (cpi_->svc.number_spatial_layers == 1 && cpi_->svc.number_temporal_layers == 1) { - int target; - if (frame_is_intra_only(cm)) - target = vp9_calc_iframe_target_size_one_pass_cbr(cpi_); - else - target = vp9_calc_pframe_target_size_one_pass_cbr(cpi_); + int target = 0; + if (cpi_->oxcf.rc_mode == VPX_CBR) { + if (cpi_->oxcf.aq_mode == CYCLIC_REFRESH_AQ) + vp9_cyclic_refresh_update_parameters(cpi_); + if (frame_is_intra_only(cm)) + target = vp9_calc_iframe_target_size_one_pass_cbr(cpi_); + else + target = vp9_calc_pframe_target_size_one_pass_cbr(cpi_); + } else if (cpi_->oxcf.rc_mode == VPX_VBR) { + if (cm->frame_type == KEY_FRAME) { + cpi_->rc.this_key_frame_forced = cm->current_video_frame != 0; + cpi_->rc.frames_to_key = cpi_->oxcf.key_freq; + } + vp9_set_gf_update_one_pass_vbr(cpi_); + if (cpi_->oxcf.aq_mode == CYCLIC_REFRESH_AQ) + vp9_cyclic_refresh_update_parameters(cpi_); + if (frame_is_intra_only(cm)) + target = vp9_calc_iframe_target_size_one_pass_vbr(cpi_); + else + target = vp9_calc_pframe_target_size_one_pass_vbr(cpi_); + } vp9_rc_set_frame_target(cpi_, target); vp9_update_buffer_level_preencode(cpi_); } else { @@ -153,6 +186,8 @@ int bottom_index, top_index; cpi_->common.base_qindex = vp9_rc_pick_q_and_bounds(cpi_, &bottom_index, &top_index); + + if (cpi_->oxcf.aq_mode == CYCLIC_REFRESH_AQ) vp9_cyclic_refresh_setup(cpi_); } int VP9RateControlRTC::GetQP() const { return cpi_->common.base_qindex; } @@ -163,6 +198,14 @@ return lf->filter_level; } +signed char *VP9RateControlRTC::GetCyclicRefreshMap() const { + return cpi_->cyclic_refresh->map; +} + +int *VP9RateControlRTC::GetDeltaQ() const { + return cpi_->cyclic_refresh->qindex_delta; +} + void VP9RateControlRTC::PostEncodeUpdate(uint64_t encoded_frame_size) { vp9_rc_postencode_update(cpi_, encoded_frame_size); if (cpi_->svc.number_spatial_layers > 1 || diff -Nru libvpx-1.10.0/vp9/ratectrl_rtc.h libvpx-1.11.0/vp9/ratectrl_rtc.h --- libvpx-1.10.0/vp9/ratectrl_rtc.h 2021-03-18 19:59:46.000000000 +0000 +++ libvpx-1.11.0/vp9/ratectrl_rtc.h 2021-10-06 17:41:19.000000000 +0000 @@ -18,6 +18,7 @@ #include "vp9/common/vp9_enums.h" #include "vp9/common/vp9_onyxc_int.h" #include "vp9/vp9_iface_common.h" +#include "vp9/encoder/vp9_aq_cyclicrefresh.h" #include "vp9/encoder/vp9_encoder.h" #include "vp9/encoder/vp9_firstpass.h" #include "vp9/vp9_cx_iface.h" @@ -26,6 +27,37 @@ namespace libvpx { struct VP9RateControlRtcConfig { + public: + VP9RateControlRtcConfig() { + width = 1280; + height = 720; + max_quantizer = 63; + min_quantizer = 2; + target_bandwidth = 1000; + buf_initial_sz = 600; + buf_optimal_sz = 600; + buf_sz = 1000; + undershoot_pct = overshoot_pct = 50; + max_intra_bitrate_pct = 50; + max_inter_bitrate_pct = 0; + framerate = 30.0; + ss_number_layers = ts_number_layers = 1; + rc_mode = VPX_CBR; + aq_mode = 0; + vp9_zero(max_quantizers); + vp9_zero(min_quantizers); + vp9_zero(scaling_factor_den); + vp9_zero(scaling_factor_num); + vp9_zero(layer_target_bitrate); + vp9_zero(ts_rate_decimator); + scaling_factor_num[0] = 1; + scaling_factor_den[0] = 1; + layer_target_bitrate[0] = static_cast(target_bandwidth); + max_quantizers[0] = max_quantizer; + min_quantizers[0] = min_quantizer; + ts_rate_decimator[0] = 1; + } + int width; int height; // 0-63 @@ -38,6 +70,7 @@ int undershoot_pct; int overshoot_pct; int max_intra_bitrate_pct; + int max_inter_bitrate_pct; double framerate; // Number of spatial layers int ss_number_layers; @@ -49,6 +82,9 @@ int scaling_factor_den[VPX_SS_MAX_LAYERS]; int layer_target_bitrate[VPX_MAX_LAYERS]; int ts_rate_decimator[VPX_TS_MAX_LAYERS]; + // vbr, cbr + enum vpx_rc_mode rc_mode; + int aq_mode; }; struct VP9FrameParamsQpRTC { @@ -84,15 +120,23 @@ const VP9RateControlRtcConfig &cfg); ~VP9RateControlRTC() { if (cpi_) { - for (int sl = 0; sl < cpi_->svc.number_spatial_layers; sl++) { - for (int tl = 0; tl < cpi_->svc.number_temporal_layers; tl++) { - int layer = LAYER_IDS_TO_IDX(sl, tl, cpi_->oxcf.ts_number_layers); - LAYER_CONTEXT *const lc = &cpi_->svc.layer_context[layer]; - vpx_free(lc->map); - vpx_free(lc->last_coded_q_map); - vpx_free(lc->consec_zero_mv); + if (cpi_->svc.number_spatial_layers > 1 || + cpi_->svc.number_temporal_layers > 1) { + for (int sl = 0; sl < cpi_->svc.number_spatial_layers; sl++) { + for (int tl = 0; tl < cpi_->svc.number_temporal_layers; tl++) { + int layer = LAYER_IDS_TO_IDX(sl, tl, cpi_->oxcf.ts_number_layers); + LAYER_CONTEXT *const lc = &cpi_->svc.layer_context[layer]; + vpx_free(lc->map); + vpx_free(lc->last_coded_q_map); + vpx_free(lc->consec_zero_mv); + } } } + if (cpi_->oxcf.aq_mode == CYCLIC_REFRESH_AQ) { + vpx_free(cpi_->segmentation_map); + cpi_->segmentation_map = NULL; + vp9_cyclic_refresh_free(cpi_->cyclic_refresh); + } vpx_free(cpi_); } } @@ -101,6 +145,8 @@ // GetQP() needs to be called after ComputeQP() to get the latest QP int GetQP() const; int GetLoopfilterLevel() const; + signed char *GetCyclicRefreshMap() const; + int *GetDeltaQ() const; void ComputeQP(const VP9FrameParamsQpRTC &frame_params); // Feedback to rate control with the size of current encoded frame void PostEncodeUpdate(uint64_t encoded_frame_size); diff -Nru libvpx-1.10.0/vp9/simple_encode.cc libvpx-1.11.0/vp9/simple_encode.cc --- libvpx-1.10.0/vp9/simple_encode.cc 2021-03-18 19:59:46.000000000 +0000 +++ libvpx-1.11.0/vp9/simple_encode.cc 2021-10-06 17:41:19.000000000 +0000 @@ -793,6 +793,7 @@ if (enc_pass == VPX_RC_FIRST_PASS) { oxcf.lag_in_frames = 0; } + oxcf.use_simple_encode_api = 1; return oxcf; } @@ -872,14 +873,14 @@ const VP9EncoderConfig oxcf = GetEncodeConfig( frame_width_, frame_height_, frame_rate, target_bitrate_, encode_speed_, VPX_RC_FIRST_PASS, impl_ptr_->encode_config_list); - VP9_COMP *cpi = init_encoder(&oxcf, impl_ptr_->img_fmt); - struct lookahead_ctx *lookahead = cpi->lookahead; + impl_ptr_->cpi = init_encoder(&oxcf, impl_ptr_->img_fmt); + struct lookahead_ctx *lookahead = impl_ptr_->cpi->lookahead; int i; int use_highbitdepth = 0; const int num_rows_16x16 = get_num_unit_16x16(frame_height_); const int num_cols_16x16 = get_num_unit_16x16(frame_width_); #if CONFIG_VP9_HIGHBITDEPTH - use_highbitdepth = cpi->common.use_highbitdepth; + use_highbitdepth = impl_ptr_->cpi->common.use_highbitdepth; #endif vpx_image_t img; vpx_img_alloc(&img, impl_ptr_->img_fmt, frame_width_, frame_height_, 1); @@ -905,30 +906,35 @@ ENCODE_FRAME_RESULT encode_frame_info; vp9_init_encode_frame_result(&encode_frame_info); // TODO(angiebird): Call vp9_first_pass directly - vp9_get_compressed_data(cpi, &frame_flags, &size, nullptr, &time_stamp, - &time_end, flush, &encode_frame_info); + vp9_get_compressed_data(impl_ptr_->cpi, &frame_flags, &size, nullptr, + &time_stamp, &time_end, flush, + &encode_frame_info); // vp9_get_compressed_data only generates first pass stats not // compresses data assert(size == 0); // Get vp9 first pass motion vector info. std::vector mv_info(num_rows_16x16 * num_cols_16x16); - update_motion_vector_info(cpi->fp_motion_vector_info, num_rows_16x16, - num_cols_16x16, mv_info.data(), - kMotionVectorFullPixelPrecision); + update_motion_vector_info( + impl_ptr_->cpi->fp_motion_vector_info, num_rows_16x16, + num_cols_16x16, mv_info.data(), kMotionVectorFullPixelPrecision); fp_motion_vector_info_.push_back(mv_info); } - impl_ptr_->first_pass_stats.push_back(vp9_get_frame_stats(&cpi->twopass)); + impl_ptr_->first_pass_stats.push_back( + vp9_get_frame_stats(&impl_ptr_->cpi->twopass)); } } - vp9_end_first_pass(cpi); // TODO(angiebird): Store the total_stats apart form first_pass_stats - impl_ptr_->first_pass_stats.push_back(vp9_get_total_stats(&cpi->twopass)); - free_encoder(cpi); - rewind(in_file_); - vpx_img_free(&img); + impl_ptr_->first_pass_stats.push_back( + vp9_get_total_stats(&impl_ptr_->cpi->twopass)); + vp9_end_first_pass(impl_ptr_->cpi); // Generate key_frame_map based on impl_ptr_->first_pass_stats. key_frame_map_ = ComputeKeyFrameMap(); + + free_encoder(impl_ptr_->cpi); + impl_ptr_->cpi = nullptr; + rewind(in_file_); + vpx_img_free(&img); } std::vector> SimpleEncode::ObserveFirstPassStats() { @@ -1004,8 +1010,7 @@ static GOP_COMMAND GetGopCommand(const std::vector &gop_map, int start_show_index) { GOP_COMMAND gop_command; - if (gop_map.size() > 0) { - assert(static_cast(start_show_index) < gop_map.size()); + if (static_cast(start_show_index) < gop_map.size()) { assert((gop_map[start_show_index] & kGopMapFlagStart) != 0); int end_show_index = start_show_index + 1; // gop_map[end_show_index] & kGopMapFlagStart == 0 means this is @@ -1049,6 +1054,11 @@ frame_coding_index_ = 0; show_frame_count_ = 0; + assert(impl_ptr_->cpi != nullptr); + FRAME_INFO frame_info = vp9_get_frame_info(&oxcf); + unsigned int screen_area = frame_info.frame_width * frame_info.frame_height; + vp9_init_vizier_params(&impl_ptr_->cpi->twopass, screen_area); + UpdateKeyFrameGroup(show_frame_count_); const GOP_COMMAND gop_command = GetGopCommand(gop_map_, show_frame_count_); @@ -1084,8 +1094,7 @@ const VP9_COMP *cpi = impl_ptr_->cpi; key_frame_group_index_ = 0; key_frame_group_size_ = vp9_get_frames_to_next_key( - &cpi->oxcf, &cpi->frame_info, &cpi->twopass.first_pass_info, - key_frame_show_index, cpi->rc.min_gf_interval); + &cpi->oxcf, &cpi->twopass, key_frame_show_index, cpi->rc.min_gf_interval); assert(key_frame_group_size_ > 0); // Init the reference frame info when a new key frame group appears. InitRefFrameInfo(&ref_frame_info_); @@ -1239,7 +1248,7 @@ start_show_index += gop_command.show_frame_count; coding_frame_count += gop_command_coding_frame_count(&gop_command); } - assert(start_show_index == gop_map.size()); + assert(static_cast(start_show_index) == gop_map.size()); return coding_frame_count; } @@ -1250,6 +1259,7 @@ } // These are the default settings for now. + TWO_PASS twopass; const int multi_layer_arf = 0; const int allow_alt_ref = 1; vpx_rational_t frame_rate = @@ -1258,30 +1268,30 @@ frame_width_, frame_height_, frame_rate, target_bitrate_, encode_speed_, VPX_RC_LAST_PASS, impl_ptr_->encode_config_list); FRAME_INFO frame_info = vp9_get_frame_info(&oxcf); - FIRST_PASS_INFO first_pass_info; - fps_init_first_pass_info(&first_pass_info, + fps_init_first_pass_info(&twopass.first_pass_info, GetVectorData(impl_ptr_->first_pass_stats), num_frames_); - return vp9_get_coding_frame_num(&oxcf, &frame_info, &first_pass_info, - multi_layer_arf, allow_alt_ref); + unsigned int screen_area = frame_info.frame_width * frame_info.frame_height; + vp9_init_vizier_params(&twopass, screen_area); + return vp9_get_coding_frame_num(&oxcf, &twopass, &frame_info, multi_layer_arf, + allow_alt_ref); } std::vector SimpleEncode::ComputeKeyFrameMap() const { // The last entry of first_pass_stats is the overall stats. - assert(impl_ptr_->first_pass_stats.size() == num_frames_ + 1); + assert(impl_ptr_->first_pass_stats.size() == + static_cast(num_frames_) + 1); vpx_rational_t frame_rate = make_vpx_rational(frame_rate_num_, frame_rate_den_); const VP9EncoderConfig oxcf = GetEncodeConfig( frame_width_, frame_height_, frame_rate, target_bitrate_, encode_speed_, VPX_RC_LAST_PASS, impl_ptr_->encode_config_list); - FRAME_INFO frame_info = vp9_get_frame_info(&oxcf); - FIRST_PASS_INFO first_pass_info; - fps_init_first_pass_info(&first_pass_info, + TWO_PASS twopass; + fps_init_first_pass_info(&twopass.first_pass_info, GetVectorData(impl_ptr_->first_pass_stats), num_frames_); std::vector key_frame_map(num_frames_, 0); - vp9_get_key_frame_map(&oxcf, &frame_info, &first_pass_info, - GetVectorData(key_frame_map)); + vp9_get_key_frame_map(&oxcf, &twopass, GetVectorData(key_frame_map)); return key_frame_map; } diff -Nru libvpx-1.10.0/vp9/vp9_cx_iface.c libvpx-1.11.0/vp9/vp9_cx_iface.c --- libvpx-1.10.0/vp9/vp9_cx_iface.c 2021-03-18 19:59:46.000000000 +0000 +++ libvpx-1.11.0/vp9/vp9_cx_iface.c 2021-10-06 17:41:19.000000000 +0000 @@ -348,6 +348,24 @@ } RANGE_CHECK(extra_cfg, color_space, VPX_CS_UNKNOWN, VPX_CS_SRGB); RANGE_CHECK(extra_cfg, color_range, VPX_CR_STUDIO_RANGE, VPX_CR_FULL_RANGE); + + // The range below shall be further tuned. + RANGE_CHECK(cfg, use_vizier_rc_params, 0, 1); + RANGE_CHECK(cfg, active_wq_factor.den, 1, 1000); + RANGE_CHECK(cfg, err_per_mb_factor.den, 1, 1000); + RANGE_CHECK(cfg, sr_default_decay_limit.den, 1, 1000); + RANGE_CHECK(cfg, sr_diff_factor.den, 1, 1000); + RANGE_CHECK(cfg, kf_err_per_mb_factor.den, 1, 1000); + RANGE_CHECK(cfg, kf_frame_min_boost_factor.den, 1, 1000); + RANGE_CHECK(cfg, kf_frame_max_boost_subs_factor.den, 1, 1000); + RANGE_CHECK(cfg, kf_max_total_boost_factor.den, 1, 1000); + RANGE_CHECK(cfg, gf_max_total_boost_factor.den, 1, 1000); + RANGE_CHECK(cfg, gf_frame_max_boost_factor.den, 1, 1000); + RANGE_CHECK(cfg, zm_factor.den, 1, 1000); + RANGE_CHECK(cfg, rd_mult_inter_qp_fac.den, 1, 1000); + RANGE_CHECK(cfg, rd_mult_arf_qp_fac.den, 1, 1000); + RANGE_CHECK(cfg, rd_mult_key_qp_fac.den, 1, 1000); + return VPX_CODEC_OK; } @@ -565,10 +583,6 @@ vp9_set_first_pass_stats(oxcf, &cfg->rc_twopass_stats_in); -#if CONFIG_FP_MB_STATS - oxcf->firstpass_mb_stats_in = cfg->rc_firstpass_mb_stats_in; -#endif - oxcf->color_space = extra_cfg->color_space; oxcf->color_range = extra_cfg->color_range; oxcf->render_width = extra_cfg->render_width; @@ -634,10 +648,135 @@ } if (get_level_index(oxcf->target_level) >= 0) config_target_level(oxcf); + oxcf->use_simple_encode_api = 0; // vp9_dump_encoder_config(oxcf, stderr); return VPX_CODEC_OK; } +static vpx_codec_err_t set_twopass_params_from_config( + const vpx_codec_enc_cfg_t *const cfg, struct VP9_COMP *cpi) { + if (!cfg->use_vizier_rc_params) return VPX_CODEC_OK; + if (cpi == NULL) return VPX_CODEC_ERROR; + + cpi->twopass.use_vizier_rc_params = cfg->use_vizier_rc_params; + + // The values set here are factors that will be applied to default values + // to get the final value used in the two pass code. Hence 1.0 will + // match the default behaviour when not using passed in values. + // We also apply limits here to prevent the user from applying settings + // that make no sense. + cpi->twopass.active_wq_factor = + (double)cfg->active_wq_factor.num / (double)cfg->active_wq_factor.den; + if (cpi->twopass.active_wq_factor < 0.25) + cpi->twopass.active_wq_factor = 0.25; + else if (cpi->twopass.active_wq_factor > 16.0) + cpi->twopass.active_wq_factor = 16.0; + + cpi->twopass.err_per_mb = + (double)cfg->err_per_mb_factor.num / (double)cfg->err_per_mb_factor.den; + if (cpi->twopass.err_per_mb < 0.25) + cpi->twopass.err_per_mb = 0.25; + else if (cpi->twopass.err_per_mb > 4.0) + cpi->twopass.err_per_mb = 4.0; + + cpi->twopass.sr_default_decay_limit = + (double)cfg->sr_default_decay_limit.num / + (double)cfg->sr_default_decay_limit.den; + if (cpi->twopass.sr_default_decay_limit < 0.25) + cpi->twopass.sr_default_decay_limit = 0.25; + // If the default changes this will need to change. + else if (cpi->twopass.sr_default_decay_limit > 1.33) + cpi->twopass.sr_default_decay_limit = 1.33; + + cpi->twopass.sr_diff_factor = + (double)cfg->sr_diff_factor.num / (double)cfg->sr_diff_factor.den; + if (cpi->twopass.sr_diff_factor < 0.25) + cpi->twopass.sr_diff_factor = 0.25; + else if (cpi->twopass.sr_diff_factor > 4.0) + cpi->twopass.sr_diff_factor = 4.0; + + cpi->twopass.kf_err_per_mb = (double)cfg->kf_err_per_mb_factor.num / + (double)cfg->kf_err_per_mb_factor.den; + if (cpi->twopass.kf_err_per_mb < 0.25) + cpi->twopass.kf_err_per_mb = 0.25; + else if (cpi->twopass.kf_err_per_mb > 4.0) + cpi->twopass.kf_err_per_mb = 4.0; + + cpi->twopass.kf_frame_min_boost = (double)cfg->kf_frame_min_boost_factor.num / + (double)cfg->kf_frame_min_boost_factor.den; + if (cpi->twopass.kf_frame_min_boost < 0.25) + cpi->twopass.kf_frame_min_boost = 0.25; + else if (cpi->twopass.kf_frame_min_boost > 4.0) + cpi->twopass.kf_frame_min_boost = 4.0; + + cpi->twopass.kf_frame_max_boost_first = + (double)cfg->kf_frame_max_boost_first_factor.num / + (double)cfg->kf_frame_max_boost_first_factor.den; + if (cpi->twopass.kf_frame_max_boost_first < 0.25) + cpi->twopass.kf_frame_max_boost_first = 0.25; + else if (cpi->twopass.kf_frame_max_boost_first > 4.0) + cpi->twopass.kf_frame_max_boost_first = 4.0; + + cpi->twopass.kf_frame_max_boost_subs = + (double)cfg->kf_frame_max_boost_subs_factor.num / + (double)cfg->kf_frame_max_boost_subs_factor.den; + if (cpi->twopass.kf_frame_max_boost_subs < 0.25) + cpi->twopass.kf_frame_max_boost_subs = 0.25; + else if (cpi->twopass.kf_frame_max_boost_subs > 4.0) + cpi->twopass.kf_frame_max_boost_subs = 4.0; + + cpi->twopass.kf_max_total_boost = (double)cfg->kf_max_total_boost_factor.num / + (double)cfg->kf_max_total_boost_factor.den; + if (cpi->twopass.kf_max_total_boost < 0.25) + cpi->twopass.kf_max_total_boost = 0.25; + else if (cpi->twopass.kf_max_total_boost > 4.0) + cpi->twopass.kf_max_total_boost = 4.0; + + cpi->twopass.gf_max_total_boost = (double)cfg->gf_max_total_boost_factor.num / + (double)cfg->gf_max_total_boost_factor.den; + if (cpi->twopass.gf_max_total_boost < 0.25) + cpi->twopass.gf_max_total_boost = 0.25; + else if (cpi->twopass.gf_max_total_boost > 4.0) + cpi->twopass.gf_max_total_boost = 4.0; + + cpi->twopass.gf_frame_max_boost = (double)cfg->gf_frame_max_boost_factor.num / + (double)cfg->gf_frame_max_boost_factor.den; + if (cpi->twopass.gf_frame_max_boost < 0.25) + cpi->twopass.gf_frame_max_boost = 0.25; + else if (cpi->twopass.gf_frame_max_boost > 4.0) + cpi->twopass.gf_frame_max_boost = 4.0; + + cpi->twopass.zm_factor = + (double)cfg->zm_factor.num / (double)cfg->zm_factor.den; + if (cpi->twopass.zm_factor < 0.25) + cpi->twopass.zm_factor = 0.25; + else if (cpi->twopass.zm_factor > 2.0) + cpi->twopass.zm_factor = 2.0; + + cpi->rd_ctrl.rd_mult_inter_qp_fac = (double)cfg->rd_mult_inter_qp_fac.num / + (double)cfg->rd_mult_inter_qp_fac.den; + if (cpi->rd_ctrl.rd_mult_inter_qp_fac < 0.25) + cpi->rd_ctrl.rd_mult_inter_qp_fac = 0.25; + else if (cpi->rd_ctrl.rd_mult_inter_qp_fac > 4.0) + cpi->rd_ctrl.rd_mult_inter_qp_fac = 4.0; + + cpi->rd_ctrl.rd_mult_arf_qp_fac = + (double)cfg->rd_mult_arf_qp_fac.num / (double)cfg->rd_mult_arf_qp_fac.den; + if (cpi->rd_ctrl.rd_mult_arf_qp_fac < 0.25) + cpi->rd_ctrl.rd_mult_arf_qp_fac = 0.25; + else if (cpi->rd_ctrl.rd_mult_arf_qp_fac > 4.0) + cpi->rd_ctrl.rd_mult_arf_qp_fac = 4.0; + + cpi->rd_ctrl.rd_mult_key_qp_fac = + (double)cfg->rd_mult_key_qp_fac.num / (double)cfg->rd_mult_key_qp_fac.den; + if (cpi->rd_ctrl.rd_mult_key_qp_fac < 0.25) + cpi->rd_ctrl.rd_mult_key_qp_fac = 0.25; + else if (cpi->rd_ctrl.rd_mult_key_qp_fac > 4.0) + cpi->rd_ctrl.rd_mult_key_qp_fac = 4.0; + + return VPX_CODEC_OK; +} + static vpx_codec_err_t encoder_set_config(vpx_codec_alg_priv_t *ctx, const vpx_codec_enc_cfg_t *cfg) { vpx_codec_err_t res; @@ -664,6 +803,7 @@ if (res == VPX_CODEC_OK) { ctx->cfg = *cfg; set_encoder_config(&ctx->oxcf, &ctx->cfg, &ctx->extra_cfg); + set_twopass_params_from_config(&ctx->cfg, ctx->cpi); // On profile change, request a key frame force_key |= ctx->cpi->common.profile != ctx->oxcf.profile; vp9_change_config(ctx->cpi, &ctx->oxcf); @@ -690,12 +830,32 @@ return VPX_CODEC_OK; } +static vpx_codec_err_t ctrl_get_quantizer_svc_layers(vpx_codec_alg_priv_t *ctx, + va_list args) { + int *const arg = va_arg(args, int *); + int i; + if (arg == NULL) return VPX_CODEC_INVALID_PARAM; + for (i = 0; i < VPX_SS_MAX_LAYERS; i++) { + arg[i] = ctx->cpi->svc.base_qindex[i]; + } + return VPX_CODEC_OK; +} + +static vpx_codec_err_t ctrl_get_loopfilter_level(vpx_codec_alg_priv_t *ctx, + va_list args) { + int *const arg = va_arg(args, int *); + if (arg == NULL) return VPX_CODEC_INVALID_PARAM; + *arg = ctx->cpi->common.lf.filter_level; + return VPX_CODEC_OK; +} + static vpx_codec_err_t update_extra_cfg(vpx_codec_alg_priv_t *ctx, const struct vp9_extracfg *extra_cfg) { const vpx_codec_err_t res = validate_config(ctx, &ctx->cfg, extra_cfg); if (res == VPX_CODEC_OK) { ctx->extra_cfg = *extra_cfg; set_encoder_config(&ctx->oxcf, &ctx->cfg, &ctx->extra_cfg); + set_twopass_params_from_config(&ctx->cfg, ctx->cpi); vp9_change_config(ctx->cpi, &ctx->oxcf); } return res; @@ -886,6 +1046,18 @@ return update_extra_cfg(ctx, &extra_cfg); } +static vpx_codec_err_t ctrl_set_rtc_external_ratectrl(vpx_codec_alg_priv_t *ctx, + va_list args) { + VP9_COMP *const cpi = ctx->cpi; + const unsigned int data = va_arg(args, unsigned int); + if (data) { + cpi->compute_frame_low_motion_onepass = 0; + cpi->rc.constrain_gf_key_freq_onepass_vbr = 0; + cpi->cyclic_refresh->content_mode = 0; + } + return VPX_CODEC_OK; +} + static vpx_codec_err_t ctrl_enable_motion_vector_unit_test( vpx_codec_alg_priv_t *ctx, va_list args) { struct vp9_extracfg extra_cfg = ctx->extra_cfg; @@ -940,6 +1112,7 @@ #endif priv->cpi = vp9_create_compressor(&priv->oxcf, priv->buffer_pool); if (priv->cpi == NULL) res = VPX_CODEC_MEM_ERROR; + set_twopass_params_from_config(&priv->cfg, priv->cpi); } } @@ -1816,11 +1989,14 @@ { VP9E_SET_SVC_SPATIAL_LAYER_SYNC, ctrl_set_svc_spatial_layer_sync }, { VP9E_SET_DELTA_Q_UV, ctrl_set_delta_q_uv }, { VP9E_SET_DISABLE_LOOPFILTER, ctrl_set_disable_loopfilter }, + { VP9E_SET_RTC_EXTERNAL_RATECTRL, ctrl_set_rtc_external_ratectrl }, { VP9E_SET_EXTERNAL_RATE_CONTROL, ctrl_set_external_rate_control }, // Getters { VP8E_GET_LAST_QUANTIZER, ctrl_get_quantizer }, { VP8E_GET_LAST_QUANTIZER_64, ctrl_get_quantizer64 }, + { VP9E_GET_LAST_QUANTIZER_SVC_LAYERS, ctrl_get_quantizer_svc_layers }, + { VP9E_GET_LOOPFILTER_LEVEL, ctrl_get_loopfilter_level }, { VP9_GET_REFERENCE, ctrl_get_reference }, { VP9E_GET_SVC_LAYER_ID, ctrl_get_svc_layer_id }, { VP9E_GET_ACTIVEMAP, ctrl_get_active_map }, @@ -1883,14 +2059,30 @@ VPX_SS_DEFAULT_LAYERS, // ss_number_layers { 0 }, - { 0 }, // ss_target_bitrate - 1, // ts_number_layers - { 0 }, // ts_target_bitrate - { 0 }, // ts_rate_decimator - 0, // ts_periodicity - { 0 }, // ts_layer_id - { 0 }, // layer_taget_bitrate - 0 // temporal_layering_mode + { 0 }, // ss_target_bitrate + 1, // ts_number_layers + { 0 }, // ts_target_bitrate + { 0 }, // ts_rate_decimator + 0, // ts_periodicity + { 0 }, // ts_layer_id + { 0 }, // layer_taget_bitrate + 0, // temporal_layering_mode + 0, // use_vizier_rc_params + { 1, 1 }, // active_wq_factor + { 1, 1 }, // err_per_mb_factor + { 1, 1 }, // sr_default_decay_limit + { 1, 1 }, // sr_diff_factor + { 1, 1 }, // kf_err_per_mb_factor + { 1, 1 }, // kf_frame_min_boost_factor + { 1, 1 }, // kf_frame_max_boost_first_factor + { 1, 1 }, // kf_frame_max_boost_subs_factor + { 1, 1 }, // kf_max_total_boost_factor + { 1, 1 }, // gf_max_total_boost_factor + { 1, 1 }, // gf_frame_max_boost_factor + { 1, 1 }, // zm_factor + { 1, 1 }, // rd_mult_inter_qp_fac + { 1, 1 }, // rd_mult_arf_qp_fac + { 1, 1 }, // rd_mult_key_qp_fac } }, }; @@ -2109,11 +2301,6 @@ DUMP_STRUCT_VALUE(fp, oxcf, target_level); // TODO(angiebird): dump two_pass_stats_in - -#if CONFIG_FP_MB_STATS - // TODO(angiebird): dump firstpass_mb_stats_in -#endif - DUMP_STRUCT_VALUE(fp, oxcf, tuning); DUMP_STRUCT_VALUE(fp, oxcf, content); #if CONFIG_VP9_HIGHBITDEPTH @@ -2127,6 +2314,8 @@ DUMP_STRUCT_VALUE(fp, oxcf, row_mt); DUMP_STRUCT_VALUE(fp, oxcf, motion_vector_unit_test); + DUMP_STRUCT_VALUE(fp, oxcf, delta_q_uv); + DUMP_STRUCT_VALUE(fp, oxcf, use_simple_encode_api); } FRAME_INFO vp9_get_frame_info(const VP9EncoderConfig *oxcf) { diff -Nru libvpx-1.10.0/vpx/internal/vpx_codec_internal.h libvpx-1.11.0/vpx/internal/vpx_codec_internal.h --- libvpx-1.10.0/vpx/internal/vpx_codec_internal.h 2021-03-18 19:59:46.000000000 +0000 +++ libvpx-1.11.0/vpx/internal/vpx_codec_internal.h 2021-10-06 17:41:19.000000000 +0000 @@ -283,7 +283,7 @@ vpx_codec_enc_cfg_t cfg; } vpx_codec_enc_cfg_map_t; -/*!\brief Decoder algorithm interface interface +/*!\brief Decoder algorithm interface * * All decoders \ref MUST expose a variable of this type. */ diff -Nru libvpx-1.10.0/vpx/src/vpx_image.c libvpx-1.11.0/vpx/src/vpx_image.c --- libvpx-1.10.0/vpx/src/vpx_image.c 2021-03-18 19:59:46.000000000 +0000 +++ libvpx-1.11.0/vpx/src/vpx_image.c 2021-10-06 17:41:19.000000000 +0000 @@ -8,6 +8,7 @@ * be found in the AUTHORS file in the root of the source tree. */ +#include #include #include @@ -22,7 +23,9 @@ unsigned char *img_data) { unsigned int h, w, s, xcs, ycs, bps; unsigned int stride_in_bytes; - int align; + unsigned int align; + + if (img != NULL) memset(img, 0, sizeof(vpx_image_t)); /* Treat align==0 like align==1 */ if (!buf_align) buf_align = 1; @@ -88,8 +91,6 @@ if (!img) goto fail; img->self_allocd = 1; - } else { - memset(img, 0, sizeof(vpx_image_t)); } img->img_data = img_data; @@ -152,9 +153,8 @@ int vpx_img_set_rect(vpx_image_t *img, unsigned int x, unsigned int y, unsigned int w, unsigned int h) { - unsigned char *data; - - if (x + w <= img->w && y + h <= img->h) { + if (x <= UINT_MAX - w && x + w <= img->w && y <= UINT_MAX - h && + y + h <= img->h) { img->d_w = w; img->d_h = h; @@ -165,7 +165,7 @@ } else { const int bytes_per_sample = (img->fmt & VPX_IMG_FMT_HIGHBITDEPTH) ? 2 : 1; - data = img->img_data; + unsigned char *data = img->img_data; if (img->fmt & VPX_IMG_FMT_HAS_ALPHA) { img->planes[VPX_PLANE_ALPHA] = diff -Nru libvpx-1.10.0/vpx/vp8cx.h libvpx-1.11.0/vpx/vp8cx.h --- libvpx-1.10.0/vpx/vp8cx.h 2021-03-18 19:59:46.000000000 +0000 +++ libvpx-1.11.0/vpx/vp8cx.h 2021-10-06 17:41:19.000000000 +0000 @@ -712,6 +712,36 @@ * Supported in codecs: VP9 */ VP9E_SET_EXTERNAL_RATE_CONTROL, + + /*!\brief Codec control to disable internal features in rate control. + * + * This will do 3 things, only for 1 pass: + * - Turn off low motion computation + * - Turn off gf update constraint on key frame frequency + * - Turn off content mode for cyclic refresh + * + * With those, the rate control is expected to work exactly the same as the + * interface provided in ratectrl_rtc.cc/h + * + * Supported in codecs: VP9 + */ + VP9E_SET_RTC_EXTERNAL_RATECTRL, + + /*!\brief Codec control function to get loopfilter level in the encoder. + * + * Supported in codecs: VP9 + */ + VP9E_GET_LOOPFILTER_LEVEL, + + /*!\brief Codec control to get last quantizers for all spatial layers. + * + * Return value uses an array of internal quantizers scale defined by the + * codec, for all spatial layers. + * The size of the array passed in should be #VPX_SS_MAX_LAYERS. + * + * Supported in codecs: VP9 + */ + VP9E_GET_LAST_QUANTIZER_SVC_LAYERS, }; /*!\brief vpx 1-D scaling mode @@ -969,6 +999,9 @@ #define VPX_CTRL_VP8E_GET_LAST_QUANTIZER VPX_CTRL_USE_TYPE(VP8E_GET_LAST_QUANTIZER_64, int *) #define VPX_CTRL_VP8E_GET_LAST_QUANTIZER_64 +VPX_CTRL_USE_TYPE(VP9E_GET_LAST_QUANTIZER_SVC_LAYERS, int *) +#define VPX_CTRL_VP9E_GET_LAST_QUANTIZER_SVC_LAYERS + VPX_CTRL_USE_TYPE(VP9E_GET_SVC_LAYER_ID, vpx_svc_layer_id_t *) #define VPX_CTRL_VP9E_GET_SVC_LAYER_ID @@ -1037,6 +1070,9 @@ VPX_CTRL_USE_TYPE(VP9E_GET_LEVEL, int *) #define VPX_CTRL_VP9E_GET_LEVEL +VPX_CTRL_USE_TYPE(VP9E_GET_LOOPFILTER_LEVEL, int *) +#define VPX_CTRL_VP9E_GET_LOOPFILTER_LEVEL + VPX_CTRL_USE_TYPE(VP9E_ENABLE_MOTION_VECTOR_UNIT_TEST, unsigned int) #define VPX_CTRL_VP9E_ENABLE_MOTION_VECTOR_UNIT_TEST @@ -1068,6 +1104,9 @@ VPX_CTRL_USE_TYPE(VP9E_SET_DISABLE_LOOPFILTER, int) #define VPX_CTRL_VP9E_SET_DISABLE_LOOPFILTER +VPX_CTRL_USE_TYPE(VP9E_SET_RTC_EXTERNAL_RATECTRL, int) +#define VPX_CTRL_VP9E_SET_RTC_EXTERNAL_RATECTRL + VPX_CTRL_USE_TYPE(VP9E_SET_EXTERNAL_RATE_CONTROL, vpx_rc_funcs_t *) #define VPX_CTRL_VP9E_SET_EXTERNAL_RATE_CONTROL diff -Nru libvpx-1.10.0/vpx/vpx_encoder.h libvpx-1.11.0/vpx/vpx_encoder.h --- libvpx-1.10.0/vpx/vpx_encoder.h 2021-03-18 19:59:46.000000000 +0000 +++ libvpx-1.11.0/vpx/vpx_encoder.h 2021-10-06 17:41:19.000000000 +0000 @@ -58,7 +58,7 @@ * fields to structures */ #define VPX_ENCODER_ABI_VERSION \ - (14 + VPX_CODEC_ABI_VERSION + \ + (15 + VPX_CODEC_ABI_VERSION + \ VPX_EXT_RATECTRL_ABI_VERSION) /**<\hideinitializer*/ /*! \brief Encoder capabilities bitfield @@ -457,7 +457,7 @@ /*!\brief Target data rate * - * Target bandwidth to use for this stream, in kilobits per second. + * Target bitrate to use for this stream, in kilobits per second. */ unsigned int rc_target_bitrate; @@ -498,7 +498,7 @@ * undershoot level (current rate vs target) beyond which more aggressive * corrective measures are taken. * * - * Valid values in the range VP8:0-1000 VP9: 0-100. + * Valid values in the range VP8:0-100 VP9: 0-100. */ unsigned int rc_undershoot_pct; @@ -513,7 +513,7 @@ * overshoot level (current rate vs target) beyond which more aggressive * corrective measures are taken. * - * Valid values in the range VP8:0-1000 VP9: 0-100. + * Valid values in the range VP8:0-100 VP9: 0-100. */ unsigned int rc_overshoot_pct; @@ -693,6 +693,151 @@ * */ int temporal_layering_mode; + + /*!\brief A flag indicating whether to use external rate control parameters. + * By default is 0. If set to 1, the following parameters will be used in the + * rate control system. + */ + int use_vizier_rc_params; + + /*!\brief Active worst quality factor. + * + * Rate control parameters, set from external experiment results. + * Only when |use_vizier_rc_params| is set to 1, the pass in value will be + * used. Otherwise, the default value is used. + * + */ + vpx_rational_t active_wq_factor; + + /*!\brief Error per macroblock adjustment factor. + * + * Rate control parameters, set from external experiment results. + * Only when |use_vizier_rc_params| is set to 1, the pass in value will be + * used. Otherwise, the default value is used. + * + */ + vpx_rational_t err_per_mb_factor; + + /*!\brief Second reference default decay limit. + * + * Rate control parameters, set from external experiment results. + * Only when |use_vizier_rc_params| is set to 1, the pass in value will be + * used. Otherwise, the default value is used. + * + */ + vpx_rational_t sr_default_decay_limit; + + /*!\brief Second reference difference factor. + * + * Rate control parameters, set from external experiment results. + * Only when |use_vizier_rc_params| is set to 1, the pass in value will be + * used. Otherwise, the default value is used. + * + */ + vpx_rational_t sr_diff_factor; + + /*!\brief Keyframe error per macroblock adjustment factor. + * + * Rate control parameters, set from external experiment results. + * Only when |use_vizier_rc_params| is set to 1, the pass in value will be + * used. Otherwise, the default value is used. + * + */ + vpx_rational_t kf_err_per_mb_factor; + + /*!\brief Keyframe minimum boost adjustment factor. + * + * Rate control parameters, set from external experiment results. + * Only when |use_vizier_rc_params| is set to 1, the pass in value will be + * used. Otherwise, the default value is used. + * + */ + vpx_rational_t kf_frame_min_boost_factor; + + /*!\brief Keyframe maximum boost adjustment factor, for the first keyframe + * in a chunk. + * + * Rate control parameters, set from external experiment results. + * Only when |use_vizier_rc_params| is set to 1, the pass in value will be + * used. Otherwise, the default value is used. + * + */ + vpx_rational_t kf_frame_max_boost_first_factor; + + /*!\brief Keyframe maximum boost adjustment factor, for subsequent keyframes. + * + * Rate control parameters, set from external experiment results. + * Only when |use_vizier_rc_params| is set to 1, the pass in value will be + * used. Otherwise, the default value is used. + * + */ + vpx_rational_t kf_frame_max_boost_subs_factor; + + /*!\brief Keyframe maximum total boost adjustment factor. + * + * Rate control parameters, set from external experiment results. + * Only when |use_vizier_rc_params| is set to 1, the pass in value will be + * used. Otherwise, the default value is used. + * + */ + vpx_rational_t kf_max_total_boost_factor; + + /*!\brief Golden frame maximum total boost adjustment factor. + * + * Rate control parameters, set from external experiment results. + * Only when |use_vizier_rc_params| is set to 1, the pass in value will be + * used. Otherwise, the default value is used. + * + */ + vpx_rational_t gf_max_total_boost_factor; + + /*!\brief Golden frame maximum boost adjustment factor. + * + * Rate control parameters, set from external experiment results. + * Only when |use_vizier_rc_params| is set to 1, the pass in value will be + * used. Otherwise, the default value is used. + * + */ + vpx_rational_t gf_frame_max_boost_factor; + + /*!\brief Zero motion power factor. + * + * Rate control parameters, set from external experiment results. + * Only when |use_vizier_rc_params| is set to 1, the pass in value will be + * used. Otherwise, the default value is used. + * + */ + vpx_rational_t zm_factor; + + /*!\brief Rate-distortion multiplier for inter frames. + * The multiplier is a crucial parameter in the calculation of rate distortion + * cost. It is often related to the qp (qindex) value. + * Rate control parameters, could be set from external experiment results. + * Only when |use_vizier_rc_params| is set to 1, the pass in value will be + * used. Otherwise, the default value is used. + * + */ + vpx_rational_t rd_mult_inter_qp_fac; + + /*!\brief Rate-distortion multiplier for alt-ref frames. + * The multiplier is a crucial parameter in the calculation of rate distortion + * cost. It is often related to the qp (qindex) value. + * Rate control parameters, could be set from external experiment results. + * Only when |use_vizier_rc_params| is set to 1, the pass in value will be + * used. Otherwise, the default value is used. + * + */ + vpx_rational_t rd_mult_arf_qp_fac; + + /*!\brief Rate-distortion multiplier for key frames. + * The multiplier is a crucial parameter in the calculation of rate distortion + * cost. It is often related to the qp (qindex) value. + * Rate control parameters, could be set from external experiment results. + * Only when |use_vizier_rc_params| is set to 1, the pass in value will be + * used. Otherwise, the default value is used. + * + */ + vpx_rational_t rd_mult_key_qp_fac; } vpx_codec_enc_cfg_t; /**< alias for struct vpx_codec_enc_cfg */ /*!\brief vp9 svc extra configure parameters diff -Nru libvpx-1.10.0/vpx/vpx_image.h libvpx-1.11.0/vpx/vpx_image.h --- libvpx-1.10.0/vpx/vpx_image.h 2021-03-18 19:59:46.000000000 +0000 +++ libvpx-1.11.0/vpx/vpx_image.h 2021-10-06 17:41:19.000000000 +0000 @@ -171,7 +171,8 @@ /*!\brief Set the rectangle identifying the displayed portion of the image * * Updates the displayed rectangle (aka viewport) on the image surface to - * match the specified coordinates and size. + * match the specified coordinates and size. Specifically, sets img->d_w, + * img->d_h, and elements of the img->planes[] array. * * \param[in] img Image descriptor * \param[in] x leftmost column @@ -179,7 +180,7 @@ * \param[in] w width * \param[in] h height * - * \return 0 if the requested rectangle is valid, nonzero otherwise. + * \return 0 if the requested rectangle is valid, nonzero (-1) otherwise. */ int vpx_img_set_rect(vpx_image_t *img, unsigned int x, unsigned int y, unsigned int w, unsigned int h); diff -Nru libvpx-1.10.0/vpx_dsp/arm/avg_neon.c libvpx-1.11.0/vpx_dsp/arm/avg_neon.c --- libvpx-1.10.0/vpx_dsp/arm/avg_neon.c 2021-03-18 19:59:46.000000000 +0000 +++ libvpx-1.11.0/vpx_dsp/arm/avg_neon.c 2021-10-06 17:41:19.000000000 +0000 @@ -22,15 +22,13 @@ uint32_t vpx_avg_4x4_neon(const uint8_t *a, int a_stride) { const uint8x16_t b = load_unaligned_u8q(a, a_stride); const uint16x8_t c = vaddl_u8(vget_low_u8(b), vget_high_u8(b)); - const uint32x2_t d = horizontal_add_uint16x8(c); - return vget_lane_u32(vrshr_n_u32(d, 4), 0); + return (horizontal_add_uint16x8(c) + (1 << 3)) >> 4; } uint32_t vpx_avg_8x8_neon(const uint8_t *a, int a_stride) { int i; uint8x8_t b, c; uint16x8_t sum; - uint32x2_t d; b = vld1_u8(a); a += a_stride; c = vld1_u8(a); @@ -43,9 +41,7 @@ sum = vaddw_u8(sum, d); } - d = horizontal_add_uint16x8(sum); - - return vget_lane_u32(vrshr_n_u32(d, 6), 0); + return (horizontal_add_uint16x8(sum) + (1 << 5)) >> 6; } // coeff: 16 bits, dynamic range [-32640, 32640]. @@ -139,8 +135,7 @@ ref += 16; } - return vget_lane_s16(vreinterpret_s16_u32(horizontal_add_uint16x8(vec_sum)), - 0); + return (int16_t)horizontal_add_uint16x8(vec_sum); } // ref, src = [0, 510] - max diff = 16-bits diff -Nru libvpx-1.10.0/vpx_dsp/arm/fdct_partial_neon.c libvpx-1.11.0/vpx_dsp/arm/fdct_partial_neon.c --- libvpx-1.10.0/vpx_dsp/arm/fdct_partial_neon.c 2021-03-18 19:59:46.000000000 +0000 +++ libvpx-1.11.0/vpx_dsp/arm/fdct_partial_neon.c 2021-10-06 17:41:19.000000000 +0000 @@ -15,19 +15,10 @@ #include "vpx_dsp/arm/mem_neon.h" #include "vpx_dsp/arm/sum_neon.h" -static INLINE tran_low_t get_lane(const int32x2_t a) { -#if CONFIG_VP9_HIGHBITDEPTH - return vget_lane_s32(a, 0); -#else - return vget_lane_s16(vreinterpret_s16_s32(a), 0); -#endif // CONFIG_VP9_HIGHBITDETPH -} - void vpx_fdct4x4_1_neon(const int16_t *input, tran_low_t *output, int stride) { int16x4_t a0, a1, a2, a3; int16x8_t b0, b1; int16x8_t c; - int32x2_t d; a0 = vld1_s16(input); input += stride; @@ -42,9 +33,7 @@ c = vaddq_s16(b0, b1); - d = horizontal_add_int16x8(c); - - output[0] = get_lane(vshl_n_s32(d, 1)); + output[0] = (tran_low_t)(horizontal_add_int16x8(c) << 1); output[1] = 0; } @@ -57,7 +46,7 @@ sum = vaddq_s16(sum, input_00); } - output[0] = get_lane(horizontal_add_int16x8(sum)); + output[0] = (tran_low_t)horizontal_add_int16x8(sum); output[1] = 0; } @@ -66,7 +55,7 @@ int r; int16x8_t left = vld1q_s16(input); int16x8_t right = vld1q_s16(input + 8); - int32x2_t sum; + int32_t sum; input += stride; for (r = 1; r < 16; ++r) { @@ -77,9 +66,9 @@ right = vaddq_s16(right, b); } - sum = vadd_s32(horizontal_add_int16x8(left), horizontal_add_int16x8(right)); + sum = horizontal_add_int16x8(left) + horizontal_add_int16x8(right); - output[0] = get_lane(vshr_n_s32(sum, 1)); + output[0] = (tran_low_t)(sum >> 1); output[1] = 0; } @@ -90,7 +79,7 @@ int16x8_t a1 = vld1q_s16(input + 8); int16x8_t a2 = vld1q_s16(input + 16); int16x8_t a3 = vld1q_s16(input + 24); - int32x2_t sum; + int32_t sum; input += stride; for (r = 1; r < 32; ++r) { @@ -105,9 +94,10 @@ a3 = vaddq_s16(a3, b3); } - sum = vadd_s32(horizontal_add_int16x8(a0), horizontal_add_int16x8(a1)); - sum = vadd_s32(sum, horizontal_add_int16x8(a2)); - sum = vadd_s32(sum, horizontal_add_int16x8(a3)); - output[0] = get_lane(vshr_n_s32(sum, 3)); + sum = horizontal_add_int16x8(a0); + sum += horizontal_add_int16x8(a1); + sum += horizontal_add_int16x8(a2); + sum += horizontal_add_int16x8(a3); + output[0] = (tran_low_t)(sum >> 3); output[1] = 0; } diff -Nru libvpx-1.10.0/vpx_dsp/arm/mem_neon.h libvpx-1.11.0/vpx_dsp/arm/mem_neon.h --- libvpx-1.10.0/vpx_dsp/arm/mem_neon.h 2021-03-18 19:59:46.000000000 +0000 +++ libvpx-1.11.0/vpx_dsp/arm/mem_neon.h 2021-10-06 17:41:19.000000000 +0000 @@ -19,6 +19,24 @@ #include "vpx/vpx_integer.h" #include "vpx_dsp/vpx_dsp_common.h" +// Support for these xN intrinsics is lacking in older versions of GCC. +#if defined(__GNUC__) && !defined(__clang__) +#if __GNUC__ < 8 || defined(__arm__) +static INLINE uint8x16x2_t vld1q_u8_x2(uint8_t const *ptr) { + uint8x16x2_t res = { { vld1q_u8(ptr + 0 * 16), vld1q_u8(ptr + 1 * 16) } }; + return res; +} +#endif + +#if __GNUC__ < 9 || defined(__arm__) +static INLINE uint8x16x3_t vld1q_u8_x3(uint8_t const *ptr) { + uint8x16x3_t res = { { vld1q_u8(ptr + 0 * 16), vld1q_u8(ptr + 1 * 16), + vld1q_u8(ptr + 2 * 16) } }; + return res; +} +#endif +#endif + static INLINE int16x4_t create_s16x4_neon(const int16_t c0, const int16_t c1, const int16_t c2, const int16_t c3) { return vcreate_s16((uint16_t)c0 | ((uint32_t)c1 << 16) | @@ -95,7 +113,8 @@ } // Load 2 sets of 4 bytes when alignment is not guaranteed. -static INLINE uint8x8_t load_unaligned_u8(const uint8_t *buf, int stride) { +static INLINE uint8x8_t load_unaligned_u8(const uint8_t *buf, + ptrdiff_t stride) { uint32_t a; uint32x2_t a_u32 = vdup_n_u32(0); if (stride == 4) return vld1_u8(buf); @@ -108,7 +127,7 @@ } // Store 2 sets of 4 bytes when alignment is not guaranteed. -static INLINE void store_unaligned_u8(uint8_t *buf, int stride, +static INLINE void store_unaligned_u8(uint8_t *buf, ptrdiff_t stride, const uint8x8_t a) { const uint32x2_t a_u32 = vreinterpret_u32_u8(a); if (stride == 4) { @@ -121,7 +140,8 @@ } // Load 4 sets of 4 bytes when alignment is not guaranteed. -static INLINE uint8x16_t load_unaligned_u8q(const uint8_t *buf, int stride) { +static INLINE uint8x16_t load_unaligned_u8q(const uint8_t *buf, + ptrdiff_t stride) { uint32_t a; uint32x4_t a_u32 = vdupq_n_u32(0); if (stride == 4) return vld1q_u8(buf); @@ -141,7 +161,7 @@ } // Store 4 sets of 4 bytes when alignment is not guaranteed. -static INLINE void store_unaligned_u8q(uint8_t *buf, int stride, +static INLINE void store_unaligned_u8q(uint8_t *buf, ptrdiff_t stride, const uint8x16_t a) { const uint32x4_t a_u32 = vreinterpretq_u32_u8(a); if (stride == 4) { @@ -158,7 +178,7 @@ } // Load 2 sets of 4 bytes when alignment is guaranteed. -static INLINE uint8x8_t load_u8(const uint8_t *buf, int stride) { +static INLINE uint8x8_t load_u8(const uint8_t *buf, ptrdiff_t stride) { uint32x2_t a = vdup_n_u32(0); assert(!((intptr_t)buf % sizeof(uint32_t))); @@ -171,7 +191,7 @@ } // Store 2 sets of 4 bytes when alignment is guaranteed. -static INLINE void store_u8(uint8_t *buf, int stride, const uint8x8_t a) { +static INLINE void store_u8(uint8_t *buf, ptrdiff_t stride, const uint8x8_t a) { uint32x2_t a_u32 = vreinterpret_u32_u8(a); assert(!((intptr_t)buf % sizeof(uint32_t))); diff -Nru libvpx-1.10.0/vpx_dsp/arm/sad4d_neon.c libvpx-1.11.0/vpx_dsp/arm/sad4d_neon.c --- libvpx-1.10.0/vpx_dsp/arm/sad4d_neon.c 2021-03-18 19:59:46.000000000 +0000 +++ libvpx-1.11.0/vpx_dsp/arm/sad4d_neon.c 2021-10-06 17:41:19.000000000 +0000 @@ -34,7 +34,9 @@ uint32_t *const res) { int i; uint16x8_t abs[2] = { vdupq_n_u16(0), vdupq_n_u16(0) }; +#if !defined(__aarch64__) uint16x4_t a[2]; +#endif uint32x4_t r; assert(!((intptr_t)src_ptr % sizeof(uint32_t))); @@ -51,9 +53,14 @@ abs[1] = vabal_u8(abs[1], s, ref23); } +#if defined(__aarch64__) + abs[0] = vpaddq_u16(abs[0], abs[1]); + r = vpaddlq_u16(abs[0]); +#else a[0] = vpadd_u16(vget_low_u16(abs[0]), vget_high_u16(abs[0])); a[1] = vpadd_u16(vget_low_u16(abs[1]), vget_high_u16(abs[1])); r = vpaddlq_u16(vcombine_u16(a[0], a[1])); +#endif vst1q_u32(res, r); } @@ -74,6 +81,12 @@ // Can handle 512 pixels' sad sum (such as 16x32 or 32x16) static INLINE void sad_512_pel_final_neon(const uint16x8_t *sum /*[4]*/, uint32_t *const res) { +#if defined(__aarch64__) + const uint16x8_t a0 = vpaddq_u16(sum[0], sum[1]); + const uint16x8_t a1 = vpaddq_u16(sum[2], sum[3]); + const uint16x8_t b0 = vpaddq_u16(a0, a1); + const uint32x4_t r = vpaddlq_u16(b0); +#else const uint16x4_t a0 = vadd_u16(vget_low_u16(sum[0]), vget_high_u16(sum[0])); const uint16x4_t a1 = vadd_u16(vget_low_u16(sum[1]), vget_high_u16(sum[1])); const uint16x4_t a2 = vadd_u16(vget_low_u16(sum[2]), vget_high_u16(sum[2])); @@ -81,12 +94,23 @@ const uint16x4_t b0 = vpadd_u16(a0, a1); const uint16x4_t b1 = vpadd_u16(a2, a3); const uint32x4_t r = vpaddlq_u16(vcombine_u16(b0, b1)); +#endif vst1q_u32(res, r); } +#if defined(__arm__) || !defined(__ARM_FEATURE_DOTPROD) + // Can handle 1024 pixels' sad sum (such as 32x32) static INLINE void sad_1024_pel_final_neon(const uint16x8_t *sum /*[4]*/, uint32_t *const res) { +#if defined(__aarch64__) + const uint16x8_t a0 = vpaddq_u16(sum[0], sum[1]); + const uint16x8_t a1 = vpaddq_u16(sum[2], sum[3]); + const uint32x4_t b0 = vpaddlq_u16(a0); + const uint32x4_t b1 = vpaddlq_u16(a1); + const uint32x4_t r = vpaddq_u32(b0, b1); + vst1q_u32(res, r); +#else const uint16x4_t a0 = vpadd_u16(vget_low_u16(sum[0]), vget_high_u16(sum[0])); const uint16x4_t a1 = vpadd_u16(vget_low_u16(sum[1]), vget_high_u16(sum[1])); const uint16x4_t a2 = vpadd_u16(vget_low_u16(sum[2]), vget_high_u16(sum[2])); @@ -96,11 +120,22 @@ const uint32x2_t c0 = vpadd_u32(vget_low_u32(b0), vget_high_u32(b0)); const uint32x2_t c1 = vpadd_u32(vget_low_u32(b1), vget_high_u32(b1)); vst1q_u32(res, vcombine_u32(c0, c1)); +#endif } // Can handle 2048 pixels' sad sum (such as 32x64 or 64x32) static INLINE void sad_2048_pel_final_neon(const uint16x8_t *sum /*[4]*/, uint32_t *const res) { +#if defined(__aarch64__) + const uint32x4_t a0 = vpaddlq_u16(sum[0]); + const uint32x4_t a1 = vpaddlq_u16(sum[1]); + const uint32x4_t a2 = vpaddlq_u16(sum[2]); + const uint32x4_t a3 = vpaddlq_u16(sum[3]); + const uint32x4_t b0 = vpaddq_u32(a0, a1); + const uint32x4_t b1 = vpaddq_u32(a2, a3); + const uint32x4_t r = vpaddq_u32(b0, b1); + vst1q_u32(res, r); +#else const uint32x4_t a0 = vpaddlq_u16(sum[0]); const uint32x4_t a1 = vpaddlq_u16(sum[1]); const uint32x4_t a2 = vpaddlq_u16(sum[2]); @@ -112,11 +147,30 @@ const uint32x2_t c0 = vpadd_u32(b0, b1); const uint32x2_t c1 = vpadd_u32(b2, b3); vst1q_u32(res, vcombine_u32(c0, c1)); +#endif } // Can handle 4096 pixels' sad sum (such as 64x64) static INLINE void sad_4096_pel_final_neon(const uint16x8_t *sum /*[8]*/, uint32_t *const res) { +#if defined(__aarch64__) + const uint32x4_t a0 = vpaddlq_u16(sum[0]); + const uint32x4_t a1 = vpaddlq_u16(sum[1]); + const uint32x4_t a2 = vpaddlq_u16(sum[2]); + const uint32x4_t a3 = vpaddlq_u16(sum[3]); + const uint32x4_t a4 = vpaddlq_u16(sum[4]); + const uint32x4_t a5 = vpaddlq_u16(sum[5]); + const uint32x4_t a6 = vpaddlq_u16(sum[6]); + const uint32x4_t a7 = vpaddlq_u16(sum[7]); + const uint32x4_t b0 = vaddq_u32(a0, a1); + const uint32x4_t b1 = vaddq_u32(a2, a3); + const uint32x4_t b2 = vaddq_u32(a4, a5); + const uint32x4_t b3 = vaddq_u32(a6, a7); + const uint32x4_t c0 = vpaddq_u32(b0, b1); + const uint32x4_t c1 = vpaddq_u32(b2, b3); + const uint32x4_t r = vpaddq_u32(c0, c1); + vst1q_u32(res, r); +#else const uint32x4_t a0 = vpaddlq_u16(sum[0]); const uint32x4_t a1 = vpaddlq_u16(sum[1]); const uint32x4_t a2 = vpaddlq_u16(sum[2]); @@ -136,8 +190,11 @@ const uint32x2_t d0 = vpadd_u32(c0, c1); const uint32x2_t d1 = vpadd_u32(c2, c3); vst1q_u32(res, vcombine_u32(d0, d1)); +#endif } +#endif + static INLINE void sad8x_4d(const uint8_t *src_ptr, int src_stride, const uint8_t *const ref_array[4], int ref_stride, uint32_t *res, const int height) { @@ -180,6 +237,41 @@ //////////////////////////////////////////////////////////////////////////////// +#if defined(__aarch64__) && defined(__ARM_FEATURE_DOTPROD) && \ + (__ARM_FEATURE_DOTPROD == 1) + +static INLINE void sad16_neon(const uint8_t *ref_ptr, const uint8x16_t src_ptr, + uint32x4_t *const sum) { + const uint8x16_t r = vld1q_u8(ref_ptr); + const uint8x16_t diff = vabdq_u8(src_ptr, r); + *sum = vdotq_u32(*sum, diff, vdupq_n_u8(1)); +} + +static INLINE void sad16x_4d(const uint8_t *src_ptr, int src_stride, + const uint8_t *const ref_array[4], int ref_stride, + uint32_t *res, const int height) { + int i; + uint32x4_t r0, r1; + const uint8_t *ref_loop[4] = { ref_array[0], ref_array[1], ref_array[2], + ref_array[3] }; + uint32x4_t sum[4] = { vdupq_n_u32(0), vdupq_n_u32(0), vdupq_n_u32(0), + vdupq_n_u32(0) }; + + for (i = 0; i < height; ++i) { + const uint8x16_t s = vld1q_u8(src_ptr + i * src_stride); + sad16_neon(ref_loop[0] + i * ref_stride, s, &sum[0]); + sad16_neon(ref_loop[1] + i * ref_stride, s, &sum[1]); + sad16_neon(ref_loop[2] + i * ref_stride, s, &sum[2]); + sad16_neon(ref_loop[3] + i * ref_stride, s, &sum[3]); + } + + r0 = vpaddq_u32(sum[0], sum[1]); + r1 = vpaddq_u32(sum[2], sum[3]); + vst1q_u32(res, vpaddq_u32(r0, r1)); +} + +#else + static INLINE void sad16_neon(const uint8_t *ref_ptr, const uint8x16_t src_ptr, uint16x8_t *const sum) { const uint8x16_t r = vld1q_u8(ref_ptr); @@ -190,7 +282,7 @@ static INLINE void sad16x_4d(const uint8_t *src_ptr, int src_stride, const uint8_t *const ref_array[4], int ref_stride, uint32_t *res, const int height) { - int i, j; + int i; const uint8_t *ref_loop[4] = { ref_array[0], ref_array[1], ref_array[2], ref_array[3] }; uint16x8_t sum[4] = { vdupq_n_u16(0), vdupq_n_u16(0), vdupq_n_u16(0), @@ -199,15 +291,22 @@ for (i = 0; i < height; ++i) { const uint8x16_t s = vld1q_u8(src_ptr); src_ptr += src_stride; - for (j = 0; j < 4; ++j) { - sad16_neon(ref_loop[j], s, &sum[j]); - ref_loop[j] += ref_stride; - } + /* Manual unrolling here stops the compiler from getting confused. */ + sad16_neon(ref_loop[0], s, &sum[0]); + ref_loop[0] += ref_stride; + sad16_neon(ref_loop[1], s, &sum[1]); + ref_loop[1] += ref_stride; + sad16_neon(ref_loop[2], s, &sum[2]); + ref_loop[2] += ref_stride; + sad16_neon(ref_loop[3], s, &sum[3]); + ref_loop[3] += ref_stride; } sad_512_pel_final_neon(sum, res); } +#endif + void vpx_sad16x8x4d_neon(const uint8_t *src_ptr, int src_stride, const uint8_t *const ref_array[4], int ref_stride, uint32_t *res) { @@ -228,6 +327,67 @@ //////////////////////////////////////////////////////////////////////////////// +#if defined(__aarch64__) && defined(__ARM_FEATURE_DOTPROD) && \ + (__ARM_FEATURE_DOTPROD == 1) + +static INLINE void sad32x_4d(const uint8_t *src_ptr, int src_stride, + const uint8_t *const ref_array[4], int ref_stride, + uint32_t *res, const int height) { + int i; + uint32x4_t r0, r1; + const uint8_t *ref_loop[4] = { ref_array[0], ref_array[1], ref_array[2], + ref_array[3] }; + + uint32x4_t sum[4] = { vdupq_n_u32(0), vdupq_n_u32(0), vdupq_n_u32(0), + vdupq_n_u32(0) }; + + for (i = 0; i < height; ++i) { + uint8x16_t s; + + s = vld1q_u8(src_ptr + 0 * 16); + sad16_neon(ref_loop[0] + 0 * 16, s, &sum[0]); + sad16_neon(ref_loop[1] + 0 * 16, s, &sum[1]); + sad16_neon(ref_loop[2] + 0 * 16, s, &sum[2]); + sad16_neon(ref_loop[3] + 0 * 16, s, &sum[3]); + + s = vld1q_u8(src_ptr + 1 * 16); + sad16_neon(ref_loop[0] + 1 * 16, s, &sum[0]); + sad16_neon(ref_loop[1] + 1 * 16, s, &sum[1]); + sad16_neon(ref_loop[2] + 1 * 16, s, &sum[2]); + sad16_neon(ref_loop[3] + 1 * 16, s, &sum[3]); + + src_ptr += src_stride; + ref_loop[0] += ref_stride; + ref_loop[1] += ref_stride; + ref_loop[2] += ref_stride; + ref_loop[3] += ref_stride; + } + + r0 = vpaddq_u32(sum[0], sum[1]); + r1 = vpaddq_u32(sum[2], sum[3]); + vst1q_u32(res, vpaddq_u32(r0, r1)); +} + +void vpx_sad32x16x4d_neon(const uint8_t *src_ptr, int src_stride, + const uint8_t *const ref_array[4], int ref_stride, + uint32_t *res) { + sad32x_4d(src_ptr, src_stride, ref_array, ref_stride, res, 16); +} + +void vpx_sad32x32x4d_neon(const uint8_t *src_ptr, int src_stride, + const uint8_t *const ref_array[4], int ref_stride, + uint32_t *res) { + sad32x_4d(src_ptr, src_stride, ref_array, ref_stride, res, 32); +} + +void vpx_sad32x64x4d_neon(const uint8_t *src_ptr, int src_stride, + const uint8_t *const ref_array[4], int ref_stride, + uint32_t *res) { + sad32x_4d(src_ptr, src_stride, ref_array, ref_stride, res, 64); +} + +#else + static INLINE void sad32x_4d(const uint8_t *src_ptr, int src_stride, const uint8_t *const ref_array[4], int ref_stride, const int height, uint16x8_t *const sum) { @@ -284,8 +444,118 @@ sad_2048_pel_final_neon(sum, res); } +#endif + //////////////////////////////////////////////////////////////////////////////// +#if defined(__aarch64__) && defined(__ARM_FEATURE_DOTPROD) && \ + (__ARM_FEATURE_DOTPROD == 1) + +void vpx_sad64x32x4d_neon(const uint8_t *src_ptr, int src_stride, + const uint8_t *const ref_array[4], int ref_stride, + uint32_t *res) { + int i; + uint32x4_t r0, r1; + const uint8_t *ref_loop[4] = { ref_array[0], ref_array[1], ref_array[2], + ref_array[3] }; + uint32x4_t sum[4] = { vdupq_n_u32(0), vdupq_n_u32(0), vdupq_n_u32(0), + vdupq_n_u32(0) }; + + for (i = 0; i < 32; ++i) { + uint8x16_t s; + + s = vld1q_u8(src_ptr + 0 * 16); + sad16_neon(ref_loop[0] + 0 * 16, s, &sum[0]); + sad16_neon(ref_loop[1] + 0 * 16, s, &sum[1]); + sad16_neon(ref_loop[2] + 0 * 16, s, &sum[2]); + sad16_neon(ref_loop[3] + 0 * 16, s, &sum[3]); + + s = vld1q_u8(src_ptr + 1 * 16); + sad16_neon(ref_loop[0] + 1 * 16, s, &sum[0]); + sad16_neon(ref_loop[1] + 1 * 16, s, &sum[1]); + sad16_neon(ref_loop[2] + 1 * 16, s, &sum[2]); + sad16_neon(ref_loop[3] + 1 * 16, s, &sum[3]); + + s = vld1q_u8(src_ptr + 2 * 16); + sad16_neon(ref_loop[0] + 2 * 16, s, &sum[0]); + sad16_neon(ref_loop[1] + 2 * 16, s, &sum[1]); + sad16_neon(ref_loop[2] + 2 * 16, s, &sum[2]); + sad16_neon(ref_loop[3] + 2 * 16, s, &sum[3]); + + s = vld1q_u8(src_ptr + 3 * 16); + sad16_neon(ref_loop[0] + 3 * 16, s, &sum[0]); + sad16_neon(ref_loop[1] + 3 * 16, s, &sum[1]); + sad16_neon(ref_loop[2] + 3 * 16, s, &sum[2]); + sad16_neon(ref_loop[3] + 3 * 16, s, &sum[3]); + + src_ptr += src_stride; + ref_loop[0] += ref_stride; + ref_loop[1] += ref_stride; + ref_loop[2] += ref_stride; + ref_loop[3] += ref_stride; + } + + r0 = vpaddq_u32(sum[0], sum[1]); + r1 = vpaddq_u32(sum[2], sum[3]); + vst1q_u32(res, vpaddq_u32(r0, r1)); +} + +void vpx_sad64x64x4d_neon(const uint8_t *src_ptr, int src_stride, + const uint8_t *const ref_array[4], int ref_stride, + uint32_t *res) { + int i; + uint32x4_t r0, r1, r2, r3; + const uint8_t *ref_loop[4] = { ref_array[0], ref_array[1], ref_array[2], + ref_array[3] }; + uint32x4_t sum[8] = { vdupq_n_u32(0), vdupq_n_u32(0), vdupq_n_u32(0), + vdupq_n_u32(0), vdupq_n_u32(0), vdupq_n_u32(0), + vdupq_n_u32(0), vdupq_n_u32(0) }; + + for (i = 0; i < 64; ++i) { + uint8x16_t s; + + s = vld1q_u8(src_ptr + 0 * 16); + sad16_neon(ref_loop[0] + 0 * 16, s, &sum[0]); + sad16_neon(ref_loop[1] + 0 * 16, s, &sum[2]); + sad16_neon(ref_loop[2] + 0 * 16, s, &sum[4]); + sad16_neon(ref_loop[3] + 0 * 16, s, &sum[6]); + + s = vld1q_u8(src_ptr + 1 * 16); + sad16_neon(ref_loop[0] + 1 * 16, s, &sum[0]); + sad16_neon(ref_loop[1] + 1 * 16, s, &sum[2]); + sad16_neon(ref_loop[2] + 1 * 16, s, &sum[4]); + sad16_neon(ref_loop[3] + 1 * 16, s, &sum[6]); + + s = vld1q_u8(src_ptr + 2 * 16); + sad16_neon(ref_loop[0] + 2 * 16, s, &sum[1]); + sad16_neon(ref_loop[1] + 2 * 16, s, &sum[3]); + sad16_neon(ref_loop[2] + 2 * 16, s, &sum[5]); + sad16_neon(ref_loop[3] + 2 * 16, s, &sum[7]); + + s = vld1q_u8(src_ptr + 3 * 16); + sad16_neon(ref_loop[0] + 3 * 16, s, &sum[1]); + sad16_neon(ref_loop[1] + 3 * 16, s, &sum[3]); + sad16_neon(ref_loop[2] + 3 * 16, s, &sum[5]); + sad16_neon(ref_loop[3] + 3 * 16, s, &sum[7]); + + src_ptr += src_stride; + ref_loop[0] += ref_stride; + ref_loop[1] += ref_stride; + ref_loop[2] += ref_stride; + ref_loop[3] += ref_stride; + } + + r0 = vpaddq_u32(sum[0], sum[1]); + r1 = vpaddq_u32(sum[2], sum[3]); + r2 = vpaddq_u32(sum[4], sum[5]); + r3 = vpaddq_u32(sum[6], sum[7]); + r0 = vpaddq_u32(r0, r1); + r1 = vpaddq_u32(r2, r3); + vst1q_u32(res, vpaddq_u32(r0, r1)); +} + +#else + void vpx_sad64x32x4d_neon(const uint8_t *src_ptr, int src_stride, const uint8_t *const ref_array[4], int ref_stride, uint32_t *res) { @@ -378,3 +648,5 @@ sad_4096_pel_final_neon(sum, res); } + +#endif diff -Nru libvpx-1.10.0/vpx_dsp/arm/sad_neon.c libvpx-1.11.0/vpx_dsp/arm/sad_neon.c --- libvpx-1.10.0/vpx_dsp/arm/sad_neon.c 2021-03-18 19:59:46.000000000 +0000 +++ libvpx-1.11.0/vpx_dsp/arm/sad_neon.c 2021-10-06 17:41:19.000000000 +0000 @@ -23,7 +23,7 @@ const uint8x16_t ref_u8 = load_unaligned_u8q(ref_ptr, ref_stride); uint16x8_t abs = vabdl_u8(vget_low_u8(src_u8), vget_low_u8(ref_u8)); abs = vabal_u8(abs, vget_high_u8(src_u8), vget_high_u8(ref_u8)); - return vget_lane_u32(horizontal_add_uint16x8(abs), 0); + return horizontal_add_uint16x8(abs); } uint32_t vpx_sad4x4_avg_neon(const uint8_t *src_ptr, int src_stride, @@ -35,7 +35,7 @@ const uint8x16_t avg = vrhaddq_u8(ref_u8, second_pred_u8); uint16x8_t abs = vabdl_u8(vget_low_u8(src_u8), vget_low_u8(avg)); abs = vabal_u8(abs, vget_high_u8(src_u8), vget_high_u8(avg)); - return vget_lane_u32(horizontal_add_uint16x8(abs), 0); + return horizontal_add_uint16x8(abs); } uint32_t vpx_sad4x8_neon(const uint8_t *src_ptr, int src_stride, @@ -51,7 +51,7 @@ abs = vabal_u8(abs, vget_high_u8(src_u8), vget_high_u8(ref_u8)); } - return vget_lane_u32(horizontal_add_uint16x8(abs), 0); + return horizontal_add_uint16x8(abs); } uint32_t vpx_sad4x8_avg_neon(const uint8_t *src_ptr, int src_stride, @@ -71,7 +71,7 @@ abs = vabal_u8(abs, vget_high_u8(src_u8), vget_high_u8(avg)); } - return vget_lane_u32(horizontal_add_uint16x8(abs), 0); + return horizontal_add_uint16x8(abs); } static INLINE uint16x8_t sad8x(const uint8_t *src_ptr, int src_stride, @@ -114,7 +114,7 @@ uint32_t vpx_sad8x##n##_neon(const uint8_t *src_ptr, int src_stride, \ const uint8_t *ref_ptr, int ref_stride) { \ const uint16x8_t abs = sad8x(src_ptr, src_stride, ref_ptr, ref_stride, n); \ - return vget_lane_u32(horizontal_add_uint16x8(abs), 0); \ + return horizontal_add_uint16x8(abs); \ } \ \ uint32_t vpx_sad8x##n##_avg_neon(const uint8_t *src_ptr, int src_stride, \ @@ -122,7 +122,7 @@ const uint8_t *second_pred) { \ const uint16x8_t abs = \ sad8x_avg(src_ptr, src_stride, ref_ptr, ref_stride, second_pred, n); \ - return vget_lane_u32(horizontal_add_uint16x8(abs), 0); \ + return horizontal_add_uint16x8(abs); \ } sad8xN(4); @@ -172,7 +172,7 @@ const uint8_t *ref_ptr, int ref_stride) { \ const uint16x8_t abs = \ sad16x(src_ptr, src_stride, ref_ptr, ref_stride, n); \ - return vget_lane_u32(horizontal_add_uint16x8(abs), 0); \ + return horizontal_add_uint16x8(abs); \ } \ \ uint32_t vpx_sad16x##n##_avg_neon(const uint8_t *src_ptr, int src_stride, \ @@ -180,7 +180,7 @@ const uint8_t *second_pred) { \ const uint16x8_t abs = \ sad16x_avg(src_ptr, src_stride, ref_ptr, ref_stride, second_pred, n); \ - return vget_lane_u32(horizontal_add_uint16x8(abs), 0); \ + return horizontal_add_uint16x8(abs); \ } sad16xN(8); @@ -240,7 +240,7 @@ const uint8_t *ref_ptr, int ref_stride) { \ const uint16x8_t abs = \ sad32x(src_ptr, src_stride, ref_ptr, ref_stride, n); \ - return vget_lane_u32(horizontal_add_uint16x8(abs), 0); \ + return horizontal_add_uint16x8(abs); \ } \ \ uint32_t vpx_sad32x##n##_avg_neon(const uint8_t *src_ptr, int src_stride, \ @@ -248,7 +248,7 @@ const uint8_t *second_pred) { \ const uint16x8_t abs = \ sad32x_avg(src_ptr, src_stride, ref_ptr, ref_stride, second_pred, n); \ - return vget_lane_u32(horizontal_add_uint16x8(abs), 0); \ + return horizontal_add_uint16x8(abs); \ } sad32xN(16); @@ -338,7 +338,7 @@ const uint8_t *ref_ptr, int ref_stride) { \ const uint32x4_t abs = \ sad64x(src_ptr, src_stride, ref_ptr, ref_stride, n); \ - return vget_lane_u32(horizontal_add_uint32x4(abs), 0); \ + return horizontal_add_uint32x4(abs); \ } \ \ uint32_t vpx_sad64x##n##_avg_neon(const uint8_t *src_ptr, int src_stride, \ @@ -346,7 +346,7 @@ const uint8_t *second_pred) { \ const uint32x4_t abs = \ sad64x_avg(src_ptr, src_stride, ref_ptr, ref_stride, second_pred, n); \ - return vget_lane_u32(horizontal_add_uint32x4(abs), 0); \ + return horizontal_add_uint32x4(abs); \ } sad64xN(32); diff -Nru libvpx-1.10.0/vpx_dsp/arm/sum_neon.h libvpx-1.11.0/vpx_dsp/arm/sum_neon.h --- libvpx-1.10.0/vpx_dsp/arm/sum_neon.h 2021-03-18 19:59:46.000000000 +0000 +++ libvpx-1.11.0/vpx_dsp/arm/sum_neon.h 2021-10-06 17:41:19.000000000 +0000 @@ -16,23 +16,65 @@ #include "./vpx_config.h" #include "vpx/vpx_integer.h" -static INLINE int32x2_t horizontal_add_int16x8(const int16x8_t a) { +static INLINE int32_t horizontal_add_int16x8(const int16x8_t a) { +#if defined(__aarch64__) + return vaddlvq_s16(a); +#else const int32x4_t b = vpaddlq_s16(a); const int64x2_t c = vpaddlq_s32(b); - return vadd_s32(vreinterpret_s32_s64(vget_low_s64(c)), - vreinterpret_s32_s64(vget_high_s64(c))); + const int32x2_t d = vadd_s32(vreinterpret_s32_s64(vget_low_s64(c)), + vreinterpret_s32_s64(vget_high_s64(c))); + return vget_lane_s32(d, 0); +#endif } -static INLINE uint32x2_t horizontal_add_uint16x8(const uint16x8_t a) { +static INLINE uint32_t horizontal_add_uint16x8(const uint16x8_t a) { +#if defined(__aarch64__) + return vaddlvq_u16(a); +#else const uint32x4_t b = vpaddlq_u16(a); const uint64x2_t c = vpaddlq_u32(b); - return vadd_u32(vreinterpret_u32_u64(vget_low_u64(c)), - vreinterpret_u32_u64(vget_high_u64(c))); + const uint32x2_t d = vadd_u32(vreinterpret_u32_u64(vget_low_u64(c)), + vreinterpret_u32_u64(vget_high_u64(c))); + return vget_lane_u32(d, 0); +#endif } -static INLINE uint32x2_t horizontal_add_uint32x4(const uint32x4_t a) { +static INLINE int32_t horizontal_add_int32x2(const int32x2_t a) { +#if defined(__aarch64__) + return vaddv_s32(a); +#else + return vget_lane_s32(a, 0) + vget_lane_s32(a, 1); +#endif +} + +static INLINE uint32_t horizontal_add_uint32x2(const uint32x2_t a) { +#if defined(__aarch64__) + return vaddv_u32(a); +#else + return vget_lane_u32(a, 0) + vget_lane_u32(a, 1); +#endif +} + +static INLINE int32_t horizontal_add_int32x4(const int32x4_t a) { +#if defined(__aarch64__) + return vaddvq_s32(a); +#else + const int64x2_t b = vpaddlq_s32(a); + const int32x2_t c = vadd_s32(vreinterpret_s32_s64(vget_low_s64(b)), + vreinterpret_s32_s64(vget_high_s64(b))); + return vget_lane_s32(c, 0); +#endif +} + +static INLINE uint32_t horizontal_add_uint32x4(const uint32x4_t a) { +#if defined(__aarch64__) + return vaddvq_u32(a); +#else const uint64x2_t b = vpaddlq_u32(a); - return vadd_u32(vreinterpret_u32_u64(vget_low_u64(b)), - vreinterpret_u32_u64(vget_high_u64(b))); + const uint32x2_t c = vadd_u32(vreinterpret_u32_u64(vget_low_u64(b)), + vreinterpret_u32_u64(vget_high_u64(b))); + return vget_lane_u32(c, 0); +#endif } #endif // VPX_VPX_DSP_ARM_SUM_NEON_H_ diff -Nru libvpx-1.10.0/vpx_dsp/arm/variance_neon.c libvpx-1.11.0/vpx_dsp/arm/variance_neon.c --- libvpx-1.10.0/vpx_dsp/arm/variance_neon.c 2021-03-18 19:59:46.000000000 +0000 +++ libvpx-1.11.0/vpx_dsp/arm/variance_neon.c 2021-10-06 17:41:19.000000000 +0000 @@ -19,6 +19,100 @@ #include "vpx_dsp/arm/sum_neon.h" #include "vpx_ports/mem.h" +#if defined(__ARM_FEATURE_DOTPROD) && (__ARM_FEATURE_DOTPROD == 1) + +// Process a block of width 4 four rows at a time. +static void variance_neon_w4x4(const uint8_t *src_ptr, int src_stride, + const uint8_t *ref_ptr, int ref_stride, int h, + uint32_t *sse, int *sum) { + int i; + uint32x4_t sum_a = vdupq_n_u32(0); + uint32x4_t sum_b = vdupq_n_u32(0); + uint32x4_t sse_u32 = vdupq_n_u32(0); + + for (i = 0; i < h; i += 4) { + const uint8x16_t a = load_unaligned_u8q(src_ptr, src_stride); + const uint8x16_t b = load_unaligned_u8q(ref_ptr, ref_stride); + + const uint8x16_t abs_diff = vabdq_u8(a, b); + sse_u32 = vdotq_u32(sse_u32, abs_diff, abs_diff); + + sum_a = vdotq_u32(sum_a, a, vdupq_n_u8(1)); + sum_b = vdotq_u32(sum_b, b, vdupq_n_u8(1)); + + src_ptr += 4 * src_stride; + ref_ptr += 4 * ref_stride; + } + + *sum = horizontal_add_int32x4(vreinterpretq_s32_u32(vsubq_u32(sum_a, sum_b))); + *sse = horizontal_add_uint32x4(sse_u32); +} + +// Process a block of any size where the width is divisible by 16. +static void variance_neon_w16(const uint8_t *src_ptr, int src_stride, + const uint8_t *ref_ptr, int ref_stride, int w, + int h, uint32_t *sse, int *sum) { + int i, j; + uint32x4_t sum_a = vdupq_n_u32(0); + uint32x4_t sum_b = vdupq_n_u32(0); + uint32x4_t sse_u32 = vdupq_n_u32(0); + + for (i = 0; i < h; ++i) { + for (j = 0; j < w; j += 16) { + const uint8x16_t a = vld1q_u8(src_ptr + j); + const uint8x16_t b = vld1q_u8(ref_ptr + j); + + const uint8x16_t abs_diff = vabdq_u8(a, b); + sse_u32 = vdotq_u32(sse_u32, abs_diff, abs_diff); + + sum_a = vdotq_u32(sum_a, a, vdupq_n_u8(1)); + sum_b = vdotq_u32(sum_b, b, vdupq_n_u8(1)); + } + src_ptr += src_stride; + ref_ptr += ref_stride; + } + + *sum = horizontal_add_int32x4(vreinterpretq_s32_u32(vsubq_u32(sum_a, sum_b))); + *sse = horizontal_add_uint32x4(sse_u32); +} + +// Process a block of width 8 two rows at a time. +static void variance_neon_w8x2(const uint8_t *src_ptr, int src_stride, + const uint8_t *ref_ptr, int ref_stride, int h, + uint32_t *sse, int *sum) { + int i = 0; + uint32x2_t sum_a = vdup_n_u32(0); + uint32x2_t sum_b = vdup_n_u32(0); + uint32x2_t sse_lo_u32 = vdup_n_u32(0); + uint32x2_t sse_hi_u32 = vdup_n_u32(0); + + do { + const uint8x8_t a_0 = vld1_u8(src_ptr); + const uint8x8_t a_1 = vld1_u8(src_ptr + src_stride); + const uint8x8_t b_0 = vld1_u8(ref_ptr); + const uint8x8_t b_1 = vld1_u8(ref_ptr + ref_stride); + + const uint8x8_t abs_diff_0 = vabd_u8(a_0, b_0); + const uint8x8_t abs_diff_1 = vabd_u8(a_1, b_1); + sse_lo_u32 = vdot_u32(sse_lo_u32, abs_diff_0, abs_diff_0); + sse_hi_u32 = vdot_u32(sse_hi_u32, abs_diff_1, abs_diff_1); + + sum_a = vdot_u32(sum_a, a_0, vdup_n_u8(1)); + sum_b = vdot_u32(sum_b, b_0, vdup_n_u8(1)); + sum_a = vdot_u32(sum_a, a_1, vdup_n_u8(1)); + sum_b = vdot_u32(sum_b, b_1, vdup_n_u8(1)); + + src_ptr += src_stride + src_stride; + ref_ptr += ref_stride + ref_stride; + i += 2; + } while (i < h); + + *sum = horizontal_add_int32x2(vreinterpret_s32_u32(vsub_u32(sum_a, sum_b))); + *sse = horizontal_add_uint32x2(vadd_u32(sse_lo_u32, sse_hi_u32)); +} + +#else + // The variance helper functions use int16_t for sum. 8 values are accumulated // and then added (at which point they expand up to int32_t). To avoid overflow, // there can be no more than 32767 / 255 ~= 128 values accumulated in each @@ -66,10 +160,9 @@ ref_ptr += 4 * ref_stride; } - *sum = vget_lane_s32(horizontal_add_int16x8(sum_s16), 0); - *sse = vget_lane_u32(horizontal_add_uint32x4(vreinterpretq_u32_s32( - vaddq_s32(sse_lo_s32, sse_hi_s32))), - 0); + *sum = horizontal_add_int16x8(sum_s16); + *sse = horizontal_add_uint32x4( + vreinterpretq_u32_s32(vaddq_s32(sse_lo_s32, sse_hi_s32))); } // Process a block of any size where the width is divisible by 16. @@ -115,10 +208,9 @@ ref_ptr += ref_stride; } - *sum = vget_lane_s32(horizontal_add_int16x8(sum_s16), 0); - *sse = vget_lane_u32(horizontal_add_uint32x4(vreinterpretq_u32_s32( - vaddq_s32(sse_lo_s32, sse_hi_s32))), - 0); + *sum = horizontal_add_int16x8(sum_s16); + *sse = horizontal_add_uint32x4( + vreinterpretq_u32_s32(vaddq_s32(sse_lo_s32, sse_hi_s32))); } // Process a block of width 8 two rows at a time. @@ -157,12 +249,13 @@ i += 2; } while (i < h); - *sum = vget_lane_s32(horizontal_add_int16x8(sum_s16), 0); - *sse = vget_lane_u32(horizontal_add_uint32x4(vreinterpretq_u32_s32( - vaddq_s32(sse_lo_s32, sse_hi_s32))), - 0); + *sum = horizontal_add_int16x8(sum_s16); + *sse = horizontal_add_uint32x4( + vreinterpretq_u32_s32(vaddq_s32(sse_lo_s32, sse_hi_s32))); } +#endif + void vpx_get8x8var_neon(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, int *sum) { @@ -264,117 +357,165 @@ return *sse - (unsigned int)(((int64_t)sum1 * sum1) >> 12); } +#if defined(__ARM_FEATURE_DOTPROD) && (__ARM_FEATURE_DOTPROD == 1) + unsigned int vpx_mse16x16_neon(const unsigned char *src_ptr, int src_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sse) { int i; - int16x4_t d22s16, d23s16, d24s16, d25s16, d26s16, d27s16, d28s16, d29s16; - int64x1_t d0s64; - uint8x16_t q0u8, q1u8, q2u8, q3u8; - int32x4_t q7s32, q8s32, q9s32, q10s32; - uint16x8_t q11u16, q12u16, q13u16, q14u16; - int64x2_t q1s64; - - q7s32 = vdupq_n_s32(0); - q8s32 = vdupq_n_s32(0); - q9s32 = vdupq_n_s32(0); - q10s32 = vdupq_n_s32(0); + uint8x16_t a[2], b[2], abs_diff[2]; + uint32x4_t sse_vec[2] = { vdupq_n_u32(0), vdupq_n_u32(0) }; - for (i = 0; i < 8; i++) { // mse16x16_neon_loop - q0u8 = vld1q_u8(src_ptr); + for (i = 0; i < 8; i++) { + a[0] = vld1q_u8(src_ptr); src_ptr += src_stride; - q1u8 = vld1q_u8(src_ptr); + a[1] = vld1q_u8(src_ptr); src_ptr += src_stride; - q2u8 = vld1q_u8(ref_ptr); + b[0] = vld1q_u8(ref_ptr); ref_ptr += ref_stride; - q3u8 = vld1q_u8(ref_ptr); + b[1] = vld1q_u8(ref_ptr); ref_ptr += ref_stride; - q11u16 = vsubl_u8(vget_low_u8(q0u8), vget_low_u8(q2u8)); - q12u16 = vsubl_u8(vget_high_u8(q0u8), vget_high_u8(q2u8)); - q13u16 = vsubl_u8(vget_low_u8(q1u8), vget_low_u8(q3u8)); - q14u16 = vsubl_u8(vget_high_u8(q1u8), vget_high_u8(q3u8)); - - d22s16 = vreinterpret_s16_u16(vget_low_u16(q11u16)); - d23s16 = vreinterpret_s16_u16(vget_high_u16(q11u16)); - q7s32 = vmlal_s16(q7s32, d22s16, d22s16); - q8s32 = vmlal_s16(q8s32, d23s16, d23s16); - - d24s16 = vreinterpret_s16_u16(vget_low_u16(q12u16)); - d25s16 = vreinterpret_s16_u16(vget_high_u16(q12u16)); - q9s32 = vmlal_s16(q9s32, d24s16, d24s16); - q10s32 = vmlal_s16(q10s32, d25s16, d25s16); - - d26s16 = vreinterpret_s16_u16(vget_low_u16(q13u16)); - d27s16 = vreinterpret_s16_u16(vget_high_u16(q13u16)); - q7s32 = vmlal_s16(q7s32, d26s16, d26s16); - q8s32 = vmlal_s16(q8s32, d27s16, d27s16); - - d28s16 = vreinterpret_s16_u16(vget_low_u16(q14u16)); - d29s16 = vreinterpret_s16_u16(vget_high_u16(q14u16)); - q9s32 = vmlal_s16(q9s32, d28s16, d28s16); - q10s32 = vmlal_s16(q10s32, d29s16, d29s16); - } - - q7s32 = vaddq_s32(q7s32, q8s32); - q9s32 = vaddq_s32(q9s32, q10s32); - q10s32 = vaddq_s32(q7s32, q9s32); + abs_diff[0] = vabdq_u8(a[0], b[0]); + abs_diff[1] = vabdq_u8(a[1], b[1]); - q1s64 = vpaddlq_s32(q10s32); - d0s64 = vadd_s64(vget_low_s64(q1s64), vget_high_s64(q1s64)); + sse_vec[0] = vdotq_u32(sse_vec[0], abs_diff[0], abs_diff[0]); + sse_vec[1] = vdotq_u32(sse_vec[1], abs_diff[1], abs_diff[1]); + } - vst1_lane_u32((uint32_t *)sse, vreinterpret_u32_s64(d0s64), 0); - return vget_lane_u32(vreinterpret_u32_s64(d0s64), 0); + *sse = horizontal_add_uint32x4(vaddq_u32(sse_vec[0], sse_vec[1])); + return horizontal_add_uint32x4(vaddq_u32(sse_vec[0], sse_vec[1])); } unsigned int vpx_get4x4sse_cs_neon(const unsigned char *src_ptr, int src_stride, const unsigned char *ref_ptr, int ref_stride) { - int16x4_t d22s16, d24s16, d26s16, d28s16; - int64x1_t d0s64; - uint8x8_t d0u8, d1u8, d2u8, d3u8, d4u8, d5u8, d6u8, d7u8; - int32x4_t q7s32, q8s32, q9s32, q10s32; - uint16x8_t q11u16, q12u16, q13u16, q14u16; - int64x2_t q1s64; + uint8x8_t a[4], b[4], abs_diff[4]; + uint32x2_t sse = vdup_n_u32(0); - d0u8 = vld1_u8(src_ptr); + a[0] = vld1_u8(src_ptr); + src_ptr += src_stride; + b[0] = vld1_u8(ref_ptr); + ref_ptr += ref_stride; + a[1] = vld1_u8(src_ptr); + src_ptr += src_stride; + b[1] = vld1_u8(ref_ptr); + ref_ptr += ref_stride; + a[2] = vld1_u8(src_ptr); src_ptr += src_stride; - d4u8 = vld1_u8(ref_ptr); + b[2] = vld1_u8(ref_ptr); ref_ptr += ref_stride; - d1u8 = vld1_u8(src_ptr); + a[3] = vld1_u8(src_ptr); + b[3] = vld1_u8(ref_ptr); + + abs_diff[0] = vabd_u8(a[0], b[0]); + abs_diff[1] = vabd_u8(a[1], b[1]); + abs_diff[2] = vabd_u8(a[2], b[2]); + abs_diff[3] = vabd_u8(a[3], b[3]); + + sse = vdot_u32(sse, abs_diff[0], abs_diff[0]); + sse = vdot_u32(sse, abs_diff[1], abs_diff[1]); + sse = vdot_u32(sse, abs_diff[2], abs_diff[2]); + sse = vdot_u32(sse, abs_diff[3], abs_diff[3]); + + return vget_lane_u32(sse, 0); +} + +#else + +unsigned int vpx_mse16x16_neon(const unsigned char *src_ptr, int src_stride, + const unsigned char *ref_ptr, int ref_stride, + unsigned int *sse) { + int i; + uint8x16_t a[2], b[2]; + int16x4_t diff_lo[4], diff_hi[4]; + uint16x8_t diff[4]; + int32x4_t sse_vec[4] = { vdupq_n_s32(0), vdupq_n_s32(0), vdupq_n_s32(0), + vdupq_n_s32(0) }; + + for (i = 0; i < 8; i++) { + a[0] = vld1q_u8(src_ptr); + src_ptr += src_stride; + a[1] = vld1q_u8(src_ptr); + src_ptr += src_stride; + b[0] = vld1q_u8(ref_ptr); + ref_ptr += ref_stride; + b[1] = vld1q_u8(ref_ptr); + ref_ptr += ref_stride; + + diff[0] = vsubl_u8(vget_low_u8(a[0]), vget_low_u8(b[0])); + diff[1] = vsubl_u8(vget_high_u8(a[0]), vget_high_u8(b[0])); + diff[2] = vsubl_u8(vget_low_u8(a[1]), vget_low_u8(b[1])); + diff[3] = vsubl_u8(vget_high_u8(a[1]), vget_high_u8(b[1])); + + diff_lo[0] = vreinterpret_s16_u16(vget_low_u16(diff[0])); + diff_lo[1] = vreinterpret_s16_u16(vget_low_u16(diff[1])); + sse_vec[0] = vmlal_s16(sse_vec[0], diff_lo[0], diff_lo[0]); + sse_vec[1] = vmlal_s16(sse_vec[1], diff_lo[1], diff_lo[1]); + + diff_lo[2] = vreinterpret_s16_u16(vget_low_u16(diff[2])); + diff_lo[3] = vreinterpret_s16_u16(vget_low_u16(diff[3])); + sse_vec[2] = vmlal_s16(sse_vec[2], diff_lo[2], diff_lo[2]); + sse_vec[3] = vmlal_s16(sse_vec[3], diff_lo[3], diff_lo[3]); + + diff_hi[0] = vreinterpret_s16_u16(vget_high_u16(diff[0])); + diff_hi[1] = vreinterpret_s16_u16(vget_high_u16(diff[1])); + sse_vec[0] = vmlal_s16(sse_vec[0], diff_hi[0], diff_hi[0]); + sse_vec[1] = vmlal_s16(sse_vec[1], diff_hi[1], diff_hi[1]); + + diff_hi[2] = vreinterpret_s16_u16(vget_high_u16(diff[2])); + diff_hi[3] = vreinterpret_s16_u16(vget_high_u16(diff[3])); + sse_vec[2] = vmlal_s16(sse_vec[2], diff_hi[2], diff_hi[2]); + sse_vec[3] = vmlal_s16(sse_vec[3], diff_hi[3], diff_hi[3]); + } + + sse_vec[0] = vaddq_s32(sse_vec[0], sse_vec[1]); + sse_vec[2] = vaddq_s32(sse_vec[2], sse_vec[3]); + sse_vec[0] = vaddq_s32(sse_vec[0], sse_vec[2]); + + *sse = horizontal_add_uint32x4(vreinterpretq_u32_s32(sse_vec[0])); + return horizontal_add_uint32x4(vreinterpretq_u32_s32(sse_vec[0])); +} + +unsigned int vpx_get4x4sse_cs_neon(const unsigned char *src_ptr, int src_stride, + const unsigned char *ref_ptr, + int ref_stride) { + uint8x8_t a[4], b[4]; + int16x4_t diff_lo[4]; + uint16x8_t diff[4]; + int32x4_t sse; + + a[0] = vld1_u8(src_ptr); src_ptr += src_stride; - d5u8 = vld1_u8(ref_ptr); + b[0] = vld1_u8(ref_ptr); ref_ptr += ref_stride; - d2u8 = vld1_u8(src_ptr); + a[1] = vld1_u8(src_ptr); src_ptr += src_stride; - d6u8 = vld1_u8(ref_ptr); + b[1] = vld1_u8(ref_ptr); ref_ptr += ref_stride; - d3u8 = vld1_u8(src_ptr); + a[2] = vld1_u8(src_ptr); src_ptr += src_stride; - d7u8 = vld1_u8(ref_ptr); + b[2] = vld1_u8(ref_ptr); ref_ptr += ref_stride; + a[3] = vld1_u8(src_ptr); + b[3] = vld1_u8(ref_ptr); - q11u16 = vsubl_u8(d0u8, d4u8); - q12u16 = vsubl_u8(d1u8, d5u8); - q13u16 = vsubl_u8(d2u8, d6u8); - q14u16 = vsubl_u8(d3u8, d7u8); - - d22s16 = vget_low_s16(vreinterpretq_s16_u16(q11u16)); - d24s16 = vget_low_s16(vreinterpretq_s16_u16(q12u16)); - d26s16 = vget_low_s16(vreinterpretq_s16_u16(q13u16)); - d28s16 = vget_low_s16(vreinterpretq_s16_u16(q14u16)); - - q7s32 = vmull_s16(d22s16, d22s16); - q8s32 = vmull_s16(d24s16, d24s16); - q9s32 = vmull_s16(d26s16, d26s16); - q10s32 = vmull_s16(d28s16, d28s16); - - q7s32 = vaddq_s32(q7s32, q8s32); - q9s32 = vaddq_s32(q9s32, q10s32); - q9s32 = vaddq_s32(q7s32, q9s32); + diff[0] = vsubl_u8(a[0], b[0]); + diff[1] = vsubl_u8(a[1], b[1]); + diff[2] = vsubl_u8(a[2], b[2]); + diff[3] = vsubl_u8(a[3], b[3]); + + diff_lo[0] = vget_low_s16(vreinterpretq_s16_u16(diff[0])); + diff_lo[1] = vget_low_s16(vreinterpretq_s16_u16(diff[1])); + diff_lo[2] = vget_low_s16(vreinterpretq_s16_u16(diff[2])); + diff_lo[3] = vget_low_s16(vreinterpretq_s16_u16(diff[3])); + + sse = vmull_s16(diff_lo[0], diff_lo[0]); + sse = vmlal_s16(sse, diff_lo[1], diff_lo[1]); + sse = vmlal_s16(sse, diff_lo[2], diff_lo[2]); + sse = vmlal_s16(sse, diff_lo[3], diff_lo[3]); - q1s64 = vpaddlq_s32(q9s32); - d0s64 = vadd_s64(vget_low_s64(q1s64), vget_high_s64(q1s64)); - - return vget_lane_u32(vreinterpret_u32_s64(d0s64), 0); + return horizontal_add_uint32x4(vreinterpretq_u32_s32(sse)); } + +#endif diff -Nru libvpx-1.10.0/vpx_dsp/arm/vpx_convolve8_neon.c libvpx-1.11.0/vpx_dsp/arm/vpx_convolve8_neon.c --- libvpx-1.10.0/vpx_dsp/arm/vpx_convolve8_neon.c 2021-03-18 19:59:46.000000000 +0000 +++ libvpx-1.11.0/vpx_dsp/arm/vpx_convolve8_neon.c 2021-10-06 17:41:19.000000000 +0000 @@ -14,6 +14,7 @@ #include "./vpx_config.h" #include "./vpx_dsp_rtcd.h" #include "vpx/vpx_integer.h" +#include "vpx_dsp/arm/mem_neon.h" #include "vpx_dsp/arm/transpose_neon.h" #include "vpx_dsp/arm/vpx_convolve8_neon.h" #include "vpx_ports/mem.h" @@ -30,6 +31,741 @@ // instructions. This optimization is much faster in speed unit test, but slowed // down the whole decoder by 5%. +#if defined(__aarch64__) && defined(__ARM_FEATURE_DOTPROD) && \ + (__ARM_FEATURE_DOTPROD == 1) +DECLARE_ALIGNED(16, static const uint8_t, dot_prod_permute_tbl[48]) = { + 0, 1, 2, 3, 1, 2, 3, 4, 2, 3, 4, 5, 3, 4, 5, 6, + 4, 5, 6, 7, 5, 6, 7, 8, 6, 7, 8, 9, 7, 8, 9, 10, + 8, 9, 10, 11, 9, 10, 11, 12, 10, 11, 12, 13, 11, 12, 13, 14 +}; + +DECLARE_ALIGNED(16, static const uint8_t, dot_prod_tran_concat_tbl[32]) = { + 0, 8, 16, 24, 1, 9, 17, 25, 2, 10, 18, 26, 3, 11, 19, 27, + 4, 12, 20, 28, 5, 13, 21, 29, 6, 14, 22, 30, 7, 15, 23, 31 +}; + +DECLARE_ALIGNED(16, static const uint8_t, dot_prod_merge_block_tbl[48]) = { + /* Shift left and insert new last column in transposed 4x4 block. */ + 1, 2, 3, 16, 5, 6, 7, 20, 9, 10, 11, 24, 13, 14, 15, 28, + /* Shift left and insert two new columns in transposed 4x4 block. */ + 2, 3, 16, 17, 6, 7, 20, 21, 10, 11, 24, 25, 14, 15, 28, 29, + /* Shift left and insert three new columns in transposed 4x4 block. */ + 3, 16, 17, 18, 7, 20, 21, 22, 11, 24, 25, 26, 15, 28, 29, 30 +}; + +static INLINE void transpose_concat_4x4(int8x8_t *a0, int8x8_t *a1, + int8x8_t *a2, int8x8_t *a3, + int8x16_t *b, + const uint8x16_t permute_tbl) { + /* Transpose 8-bit elements and concatenate result rows as follows: + * a0: 00, 01, 02, 03, XX, XX, XX, XX + * a1: 10, 11, 12, 13, XX, XX, XX, XX + * a2: 20, 21, 22, 23, XX, XX, XX, XX + * a3: 30, 31, 32, 33, XX, XX, XX, XX + * + * b: 00, 10, 20, 30, 01, 11, 21, 31, 02, 12, 22, 32, 03, 13, 23, 33 + * + * The 'permute_tbl' is always 'dot_prod_tran_concat_tbl' above. Passing it + * as an argument is preferable to loading it directly from memory as this + * inline helper is called many times from the same parent function. + */ + + int8x16x2_t samples = { { vcombine_s8(*a0, *a1), vcombine_s8(*a2, *a3) } }; + *b = vqtbl2q_s8(samples, permute_tbl); +} + +static INLINE void transpose_concat_8x4(int8x8_t *a0, int8x8_t *a1, + int8x8_t *a2, int8x8_t *a3, + int8x16_t *b0, int8x16_t *b1, + const uint8x16x2_t permute_tbl) { + /* Transpose 8-bit elements and concatenate result rows as follows: + * a0: 00, 01, 02, 03, 04, 05, 06, 07 + * a1: 10, 11, 12, 13, 14, 15, 16, 17 + * a2: 20, 21, 22, 23, 24, 25, 26, 27 + * a3: 30, 31, 32, 33, 34, 35, 36, 37 + * + * b0: 00, 10, 20, 30, 01, 11, 21, 31, 02, 12, 22, 32, 03, 13, 23, 33 + * b1: 04, 14, 24, 34, 05, 15, 25, 35, 06, 16, 26, 36, 07, 17, 27, 37 + * + * The 'permute_tbl' is always 'dot_prod_tran_concat_tbl' above. Passing it + * as an argument is preferable to loading it directly from memory as this + * inline helper is called many times from the same parent function. + */ + + int8x16x2_t samples = { { vcombine_s8(*a0, *a1), vcombine_s8(*a2, *a3) } }; + *b0 = vqtbl2q_s8(samples, permute_tbl.val[0]); + *b1 = vqtbl2q_s8(samples, permute_tbl.val[1]); +} + +void vpx_convolve8_horiz_neon(const uint8_t *src, ptrdiff_t src_stride, + uint8_t *dst, ptrdiff_t dst_stride, + const InterpKernel *filter, int x0_q4, + int x_step_q4, int y0_q4, int y_step_q4, int w, + int h) { + const int8x8_t filters = vmovn_s16(vld1q_s16(filter[x0_q4])); + const int16x8_t correct_tmp = vmulq_n_s16(vld1q_s16(filter[x0_q4]), 128); + const int32x4_t correction = vdupq_n_s32((int32_t)vaddvq_s16(correct_tmp)); + const uint8x16_t range_limit = vdupq_n_u8(128); + uint8x16_t s0, s1, s2, s3; + + assert(!((intptr_t)dst & 3)); + assert(!(dst_stride & 3)); + assert(x_step_q4 == 16); + + (void)x_step_q4; + (void)y0_q4; + (void)y_step_q4; + + src -= 3; + + if (w == 4) { + const uint8x16x2_t permute_tbl = vld1q_u8_x2(dot_prod_permute_tbl); + do { + int32x4_t t0, t1, t2, t3; + int16x8_t t01, t23; + uint8x8_t d01, d23; + + s0 = vld1q_u8(src); + src += src_stride; + s1 = vld1q_u8(src); + src += src_stride; + s2 = vld1q_u8(src); + src += src_stride; + s3 = vld1q_u8(src); + src += src_stride; + + t0 = convolve8_4_dot(s0, filters, correction, range_limit, permute_tbl); + t1 = convolve8_4_dot(s1, filters, correction, range_limit, permute_tbl); + t2 = convolve8_4_dot(s2, filters, correction, range_limit, permute_tbl); + t3 = convolve8_4_dot(s3, filters, correction, range_limit, permute_tbl); + + t01 = vcombine_s16(vqmovn_s32(t0), vqmovn_s32(t1)); + t23 = vcombine_s16(vqmovn_s32(t2), vqmovn_s32(t3)); + d01 = vqrshrun_n_s16(t01, 7); + d23 = vqrshrun_n_s16(t23, 7); + + vst1_lane_u32((uint32_t *)dst, vreinterpret_u32_u8(d01), 0); + dst += dst_stride; + vst1_lane_u32((uint32_t *)dst, vreinterpret_u32_u8(d01), 1); + dst += dst_stride; + vst1_lane_u32((uint32_t *)dst, vreinterpret_u32_u8(d23), 0); + dst += dst_stride; + vst1_lane_u32((uint32_t *)dst, vreinterpret_u32_u8(d23), 1); + dst += dst_stride; + h -= 4; + } while (h > 0); + } else { + const uint8x16x3_t permute_tbl = vld1q_u8_x3(dot_prod_permute_tbl); + const uint8_t *s; + uint8_t *d; + int width; + uint8x8_t d0, d1, d2, d3; + + do { + width = w; + s = src; + d = dst; + do { + s0 = vld1q_u8(s + 0 * src_stride); + s1 = vld1q_u8(s + 1 * src_stride); + s2 = vld1q_u8(s + 2 * src_stride); + s3 = vld1q_u8(s + 3 * src_stride); + + d0 = convolve8_8_dot(s0, filters, correction, range_limit, permute_tbl); + d1 = convolve8_8_dot(s1, filters, correction, range_limit, permute_tbl); + d2 = convolve8_8_dot(s2, filters, correction, range_limit, permute_tbl); + d3 = convolve8_8_dot(s3, filters, correction, range_limit, permute_tbl); + + vst1_u8(d + 0 * dst_stride, d0); + vst1_u8(d + 1 * dst_stride, d1); + vst1_u8(d + 2 * dst_stride, d2); + vst1_u8(d + 3 * dst_stride, d3); + + s += 8; + d += 8; + width -= 8; + } while (width > 0); + src += 4 * src_stride; + dst += 4 * dst_stride; + h -= 4; + } while (h > 0); + } +} + +void vpx_convolve8_avg_horiz_neon(const uint8_t *src, ptrdiff_t src_stride, + uint8_t *dst, ptrdiff_t dst_stride, + const InterpKernel *filter, int x0_q4, + int x_step_q4, int y0_q4, int y_step_q4, + int w, int h) { + const int8x8_t filters = vmovn_s16(vld1q_s16(filter[x0_q4])); + const int16x8_t correct_tmp = vmulq_n_s16(vld1q_s16(filter[x0_q4]), 128); + const int32x4_t correction = vdupq_n_s32((int32_t)vaddvq_s16(correct_tmp)); + const uint8x16_t range_limit = vdupq_n_u8(128); + uint8x16_t s0, s1, s2, s3; + + assert(!((intptr_t)dst & 3)); + assert(!(dst_stride & 3)); + assert(x_step_q4 == 16); + + (void)x_step_q4; + (void)y0_q4; + (void)y_step_q4; + + src -= 3; + + if (w == 4) { + const uint8x16x2_t permute_tbl = vld1q_u8_x2(dot_prod_permute_tbl); + do { + int32x4_t t0, t1, t2, t3; + int16x8_t t01, t23; + uint8x8_t d01, d23, dd01, dd23; + dd01 = vdup_n_u8(0); + dd23 = vdup_n_u8(0); + + s0 = vld1q_u8(src); + src += src_stride; + s1 = vld1q_u8(src); + src += src_stride; + s2 = vld1q_u8(src); + src += src_stride; + s3 = vld1q_u8(src); + src += src_stride; + + t0 = convolve8_4_dot(s0, filters, correction, range_limit, permute_tbl); + t1 = convolve8_4_dot(s1, filters, correction, range_limit, permute_tbl); + t2 = convolve8_4_dot(s2, filters, correction, range_limit, permute_tbl); + t3 = convolve8_4_dot(s3, filters, correction, range_limit, permute_tbl); + + t01 = vcombine_s16(vqmovn_s32(t0), vqmovn_s32(t1)); + t23 = vcombine_s16(vqmovn_s32(t2), vqmovn_s32(t3)); + d01 = vqrshrun_n_s16(t01, 7); + d23 = vqrshrun_n_s16(t23, 7); + + dd01 = load_u8(dst + 0 * dst_stride, dst_stride); + dd23 = load_u8(dst + 2 * dst_stride, dst_stride); + d01 = vrhadd_u8(d01, dd01); + d23 = vrhadd_u8(d23, dd23); + + vst1_lane_u32((uint32_t *)dst, vreinterpret_u32_u8(d01), 0); + dst += dst_stride; + vst1_lane_u32((uint32_t *)dst, vreinterpret_u32_u8(d01), 1); + dst += dst_stride; + vst1_lane_u32((uint32_t *)dst, vreinterpret_u32_u8(d23), 0); + dst += dst_stride; + vst1_lane_u32((uint32_t *)dst, vreinterpret_u32_u8(d23), 1); + dst += dst_stride; + h -= 4; + } while (h > 0); + } else { + const uint8x16x3_t permute_tbl = vld1q_u8_x3(dot_prod_permute_tbl); + const uint8_t *s; + uint8_t *d; + int width; + uint8x8_t d0, d1, d2, d3, dd0, dd1, dd2, dd3; + + do { + width = w; + s = src; + d = dst; + do { + s0 = vld1q_u8(s + 0 * src_stride); + s1 = vld1q_u8(s + 1 * src_stride); + s2 = vld1q_u8(s + 2 * src_stride); + s3 = vld1q_u8(s + 3 * src_stride); + + d0 = convolve8_8_dot(s0, filters, correction, range_limit, permute_tbl); + d1 = convolve8_8_dot(s1, filters, correction, range_limit, permute_tbl); + d2 = convolve8_8_dot(s2, filters, correction, range_limit, permute_tbl); + d3 = convolve8_8_dot(s3, filters, correction, range_limit, permute_tbl); + + dd0 = vld1_u8(d + 0 * dst_stride); + dd1 = vld1_u8(d + 1 * dst_stride); + dd2 = vld1_u8(d + 2 * dst_stride); + dd3 = vld1_u8(d + 3 * dst_stride); + d0 = vrhadd_u8(d0, dd0); + d1 = vrhadd_u8(d1, dd1); + d2 = vrhadd_u8(d2, dd2); + d3 = vrhadd_u8(d3, dd3); + + vst1_u8(d + 0 * dst_stride, d0); + vst1_u8(d + 1 * dst_stride, d1); + vst1_u8(d + 2 * dst_stride, d2); + vst1_u8(d + 3 * dst_stride, d3); + + s += 8; + d += 8; + width -= 8; + } while (width > 0); + src += 4 * src_stride; + dst += 4 * dst_stride; + h -= 4; + } while (h > 0); + } +} + +void vpx_convolve8_vert_neon(const uint8_t *src, ptrdiff_t src_stride, + uint8_t *dst, ptrdiff_t dst_stride, + const InterpKernel *filter, int x0_q4, + int x_step_q4, int y0_q4, int y_step_q4, int w, + int h) { + const int8x8_t filters = vmovn_s16(vld1q_s16(filter[y0_q4])); + const int16x8_t correct_tmp = vmulq_n_s16(vld1q_s16(filter[y0_q4]), 128); + const int32x4_t correction = vdupq_n_s32((int32_t)vaddvq_s16(correct_tmp)); + const uint8x8_t range_limit = vdup_n_u8(128); + const uint8x16x3_t merge_block_tbl = vld1q_u8_x3(dot_prod_merge_block_tbl); + uint8x8_t t0, t1, t2, t3, t4, t5, t6; + int8x8_t s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, s10; + int8x16x2_t samples_LUT; + + assert(!((intptr_t)dst & 3)); + assert(!(dst_stride & 3)); + assert(y_step_q4 == 16); + + (void)x0_q4; + (void)x_step_q4; + (void)y_step_q4; + + src -= 3 * src_stride; + + if (w == 4) { + const uint8x16_t tran_concat_tbl = vld1q_u8(dot_prod_tran_concat_tbl); + int8x16_t s0123, s1234, s2345, s3456, s4567, s5678, s6789, s78910; + int32x4_t d0, d1, d2, d3; + uint8x8_t d01, d23; + + load_u8_8x4(src, src_stride, &t0, &t1, &t2, &t3); + src += 4 * src_stride; + t4 = vld1_u8(src); + src += src_stride; + t5 = vld1_u8(src); + src += src_stride; + t6 = vld1_u8(src); + src += src_stride; + + /* Clamp sample range to [-128, 127] for 8-bit signed dot product. */ + s0 = vreinterpret_s8_u8(vsub_u8(t0, range_limit)); + s1 = vreinterpret_s8_u8(vsub_u8(t1, range_limit)); + s2 = vreinterpret_s8_u8(vsub_u8(t2, range_limit)); + s3 = vreinterpret_s8_u8(vsub_u8(t3, range_limit)); + s4 = vreinterpret_s8_u8(vsub_u8(t4, range_limit)); + s5 = vreinterpret_s8_u8(vsub_u8(t5, range_limit)); + s6 = vreinterpret_s8_u8(vsub_u8(t6, range_limit)); + s7 = vdup_n_s8(0); + s8 = vdup_n_s8(0); + s9 = vdup_n_s8(0); + + /* This operation combines a conventional transpose and the sample permute + * (see horizontal case) required before computing the dot product. + */ + transpose_concat_4x4(&s0, &s1, &s2, &s3, &s0123, tran_concat_tbl); + transpose_concat_4x4(&s1, &s2, &s3, &s4, &s1234, tran_concat_tbl); + transpose_concat_4x4(&s2, &s3, &s4, &s5, &s2345, tran_concat_tbl); + transpose_concat_4x4(&s3, &s4, &s5, &s6, &s3456, tran_concat_tbl); + transpose_concat_4x4(&s4, &s5, &s6, &s7, &s4567, tran_concat_tbl); + transpose_concat_4x4(&s5, &s6, &s7, &s8, &s5678, tran_concat_tbl); + transpose_concat_4x4(&s6, &s7, &s8, &s9, &s6789, tran_concat_tbl); + + do { + uint8x8_t t7, t8, t9, t10; + + load_u8_8x4(src, src_stride, &t7, &t8, &t9, &t10); + + s7 = vreinterpret_s8_u8(vsub_u8(t7, range_limit)); + s8 = vreinterpret_s8_u8(vsub_u8(t8, range_limit)); + s9 = vreinterpret_s8_u8(vsub_u8(t9, range_limit)); + s10 = vreinterpret_s8_u8(vsub_u8(t10, range_limit)); + + transpose_concat_4x4(&s7, &s8, &s9, &s10, &s78910, tran_concat_tbl); + + /* Merge new data into block from previous iteration. */ + samples_LUT.val[0] = s3456; + samples_LUT.val[1] = s78910; + s4567 = vqtbl2q_s8(samples_LUT, merge_block_tbl.val[0]); + s5678 = vqtbl2q_s8(samples_LUT, merge_block_tbl.val[1]); + s6789 = vqtbl2q_s8(samples_LUT, merge_block_tbl.val[2]); + + d0 = convolve8_4_dot_partial(s0123, s4567, correction, filters); + d1 = convolve8_4_dot_partial(s1234, s5678, correction, filters); + d2 = convolve8_4_dot_partial(s2345, s6789, correction, filters); + d3 = convolve8_4_dot_partial(s3456, s78910, correction, filters); + + d01 = vqrshrun_n_s16(vcombine_s16(vqmovn_s32(d0), vqmovn_s32(d1)), 7); + d23 = vqrshrun_n_s16(vcombine_s16(vqmovn_s32(d2), vqmovn_s32(d3)), 7); + + vst1_lane_u32((uint32_t *)dst, vreinterpret_u32_u8(d01), 0); + dst += dst_stride; + vst1_lane_u32((uint32_t *)dst, vreinterpret_u32_u8(d01), 1); + dst += dst_stride; + vst1_lane_u32((uint32_t *)dst, vreinterpret_u32_u8(d23), 0); + dst += dst_stride; + vst1_lane_u32((uint32_t *)dst, vreinterpret_u32_u8(d23), 1); + dst += dst_stride; + + /* Prepare block for next iteration - re-using as much as possible. */ + /* Shuffle everything up four rows. */ + s0123 = s4567; + s1234 = s5678; + s2345 = s6789; + s3456 = s78910; + + src += 4 * src_stride; + h -= 4; + } while (h > 0); + } else { + const uint8x16x2_t tran_concat_tbl = vld1q_u8_x2(dot_prod_tran_concat_tbl); + int8x16_t s0123_lo, s0123_hi, s1234_lo, s1234_hi, s2345_lo, s2345_hi, + s3456_lo, s3456_hi, s4567_lo, s4567_hi, s5678_lo, s5678_hi, s6789_lo, + s6789_hi, s78910_lo, s78910_hi; + uint8x8_t d0, d1, d2, d3; + const uint8_t *s; + uint8_t *d; + int height; + + do { + height = h; + s = src; + d = dst; + + load_u8_8x4(s, src_stride, &t0, &t1, &t2, &t3); + s += 4 * src_stride; + t4 = vld1_u8(s); + s += src_stride; + t5 = vld1_u8(s); + s += src_stride; + t6 = vld1_u8(s); + s += src_stride; + + /* Clamp sample range to [-128, 127] for 8-bit signed dot product. */ + s0 = vreinterpret_s8_u8(vsub_u8(t0, range_limit)); + s1 = vreinterpret_s8_u8(vsub_u8(t1, range_limit)); + s2 = vreinterpret_s8_u8(vsub_u8(t2, range_limit)); + s3 = vreinterpret_s8_u8(vsub_u8(t3, range_limit)); + s4 = vreinterpret_s8_u8(vsub_u8(t4, range_limit)); + s5 = vreinterpret_s8_u8(vsub_u8(t5, range_limit)); + s6 = vreinterpret_s8_u8(vsub_u8(t6, range_limit)); + s7 = vdup_n_s8(0); + s8 = vdup_n_s8(0); + s9 = vdup_n_s8(0); + + /* This operation combines a conventional transpose and the sample permute + * (see horizontal case) required before computing the dot product. + */ + transpose_concat_8x4(&s0, &s1, &s2, &s3, &s0123_lo, &s0123_hi, + tran_concat_tbl); + transpose_concat_8x4(&s1, &s2, &s3, &s4, &s1234_lo, &s1234_hi, + tran_concat_tbl); + transpose_concat_8x4(&s2, &s3, &s4, &s5, &s2345_lo, &s2345_hi, + tran_concat_tbl); + transpose_concat_8x4(&s3, &s4, &s5, &s6, &s3456_lo, &s3456_hi, + tran_concat_tbl); + transpose_concat_8x4(&s4, &s5, &s6, &s7, &s4567_lo, &s4567_hi, + tran_concat_tbl); + transpose_concat_8x4(&s5, &s6, &s7, &s8, &s5678_lo, &s5678_hi, + tran_concat_tbl); + transpose_concat_8x4(&s6, &s7, &s8, &s9, &s6789_lo, &s6789_hi, + tran_concat_tbl); + + do { + uint8x8_t t7, t8, t9, t10; + + load_u8_8x4(s, src_stride, &t7, &t8, &t9, &t10); + + s7 = vreinterpret_s8_u8(vsub_u8(t7, range_limit)); + s8 = vreinterpret_s8_u8(vsub_u8(t8, range_limit)); + s9 = vreinterpret_s8_u8(vsub_u8(t9, range_limit)); + s10 = vreinterpret_s8_u8(vsub_u8(t10, range_limit)); + + transpose_concat_8x4(&s7, &s8, &s9, &s10, &s78910_lo, &s78910_hi, + tran_concat_tbl); + + /* Merge new data into block from previous iteration. */ + samples_LUT.val[0] = s3456_lo; + samples_LUT.val[1] = s78910_lo; + s4567_lo = vqtbl2q_s8(samples_LUT, merge_block_tbl.val[0]); + s5678_lo = vqtbl2q_s8(samples_LUT, merge_block_tbl.val[1]); + s6789_lo = vqtbl2q_s8(samples_LUT, merge_block_tbl.val[2]); + + samples_LUT.val[0] = s3456_hi; + samples_LUT.val[1] = s78910_hi; + s4567_hi = vqtbl2q_s8(samples_LUT, merge_block_tbl.val[0]); + s5678_hi = vqtbl2q_s8(samples_LUT, merge_block_tbl.val[1]); + s6789_hi = vqtbl2q_s8(samples_LUT, merge_block_tbl.val[2]); + + d0 = convolve8_8_dot_partial(s0123_lo, s4567_lo, s0123_hi, s4567_hi, + correction, filters); + d1 = convolve8_8_dot_partial(s1234_lo, s5678_lo, s1234_hi, s5678_hi, + correction, filters); + d2 = convolve8_8_dot_partial(s2345_lo, s6789_lo, s2345_hi, s6789_hi, + correction, filters); + d3 = convolve8_8_dot_partial(s3456_lo, s78910_lo, s3456_hi, s78910_hi, + correction, filters); + vst1_u8(d + 0 * dst_stride, d0); + vst1_u8(d + 1 * dst_stride, d1); + vst1_u8(d + 2 * dst_stride, d2); + vst1_u8(d + 3 * dst_stride, d3); + + /* Prepare block for next iteration - re-using as much as possible. */ + /* Shuffle everything up four rows. */ + s0123_lo = s4567_lo; + s0123_hi = s4567_hi; + s1234_lo = s5678_lo; + s1234_hi = s5678_hi; + s2345_lo = s6789_lo; + s2345_hi = s6789_hi; + s3456_lo = s78910_lo; + s3456_hi = s78910_hi; + + s += 4 * src_stride; + d += 4 * dst_stride; + height -= 4; + } while (height > 0); + src += 8; + dst += 8; + w -= 8; + } while (w > 0); + } +} + +void vpx_convolve8_avg_vert_neon(const uint8_t *src, ptrdiff_t src_stride, + uint8_t *dst, ptrdiff_t dst_stride, + const InterpKernel *filter, int x0_q4, + int x_step_q4, int y0_q4, int y_step_q4, int w, + int h) { + const int8x8_t filters = vmovn_s16(vld1q_s16(filter[y0_q4])); + const int16x8_t correct_tmp = vmulq_n_s16(vld1q_s16(filter[y0_q4]), 128); + const int32x4_t correction = vdupq_n_s32((int32_t)vaddvq_s16(correct_tmp)); + const uint8x8_t range_limit = vdup_n_u8(128); + const uint8x16x3_t merge_block_tbl = vld1q_u8_x3(dot_prod_merge_block_tbl); + uint8x8_t t0, t1, t2, t3, t4, t5, t6; + int8x8_t s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, s10; + int8x16x2_t samples_LUT; + + assert(!((intptr_t)dst & 3)); + assert(!(dst_stride & 3)); + assert(y_step_q4 == 16); + + (void)x0_q4; + (void)x_step_q4; + (void)y_step_q4; + + src -= 3 * src_stride; + + if (w == 4) { + const uint8x16_t tran_concat_tbl = vld1q_u8(dot_prod_tran_concat_tbl); + int8x16_t s0123, s1234, s2345, s3456, s4567, s5678, s6789, s78910; + int32x4_t d0, d1, d2, d3; + uint8x8_t d01, d23, dd01, dd23; + + load_u8_8x4(src, src_stride, &t0, &t1, &t2, &t3); + src += 4 * src_stride; + t4 = vld1_u8(src); + src += src_stride; + t5 = vld1_u8(src); + src += src_stride; + t6 = vld1_u8(src); + src += src_stride; + + /* Clamp sample range to [-128, 127] for 8-bit signed dot product. */ + s0 = vreinterpret_s8_u8(vsub_u8(t0, range_limit)); + s1 = vreinterpret_s8_u8(vsub_u8(t1, range_limit)); + s2 = vreinterpret_s8_u8(vsub_u8(t2, range_limit)); + s3 = vreinterpret_s8_u8(vsub_u8(t3, range_limit)); + s4 = vreinterpret_s8_u8(vsub_u8(t4, range_limit)); + s5 = vreinterpret_s8_u8(vsub_u8(t5, range_limit)); + s6 = vreinterpret_s8_u8(vsub_u8(t6, range_limit)); + s7 = vdup_n_s8(0); + s8 = vdup_n_s8(0); + s9 = vdup_n_s8(0); + + /* This operation combines a conventional transpose and the sample permute + * (see horizontal case) required before computing the dot product. + */ + transpose_concat_4x4(&s0, &s1, &s2, &s3, &s0123, tran_concat_tbl); + transpose_concat_4x4(&s1, &s2, &s3, &s4, &s1234, tran_concat_tbl); + transpose_concat_4x4(&s2, &s3, &s4, &s5, &s2345, tran_concat_tbl); + transpose_concat_4x4(&s3, &s4, &s5, &s6, &s3456, tran_concat_tbl); + transpose_concat_4x4(&s4, &s5, &s6, &s7, &s4567, tran_concat_tbl); + transpose_concat_4x4(&s5, &s6, &s7, &s8, &s5678, tran_concat_tbl); + transpose_concat_4x4(&s6, &s7, &s8, &s9, &s6789, tran_concat_tbl); + + do { + uint8x8_t t7, t8, t9, t10; + + load_u8_8x4(src, src_stride, &t7, &t8, &t9, &t10); + + s7 = vreinterpret_s8_u8(vsub_u8(t7, range_limit)); + s8 = vreinterpret_s8_u8(vsub_u8(t8, range_limit)); + s9 = vreinterpret_s8_u8(vsub_u8(t9, range_limit)); + s10 = vreinterpret_s8_u8(vsub_u8(t10, range_limit)); + + transpose_concat_4x4(&s7, &s8, &s9, &s10, &s78910, tran_concat_tbl); + + /* Merge new data into block from previous iteration. */ + samples_LUT.val[0] = s3456; + samples_LUT.val[1] = s78910; + s4567 = vqtbl2q_s8(samples_LUT, merge_block_tbl.val[0]); + s5678 = vqtbl2q_s8(samples_LUT, merge_block_tbl.val[1]); + s6789 = vqtbl2q_s8(samples_LUT, merge_block_tbl.val[2]); + + d0 = convolve8_4_dot_partial(s0123, s4567, correction, filters); + d1 = convolve8_4_dot_partial(s1234, s5678, correction, filters); + d2 = convolve8_4_dot_partial(s2345, s6789, correction, filters); + d3 = convolve8_4_dot_partial(s3456, s78910, correction, filters); + + d01 = vqrshrun_n_s16(vcombine_s16(vqmovn_s32(d0), vqmovn_s32(d1)), 7); + d23 = vqrshrun_n_s16(vcombine_s16(vqmovn_s32(d2), vqmovn_s32(d3)), 7); + + dd01 = load_u8(dst + 0 * dst_stride, dst_stride); + dd23 = load_u8(dst + 2 * dst_stride, dst_stride); + d01 = vrhadd_u8(d01, dd01); + d23 = vrhadd_u8(d23, dd23); + + vst1_lane_u32((uint32_t *)dst, vreinterpret_u32_u8(d01), 0); + dst += dst_stride; + vst1_lane_u32((uint32_t *)dst, vreinterpret_u32_u8(d01), 1); + dst += dst_stride; + vst1_lane_u32((uint32_t *)dst, vreinterpret_u32_u8(d23), 0); + dst += dst_stride; + vst1_lane_u32((uint32_t *)dst, vreinterpret_u32_u8(d23), 1); + dst += dst_stride; + + /* Prepare block for next iteration - re-using as much as possible. */ + /* Shuffle everything up four rows. */ + s0123 = s4567; + s1234 = s5678; + s2345 = s6789; + s3456 = s78910; + + src += 4 * src_stride; + h -= 4; + } while (h > 0); + } else { + const uint8x16x2_t tran_concat_tbl = vld1q_u8_x2(dot_prod_tran_concat_tbl); + int8x16_t s0123_lo, s0123_hi, s1234_lo, s1234_hi, s2345_lo, s2345_hi, + s3456_lo, s3456_hi, s4567_lo, s4567_hi, s5678_lo, s5678_hi, s6789_lo, + s6789_hi, s78910_lo, s78910_hi; + uint8x8_t d0, d1, d2, d3, dd0, dd1, dd2, dd3; + const uint8_t *s; + uint8_t *d; + int height; + + do { + height = h; + s = src; + d = dst; + + load_u8_8x4(s, src_stride, &t0, &t1, &t2, &t3); + s += 4 * src_stride; + t4 = vld1_u8(s); + s += src_stride; + t5 = vld1_u8(s); + s += src_stride; + t6 = vld1_u8(s); + s += src_stride; + + /* Clamp sample range to [-128, 127] for 8-bit signed dot product. */ + s0 = vreinterpret_s8_u8(vsub_u8(t0, range_limit)); + s1 = vreinterpret_s8_u8(vsub_u8(t1, range_limit)); + s2 = vreinterpret_s8_u8(vsub_u8(t2, range_limit)); + s3 = vreinterpret_s8_u8(vsub_u8(t3, range_limit)); + s4 = vreinterpret_s8_u8(vsub_u8(t4, range_limit)); + s5 = vreinterpret_s8_u8(vsub_u8(t5, range_limit)); + s6 = vreinterpret_s8_u8(vsub_u8(t6, range_limit)); + s7 = vdup_n_s8(0); + s8 = vdup_n_s8(0); + s9 = vdup_n_s8(0); + + /* This operation combines a conventional transpose and the sample permute + * (see horizontal case) required before computing the dot product. + */ + transpose_concat_8x4(&s0, &s1, &s2, &s3, &s0123_lo, &s0123_hi, + tran_concat_tbl); + transpose_concat_8x4(&s1, &s2, &s3, &s4, &s1234_lo, &s1234_hi, + tran_concat_tbl); + transpose_concat_8x4(&s2, &s3, &s4, &s5, &s2345_lo, &s2345_hi, + tran_concat_tbl); + transpose_concat_8x4(&s3, &s4, &s5, &s6, &s3456_lo, &s3456_hi, + tran_concat_tbl); + transpose_concat_8x4(&s4, &s5, &s6, &s7, &s4567_lo, &s4567_hi, + tran_concat_tbl); + transpose_concat_8x4(&s5, &s6, &s7, &s8, &s5678_lo, &s5678_hi, + tran_concat_tbl); + transpose_concat_8x4(&s6, &s7, &s8, &s9, &s6789_lo, &s6789_hi, + tran_concat_tbl); + + do { + uint8x8_t t7, t8, t9, t10; + + load_u8_8x4(s, src_stride, &t7, &t8, &t9, &t10); + + s7 = vreinterpret_s8_u8(vsub_u8(t7, range_limit)); + s8 = vreinterpret_s8_u8(vsub_u8(t8, range_limit)); + s9 = vreinterpret_s8_u8(vsub_u8(t9, range_limit)); + s10 = vreinterpret_s8_u8(vsub_u8(t10, range_limit)); + + transpose_concat_8x4(&s7, &s8, &s9, &s10, &s78910_lo, &s78910_hi, + tran_concat_tbl); + + /* Merge new data into block from previous iteration. */ + samples_LUT.val[0] = s3456_lo; + samples_LUT.val[1] = s78910_lo; + s4567_lo = vqtbl2q_s8(samples_LUT, merge_block_tbl.val[0]); + s5678_lo = vqtbl2q_s8(samples_LUT, merge_block_tbl.val[1]); + s6789_lo = vqtbl2q_s8(samples_LUT, merge_block_tbl.val[2]); + + samples_LUT.val[0] = s3456_hi; + samples_LUT.val[1] = s78910_hi; + s4567_hi = vqtbl2q_s8(samples_LUT, merge_block_tbl.val[0]); + s5678_hi = vqtbl2q_s8(samples_LUT, merge_block_tbl.val[1]); + s6789_hi = vqtbl2q_s8(samples_LUT, merge_block_tbl.val[2]); + + d0 = convolve8_8_dot_partial(s0123_lo, s4567_lo, s0123_hi, s4567_hi, + correction, filters); + d1 = convolve8_8_dot_partial(s1234_lo, s5678_lo, s1234_hi, s5678_hi, + correction, filters); + d2 = convolve8_8_dot_partial(s2345_lo, s6789_lo, s2345_hi, s6789_hi, + correction, filters); + d3 = convolve8_8_dot_partial(s3456_lo, s78910_lo, s3456_hi, s78910_hi, + correction, filters); + + dd0 = vld1_u8(d + 0 * dst_stride); + dd1 = vld1_u8(d + 1 * dst_stride); + dd2 = vld1_u8(d + 2 * dst_stride); + dd3 = vld1_u8(d + 3 * dst_stride); + d0 = vrhadd_u8(d0, dd0); + d1 = vrhadd_u8(d1, dd1); + d2 = vrhadd_u8(d2, dd2); + d3 = vrhadd_u8(d3, dd3); + + vst1_u8(d + 0 * dst_stride, d0); + vst1_u8(d + 1 * dst_stride, d1); + vst1_u8(d + 2 * dst_stride, d2); + vst1_u8(d + 3 * dst_stride, d3); + + /* Prepare block for next iteration - re-using as much as possible. */ + /* Shuffle everything up four rows. */ + s0123_lo = s4567_lo; + s0123_hi = s4567_hi; + s1234_lo = s5678_lo; + s1234_hi = s5678_hi; + s2345_lo = s6789_lo; + s2345_hi = s6789_hi; + s3456_lo = s78910_lo; + s3456_hi = s78910_hi; + + s += 4 * src_stride; + d += 4 * dst_stride; + height -= 4; + } while (height > 0); + src += 8; + dst += 8; + w -= 8; + } while (w > 0); + } +} + +#else + static INLINE void store_u8_8x8(uint8_t *s, const ptrdiff_t p, const uint8x8_t s0, const uint8x8_t s1, const uint8x8_t s2, const uint8x8_t s3, @@ -145,7 +881,7 @@ src += 4; dst += 4; w -= 4; - } while (w > 0); + } while (w != 0); } else { const int16x8_t filter3 = vdupq_lane_s16(vget_low_s16(filters), 3); const int16x8_t filter4 = vdupq_lane_s16(vget_high_s16(filters), 0); @@ -296,7 +1032,7 @@ s += 8; d += 8; width -= 8; - } while (width > 0); + } while (width != 0); src += 8 * src_stride; dst += 8 * dst_stride; h -= 8; @@ -402,7 +1138,7 @@ src += 4; dst += 4; w -= 4; - } while (w > 0); + } while (w != 0); } else { const int16x8_t filter3 = vdupq_lane_s16(vget_low_s16(filters), 3); const int16x8_t filter4 = vdupq_lane_s16(vget_high_s16(filters), 0); @@ -586,7 +1322,7 @@ s += 8; d += 8; width -= 8; - } while (width > 0); + } while (width != 0); src += 8 * src_stride; dst += 8 * dst_stride; h -= 8; @@ -679,7 +1415,7 @@ s5 = s9; s6 = s10; h -= 4; - } while (h > 0); + } while (h != 0); } else { const int16x8_t filter3 = vdupq_lane_s16(vget_low_s16(filters), 3); const int16x8_t filter4 = vdupq_lane_s16(vget_high_s16(filters), 0); @@ -759,11 +1495,11 @@ s5 = s9; s6 = s10; height -= 4; - } while (height > 0); + } while (height != 0); src += 8; dst += 8; w -= 8; - } while (w > 0); + } while (w != 0); } } @@ -860,7 +1596,7 @@ s5 = s9; s6 = s10; h -= 4; - } while (h > 0); + } while (h != 0); } else { const int16x8_t filter3 = vdupq_lane_s16(vget_low_s16(filters), 3); const int16x8_t filter4 = vdupq_lane_s16(vget_high_s16(filters), 0); @@ -950,10 +1686,12 @@ s5 = s9; s6 = s10; height -= 4; - } while (height > 0); + } while (height != 0); src += 8; dst += 8; w -= 8; - } while (w > 0); + } while (w != 0); } } + +#endif diff -Nru libvpx-1.10.0/vpx_dsp/arm/vpx_convolve8_neon.h libvpx-1.11.0/vpx_dsp/arm/vpx_convolve8_neon.h --- libvpx-1.10.0/vpx_dsp/arm/vpx_convolve8_neon.h 2021-03-18 19:59:46.000000000 +0000 +++ libvpx-1.11.0/vpx_dsp/arm/vpx_convolve8_neon.h 2021-10-06 17:41:19.000000000 +0000 @@ -72,6 +72,107 @@ *s7 = vld1q_u8(s); } +#if defined(__aarch64__) && defined(__ARM_FEATURE_DOTPROD) && \ + (__ARM_FEATURE_DOTPROD == 1) + +static INLINE int32x4_t convolve8_4_dot_partial(const int8x16_t samples_lo, + const int8x16_t samples_hi, + const int32x4_t correction, + const int8x8_t filters) { + /* Sample range-clamping and permutation are performed by the caller. */ + int32x4_t sum; + + /* Accumulate dot product into 'correction' to account for range clamp. */ + sum = vdotq_lane_s32(correction, samples_lo, filters, 0); + sum = vdotq_lane_s32(sum, samples_hi, filters, 1); + + /* Narrowing and packing is performed by the caller. */ + return sum; +} + +static INLINE int32x4_t convolve8_4_dot(uint8x16_t samples, + const int8x8_t filters, + const int32x4_t correction, + const uint8x16_t range_limit, + const uint8x16x2_t permute_tbl) { + int8x16_t clamped_samples, permuted_samples[2]; + int32x4_t sum; + + /* Clamp sample range to [-128, 127] for 8-bit signed dot product. */ + clamped_samples = vreinterpretq_s8_u8(vsubq_u8(samples, range_limit)); + + /* Permute samples ready for dot product. */ + /* { 0, 1, 2, 3, 1, 2, 3, 4, 2, 3, 4, 5, 3, 4, 5, 6 } */ + permuted_samples[0] = vqtbl1q_s8(clamped_samples, permute_tbl.val[0]); + /* { 4, 5, 6, 7, 5, 6, 7, 8, 6, 7, 8, 9, 7, 8, 9, 10 } */ + permuted_samples[1] = vqtbl1q_s8(clamped_samples, permute_tbl.val[1]); + + /* Accumulate dot product into 'correction' to account for range clamp. */ + sum = vdotq_lane_s32(correction, permuted_samples[0], filters, 0); + sum = vdotq_lane_s32(sum, permuted_samples[1], filters, 1); + + /* Narrowing and packing is performed by the caller. */ + return sum; +} + +static INLINE uint8x8_t convolve8_8_dot_partial(const int8x16_t samples0_lo, + const int8x16_t samples0_hi, + const int8x16_t samples1_lo, + const int8x16_t samples1_hi, + const int32x4_t correction, + const int8x8_t filters) { + /* Sample range-clamping and permutation are performed by the caller. */ + int32x4_t sum0, sum1; + int16x8_t sum; + + /* Accumulate dot product into 'correction' to account for range clamp. */ + /* First 4 output values. */ + sum0 = vdotq_lane_s32(correction, samples0_lo, filters, 0); + sum0 = vdotq_lane_s32(sum0, samples0_hi, filters, 1); + /* Second 4 output values. */ + sum1 = vdotq_lane_s32(correction, samples1_lo, filters, 0); + sum1 = vdotq_lane_s32(sum1, samples1_hi, filters, 1); + + /* Narrow and re-pack. */ + sum = vcombine_s16(vqmovn_s32(sum0), vqmovn_s32(sum1)); + return vqrshrun_n_s16(sum, 7); +} + +static INLINE uint8x8_t convolve8_8_dot(uint8x16_t samples, + const int8x8_t filters, + const int32x4_t correction, + const uint8x16_t range_limit, + const uint8x16x3_t permute_tbl) { + int8x16_t clamped_samples, permuted_samples[3]; + int32x4_t sum0, sum1; + int16x8_t sum; + + /* Clamp sample range to [-128, 127] for 8-bit signed dot product. */ + clamped_samples = vreinterpretq_s8_u8(vsubq_u8(samples, range_limit)); + + /* Permute samples ready for dot product. */ + /* { 0, 1, 2, 3, 1, 2, 3, 4, 2, 3, 4, 5, 3, 4, 5, 6 } */ + permuted_samples[0] = vqtbl1q_s8(clamped_samples, permute_tbl.val[0]); + /* { 4, 5, 6, 7, 5, 6, 7, 8, 6, 7, 8, 9, 7, 8, 9, 10 } */ + permuted_samples[1] = vqtbl1q_s8(clamped_samples, permute_tbl.val[1]); + /* { 8, 9, 10, 11, 9, 10, 11, 12, 10, 11, 12, 13, 11, 12, 13, 14 } */ + permuted_samples[2] = vqtbl1q_s8(clamped_samples, permute_tbl.val[2]); + + /* Accumulate dot product into 'correction' to account for range clamp. */ + /* First 4 output values. */ + sum0 = vdotq_lane_s32(correction, permuted_samples[0], filters, 0); + sum0 = vdotq_lane_s32(sum0, permuted_samples[1], filters, 1); + /* Second 4 output values. */ + sum1 = vdotq_lane_s32(correction, permuted_samples[1], filters, 0); + sum1 = vdotq_lane_s32(sum1, permuted_samples[2], filters, 1); + + /* Narrow and re-pack. */ + sum = vcombine_s16(vqmovn_s32(sum0), vqmovn_s32(sum1)); + return vqrshrun_n_s16(sum, 7); +} + +#endif + static INLINE int16x4_t convolve8_4(const int16x4_t s0, const int16x4_t s1, const int16x4_t s2, const int16x4_t s3, const int16x4_t s4, const int16x4_t s5, diff -Nru libvpx-1.10.0/vpx_dsp/arm/vpx_convolve_avg_neon.c libvpx-1.11.0/vpx_dsp/arm/vpx_convolve_avg_neon.c --- libvpx-1.10.0/vpx_dsp/arm/vpx_convolve_avg_neon.c 2021-03-18 19:59:46.000000000 +0000 +++ libvpx-1.11.0/vpx_dsp/arm/vpx_convolve_avg_neon.c 2021-10-06 17:41:19.000000000 +0000 @@ -43,7 +43,7 @@ vst1_lane_u32((uint32_t *)dst, vreinterpret_u32_u8(dd0), 1); dst += dst_stride; h -= 2; - } while (h > 0); + } while (h != 0); } else if (w == 8) { // avg8 uint8x8_t s0, s1, d0, d1; uint8x16_t s01, d01; @@ -64,7 +64,7 @@ vst1_u8(dst, vget_high_u8(d01)); dst += dst_stride; h -= 2; - } while (h > 0); + } while (h != 0); } else if (w < 32) { // avg16 uint8x16_t s0, s1, d0, d1; do { @@ -83,7 +83,7 @@ vst1q_u8(dst, d1); dst += dst_stride; h -= 2; - } while (h > 0); + } while (h != 0); } else if (w == 32) { // avg32 uint8x16_t s0, s1, s2, s3, d0, d1, d2, d3; do { @@ -110,7 +110,7 @@ vst1q_u8(dst + 16, d3); dst += dst_stride; h -= 2; - } while (h > 0); + } while (h != 0); } else { // avg64 uint8x16_t s0, s1, s2, s3, d0, d1, d2, d3; do { diff -Nru libvpx-1.10.0/vpx_dsp/arm/vpx_convolve_copy_neon.c libvpx-1.11.0/vpx_dsp/arm/vpx_convolve_copy_neon.c --- libvpx-1.10.0/vpx_dsp/arm/vpx_convolve_copy_neon.c 2021-03-18 19:59:46.000000000 +0000 +++ libvpx-1.11.0/vpx_dsp/arm/vpx_convolve_copy_neon.c 2021-10-06 17:41:19.000000000 +0000 @@ -33,7 +33,7 @@ src += src_stride; dst += dst_stride; h -= 2; - } while (h > 0); + } while (h != 0); } else if (w == 8) { // copy8 uint8x8_t s0, s1; do { @@ -47,7 +47,7 @@ vst1_u8(dst, s1); dst += dst_stride; h -= 2; - } while (h > 0); + } while (h != 0); } else if (w < 32) { // copy16 uint8x16_t s0, s1; do { @@ -61,7 +61,7 @@ vst1q_u8(dst, s1); dst += dst_stride; h -= 2; - } while (h > 0); + } while (h != 0); } else if (w == 32) { // copy32 uint8x16_t s0, s1, s2, s3; do { @@ -79,7 +79,7 @@ vst1q_u8(dst + 16, s3); dst += dst_stride; h -= 2; - } while (h > 0); + } while (h != 0); } else { // copy64 uint8x16_t s0, s1, s2, s3; do { diff -Nru libvpx-1.10.0/vpxenc.c libvpx-1.11.0/vpxenc.c --- libvpx-1.10.0/vpxenc.c 2021-03-18 19:59:46.000000000 +0000 +++ libvpx-1.11.0/vpxenc.c 2021-10-06 17:41:19.000000000 +0000 @@ -114,10 +114,6 @@ ARG_DEF(NULL, "pass", 1, "Pass to execute (1/2)"); static const arg_def_t fpf_name = ARG_DEF(NULL, "fpf", 1, "First pass statistics file name"); -#if CONFIG_FP_MB_STATS -static const arg_def_t fpmbf_name = - ARG_DEF(NULL, "fpmbf", 1, "First pass block statistics file name"); -#endif static const arg_def_t limit = ARG_DEF(NULL, "limit", 1, "Stop encoding after n input frames"); static const arg_def_t skip = @@ -287,6 +283,64 @@ &buf_sz, &buf_initial_sz, &buf_optimal_sz, NULL }; +#if CONFIG_VP9_ENCODER +static const arg_def_t use_vizier_rc_params = + ARG_DEF(NULL, "use-vizier-rc-params", 1, "Use vizier rc params"); +static const arg_def_t active_wq_factor = + ARG_DEF(NULL, "active-wq-factor", 1, "Active worst quality factor"); +static const arg_def_t err_per_mb_factor = + ARG_DEF(NULL, "err-per-mb-factor", 1, "Error per macroblock factor"); +static const arg_def_t sr_default_decay_limit = ARG_DEF( + NULL, "sr-default-decay-limit", 1, "Second reference default decay limit"); +static const arg_def_t sr_diff_factor = + ARG_DEF(NULL, "sr-diff-factor", 1, "Second reference diff factor"); +static const arg_def_t kf_err_per_mb_factor = ARG_DEF( + NULL, "kf-err-per-mb-factor", 1, "Keyframe error per macroblock factor"); +static const arg_def_t kf_frame_min_boost_factor = + ARG_DEF(NULL, "kf-frame-min-boost-factor", 1, "Keyframe min boost"); +static const arg_def_t kf_frame_max_boost_first_factor = + ARG_DEF(NULL, "kf-frame-max-boost-first-factor", 1, + "Max keyframe boost adjustment factor for first frame"); +static const arg_def_t kf_frame_max_boost_subs_factor = + ARG_DEF(NULL, "kf-frame-max-boost-subs-factor", 1, + "Max boost adjustment factor for subsequent KFs"); +static const arg_def_t kf_max_total_boost_factor = ARG_DEF( + NULL, "kf-max-total-boost-factor", 1, "Keyframe max total boost factor"); +static const arg_def_t gf_max_total_boost_factor = + ARG_DEF(NULL, "gf-max-total-boost-factor", 1, + "Golden frame max total boost factor"); +static const arg_def_t gf_frame_max_boost_factor = + ARG_DEF(NULL, "gf-frame-max-boost-factor", 1, + "Golden frame max per frame boost factor"); +static const arg_def_t zm_factor = + ARG_DEF(NULL, "zm-factor", 1, "Zero motion power factor"); +static const arg_def_t rd_mult_inter_qp_fac = + ARG_DEF(NULL, "rd-mult-inter-qp-fac", 1, + "RD multiplier adjustment for inter frames"); +static const arg_def_t rd_mult_arf_qp_fac = + ARG_DEF(NULL, "rd-mult-arf-qp-fac", 1, + "RD multiplier adjustment for alt-ref frames"); +static const arg_def_t rd_mult_key_qp_fac = ARG_DEF( + NULL, "rd-mult-key-qp-fac", 1, "RD multiplier adjustment for key frames"); +static const arg_def_t *vizier_rc_args[] = { &use_vizier_rc_params, + &active_wq_factor, + &err_per_mb_factor, + &sr_default_decay_limit, + &sr_diff_factor, + &kf_err_per_mb_factor, + &kf_frame_min_boost_factor, + &kf_frame_max_boost_first_factor, + &kf_frame_max_boost_subs_factor, + &kf_max_total_boost_factor, + &gf_max_total_boost_factor, + &gf_frame_max_boost_factor, + &zm_factor, + &rd_mult_inter_qp_fac, + &rd_mult_arf_qp_fac, + &rd_mult_key_qp_fac, + NULL }; +#endif + static const arg_def_t bias_pct = ARG_DEF(NULL, "bias-pct", 1, "CBR/VBR bias (0=CBR, 100=VBR)"); static const arg_def_t minsection_pct = @@ -573,6 +627,8 @@ #if CONFIG_VP9_ENCODER fprintf(fout, "\nVP9 Specific Options:\n"); arg_show_usage(fout, vp9_args); + fprintf(fout, "\nVizier Rate Control Options:\n"); + arg_show_usage(fout, vizier_rc_args); #endif fprintf(fout, "\nStream timebase (--timebase):\n" @@ -614,9 +670,6 @@ struct vpx_codec_enc_cfg cfg; const char *out_fn; const char *stats_fn; -#if CONFIG_FP_MB_STATS - const char *fpmb_stats_fn; -#endif stereo_format_t stereo_fmt; int arg_ctrls[ARG_CTRL_CNT_MAX][2]; int arg_ctrl_cnt; @@ -644,9 +697,6 @@ uint64_t cx_time; size_t nbytes; stats_io_t stats; -#if CONFIG_FP_MB_STATS - stats_io_t fpmb_stats; -#endif struct vpx_image *img; vpx_codec_ctx_t decoder; int mismatch_seen; @@ -883,10 +933,6 @@ config->out_fn = arg.val; } else if (arg_match(&arg, &fpf_name, argi)) { config->stats_fn = arg.val; -#if CONFIG_FP_MB_STATS - } else if (arg_match(&arg, &fpmbf_name, argi)) { - config->fpmb_stats_fn = arg.val; -#endif } else if (arg_match(&arg, &use_webm, argi)) { #if CONFIG_WEBM_IO config->write_webm = 1; @@ -983,6 +1029,40 @@ config->cfg.kf_max_dist = arg_parse_uint(&arg); } else if (arg_match(&arg, &kf_disabled, argi)) { config->cfg.kf_mode = VPX_KF_DISABLED; +#if CONFIG_VP9_ENCODER + } else if (arg_match(&arg, &use_vizier_rc_params, argi)) { + config->cfg.use_vizier_rc_params = arg_parse_int(&arg); + } else if (arg_match(&arg, &active_wq_factor, argi)) { + config->cfg.active_wq_factor = arg_parse_rational(&arg); + } else if (arg_match(&arg, &err_per_mb_factor, argi)) { + config->cfg.err_per_mb_factor = arg_parse_rational(&arg); + } else if (arg_match(&arg, &sr_default_decay_limit, argi)) { + config->cfg.sr_default_decay_limit = arg_parse_rational(&arg); + } else if (arg_match(&arg, &sr_diff_factor, argi)) { + config->cfg.sr_diff_factor = arg_parse_rational(&arg); + } else if (arg_match(&arg, &kf_err_per_mb_factor, argi)) { + config->cfg.kf_err_per_mb_factor = arg_parse_rational(&arg); + } else if (arg_match(&arg, &kf_frame_min_boost_factor, argi)) { + config->cfg.kf_frame_min_boost_factor = arg_parse_rational(&arg); + } else if (arg_match(&arg, &kf_frame_max_boost_first_factor, argi)) { + config->cfg.kf_frame_max_boost_first_factor = arg_parse_rational(&arg); + } else if (arg_match(&arg, &kf_frame_max_boost_subs_factor, argi)) { + config->cfg.kf_frame_max_boost_subs_factor = arg_parse_rational(&arg); + } else if (arg_match(&arg, &kf_max_total_boost_factor, argi)) { + config->cfg.kf_max_total_boost_factor = arg_parse_rational(&arg); + } else if (arg_match(&arg, &gf_max_total_boost_factor, argi)) { + config->cfg.gf_max_total_boost_factor = arg_parse_rational(&arg); + } else if (arg_match(&arg, &gf_frame_max_boost_factor, argi)) { + config->cfg.gf_frame_max_boost_factor = arg_parse_rational(&arg); + } else if (arg_match(&arg, &zm_factor, argi)) { + config->cfg.zm_factor = arg_parse_rational(&arg); + } else if (arg_match(&arg, &rd_mult_inter_qp_fac, argi)) { + config->cfg.rd_mult_inter_qp_fac = arg_parse_rational(&arg); + } else if (arg_match(&arg, &rd_mult_arf_qp_fac, argi)) { + config->cfg.rd_mult_arf_qp_fac = arg_parse_rational(&arg); + } else if (arg_match(&arg, &rd_mult_key_qp_fac, argi)) { + config->cfg.rd_mult_key_qp_fac = arg_parse_rational(&arg); +#endif #if CONFIG_VP9_HIGHBITDEPTH } else if (arg_match(&arg, &test16bitinternalarg, argi)) { if (strcmp(global->codec->name, "vp9") == 0) { @@ -1075,17 +1155,6 @@ fatal("Stream %d: duplicate stats file (from stream %d)", streami->index, stream->index); } - -#if CONFIG_FP_MB_STATS - /* Check for two streams sharing a mb stats file. */ - if (streami != stream) { - const char *a = stream->config.fpmb_stats_fn; - const char *b = streami->config.fpmb_stats_fn; - if (a && b && !strcmp(a, b)) - fatal("Stream %d: duplicate mb stats file (from stream %d)", - streami->index, stream->index); - } -#endif } } @@ -1177,6 +1246,10 @@ SHOW(kf_mode); SHOW(kf_min_dist); SHOW(kf_max_dist); + // Temporary use for debug + SHOW(use_vizier_rc_params); + SHOW(active_wq_factor.num); + SHOW(active_wq_factor.den); } static void open_output_file(struct stream_state *stream, @@ -1240,26 +1313,11 @@ fatal("Failed to open statistics store"); } -#if CONFIG_FP_MB_STATS - if (stream->config.fpmb_stats_fn) { - if (!stats_open_file(&stream->fpmb_stats, stream->config.fpmb_stats_fn, - pass)) - fatal("Failed to open mb statistics store"); - } else { - if (!stats_open_mem(&stream->fpmb_stats, pass)) - fatal("Failed to open mb statistics store"); - } -#endif - stream->config.cfg.g_pass = global->passes == 2 ? pass ? VPX_RC_LAST_PASS : VPX_RC_FIRST_PASS : VPX_RC_ONE_PASS; if (pass) { stream->config.cfg.rc_twopass_stats_in = stats_get(&stream->stats); -#if CONFIG_FP_MB_STATS - stream->config.cfg.rc_firstpass_mb_stats_in = - stats_get(&stream->fpmb_stats); -#endif } stream->cx_time = 0; @@ -1471,13 +1529,6 @@ pkt->data.twopass_stats.sz); stream->nbytes += pkt->data.raw.sz; break; -#if CONFIG_FP_MB_STATS - case VPX_CODEC_FPMB_STATS_PKT: - stats_write(&stream->fpmb_stats, pkt->data.firstpass_mb_stats.buf, - pkt->data.firstpass_mb_stats.sz); - stream->nbytes += pkt->data.raw.sz; - break; -#endif case VPX_CODEC_PSNR_PKT: if (global->show_psnr) { @@ -1971,10 +2022,6 @@ FOREACH_STREAM(stats_close(&stream->stats, global.passes - 1)); -#if CONFIG_FP_MB_STATS - FOREACH_STREAM(stats_close(&stream->fpmb_stats, global.passes - 1)); -#endif - if (global.pass) break; } diff -Nru libvpx-1.10.0/vpx_ports/x86.h libvpx-1.11.0/vpx_ports/x86.h --- libvpx-1.10.0/vpx_ports/x86.h 2021-03-18 19:59:46.000000000 +0000 +++ libvpx-1.11.0/vpx_ports/x86.h 2021-10-06 17:41:19.000000000 +0000 @@ -223,6 +223,8 @@ } } + (void)reg_eax; // Avoid compiler warning on unused-but-set variable. + return flags & mask; } @@ -240,7 +242,7 @@ // x86_readtsc directly, but prevent the CPU's out-of-order execution from // affecting the measurement (by having earlier/later instructions be evaluated // in the time interval). See the white paper, "How to Benchmark Code -// Execution Times on Intel® IA-32 and IA-64 Instruction Set Architectures" by +// Execution Times on Intel(R) IA-32 and IA-64 Instruction Set Architectures" by // Gabriele Paoloni for more information. // // If you are timing a large function (CPU time > a couple of seconds), use @@ -306,14 +308,26 @@ static INLINE unsigned int x86_tsc_start(void) { unsigned int reg_eax, reg_ebx, reg_ecx, reg_edx; + // This call should not be removed. See function notes above. cpuid(0, 0, reg_eax, reg_ebx, reg_ecx, reg_edx); + // Avoid compiler warnings on unused-but-set variables. + (void)reg_eax; + (void)reg_ebx; + (void)reg_ecx; + (void)reg_edx; return x86_readtsc(); } static INLINE unsigned int x86_tsc_end(void) { uint32_t v = x86_readtscp(); unsigned int reg_eax, reg_ebx, reg_ecx, reg_edx; + // This call should not be removed. See function notes above. cpuid(0, 0, reg_eax, reg_ebx, reg_ecx, reg_edx); + // Avoid compiler warnings on unused-but-set variables. + (void)reg_eax; + (void)reg_ebx; + (void)reg_ecx; + (void)reg_edx; return v; } diff -Nru libvpx-1.10.0/y4minput.c libvpx-1.11.0/y4minput.c --- libvpx-1.10.0/y4minput.c 2021-03-18 19:59:46.000000000 +0000 +++ libvpx-1.11.0/y4minput.c 2021-10-06 17:41:19.000000000 +0000 @@ -285,26 +285,6 @@ } } -/*Handles both 422 and 420mpeg2 to 422jpeg and 420jpeg, respectively.*/ -static void y4m_convert_42xmpeg2_42xjpeg(y4m_input *_y4m, unsigned char *_dst, - unsigned char *_aux) { - int c_w; - int c_h; - int c_sz; - int pli; - /*Skip past the luma data.*/ - _dst += _y4m->pic_w * _y4m->pic_h; - /*Compute the size of each chroma plane.*/ - c_w = (_y4m->pic_w + _y4m->dst_c_dec_h - 1) / _y4m->dst_c_dec_h; - c_h = (_y4m->pic_h + _y4m->dst_c_dec_v - 1) / _y4m->dst_c_dec_v; - c_sz = c_w * c_h; - for (pli = 1; pli < 3; pli++) { - y4m_42xmpeg2_42xjpeg_helper(_dst, _aux, c_w, c_h); - _dst += c_sz; - _aux += c_sz; - } -} - /*This format is only used for interlaced content, but is included for completeness. @@ -889,7 +869,8 @@ y4m_ctx->aux_buf = NULL; y4m_ctx->dst_buf = NULL; if (strcmp(y4m_ctx->chroma_type, "420") == 0 || - strcmp(y4m_ctx->chroma_type, "420jpeg") == 0) { + strcmp(y4m_ctx->chroma_type, "420jpeg") == 0 || + strcmp(y4m_ctx->chroma_type, "420mpeg2") == 0) { y4m_ctx->src_c_dec_h = y4m_ctx->dst_c_dec_h = y4m_ctx->src_c_dec_v = y4m_ctx->dst_c_dec_v = 2; y4m_ctx->dst_buf_read_sz = @@ -934,14 +915,6 @@ fprintf(stderr, "Unsupported conversion from 420p12 to 420jpeg\n"); return -1; } - } else if (strcmp(y4m_ctx->chroma_type, "420mpeg2") == 0) { - y4m_ctx->src_c_dec_h = y4m_ctx->dst_c_dec_h = y4m_ctx->src_c_dec_v = - y4m_ctx->dst_c_dec_v = 2; - y4m_ctx->dst_buf_read_sz = y4m_ctx->pic_w * y4m_ctx->pic_h; - /*Chroma filter required: read into the aux buf first.*/ - y4m_ctx->aux_buf_sz = y4m_ctx->aux_buf_read_sz = - 2 * ((y4m_ctx->pic_w + 1) / 2) * ((y4m_ctx->pic_h + 1) / 2); - y4m_ctx->convert = y4m_convert_42xmpeg2_42xjpeg; } else if (strcmp(y4m_ctx->chroma_type, "420paldv") == 0) { y4m_ctx->src_c_dec_h = y4m_ctx->dst_c_dec_h = y4m_ctx->src_c_dec_v = y4m_ctx->dst_c_dec_v = 2;