diff -Nru opus-1.1.1/celt/celt_decoder.c opus-1.1.2/celt/celt_decoder.c --- opus-1.1.1/celt/celt_decoder.c 2015-11-26 13:26:00.000000000 +0000 +++ opus-1.1.2/celt/celt_decoder.c 2016-01-12 22:27:45.000000000 +0000 @@ -457,10 +457,9 @@ VARDECL(celt_norm, X); #endif opus_uint32 seed; - opus_val16 *plcLogE; int end; int effEnd; - + opus_val16 decay; end = st->end; effEnd = IMAX(start, IMIN(end, mode->effEBands)); @@ -472,19 +471,13 @@ ALLOC(X, C*N, celt_norm); /**< Interleaved normalised MDCTs */ #endif - if (loss_count >= 5) - plcLogE = backgroundLogE; - else { - /* Energy decay */ - opus_val16 decay = loss_count==0 ? - QCONST16(1.5f, DB_SHIFT) : QCONST16(.5f, DB_SHIFT); - c=0; do - { - for (i=start;irng; for (c=0;c>1)); } while (++cdownsample, 0, st->arch); + celt_synthesis(mode, X, out_syn, oldBandE, start, effEnd, C, C, 0, LM, st->downsample, 0, st->arch); } else { /* Pitch-based PLC */ const opus_val16 *window; @@ -1037,10 +1030,18 @@ /* In case start or end were to change */ if (!isTransient) { + opus_val16 max_background_increase; OPUS_COPY(oldLogE2, oldLogE, 2*nbEBands); OPUS_COPY(oldLogE, oldBandE, 2*nbEBands); + /* In normal circumstances, we only allow the noise floor to increase by + up to 2.4 dB/second, but when we're in DTX, we allow up to 6 dB + increase for each update.*/ + if (st->loss_count < 10) + max_background_increase = M*QCONST16(0.001f,DB_SHIFT); + else + max_background_increase = QCONST16(1.f,DB_SHIFT); for (i=0;i<2*nbEBands;i++) - backgroundLogE[i] = MIN16(backgroundLogE[i] + M*QCONST16(0.001f,DB_SHIFT), oldBandE[i]); + backgroundLogE[i] = MIN16(backgroundLogE[i] + max_background_increase, oldBandE[i]); } else { for (i=0;i<2*nbEBands;i++) oldLogE[i] = MIN16(oldLogE[i], oldBandE[i]); diff -Nru opus-1.1.1/celt/celt_encoder.c opus-1.1.2/celt/celt_encoder.c --- opus-1.1.1/celt/celt_encoder.c 2015-11-26 13:26:00.000000000 +0000 +++ opus-1.1.2/celt/celt_encoder.c 2016-01-12 22:27:45.000000000 +0000 @@ -343,9 +343,9 @@ { int id; #ifdef FIXED_POINT - 
id = IMAX(0,IMIN(127,MULT16_32_Q15(tmp[i],norm))); /* Do not round to nearest */ + id = MAX32(0,MIN32(127,MULT16_32_Q15(tmp[i]+EPSILON,norm))); /* Do not round to nearest */ #else - id = IMAX(0,IMIN(127,(int)floor(64*norm*tmp[i]))); /* Do not round to nearest */ + id = (int)MAX32(0,MIN32(127,floor(64*norm*(tmp[i]+EPSILON)))); /* Do not round to nearest */ #endif unmask += inv_table[id]; } @@ -375,8 +375,8 @@ /* Looks for sudden increases of energy to decide whether we need to patch the transient decision */ -int patch_transient_decision(opus_val16 *newE, opus_val16 *oldE, int nbEBands, - int end, int C) +static int patch_transient_decision(opus_val16 *newE, opus_val16 *oldE, int nbEBands, + int start, int end, int C) { int i, c; opus_val32 mean_diff=0; @@ -385,28 +385,28 @@ avoid false detection caused by irrelevant bands */ if (C==1) { - spread_old[0] = oldE[0]; - for (i=1;i=0;i--) + for (i=end-2;i>=start;i--) spread_old[i] = MAX16(spread_old[i], spread_old[i+1]-QCONST16(1.0f, DB_SHIFT)); /* Compute mean increase */ c=0; do { - for (i=2;i QCONST16(1.f, DB_SHIFT); } @@ -1735,7 +1735,7 @@ time-domain analysis */ if (LM>0 && ec_tell(enc)+3<=total_bits && !isTransient && st->complexity>=5 && !st->lfe) { - if (patch_transient_decision(bandLogE, oldBandE, nbEBands, end, C)) + if (patch_transient_decision(bandLogE, oldBandE, nbEBands, start, end, C)) { isTransient = 1; shortBlocks = M; diff -Nru opus-1.1.1/celt/cpu_support.h opus-1.1.2/celt/cpu_support.h --- opus-1.1.1/celt/cpu_support.h 2015-11-26 13:26:00.000000000 +0000 +++ opus-1.1.2/celt/cpu_support.h 2016-01-12 22:27:45.000000000 +0000 @@ -45,16 +45,18 @@ #elif (defined(OPUS_X86_MAY_HAVE_SSE) && !defined(OPUS_X86_PRESUME_SSE)) || \ (defined(OPUS_X86_MAY_HAVE_SSE2) && !defined(OPUS_X86_PRESUME_SSE2)) || \ - (defined(OPUS_X86_MAY_HAVE_SSE4_1) && !defined(OPUS_X86_PRESUME_SSE4_1)) + (defined(OPUS_X86_MAY_HAVE_SSE4_1) && !defined(OPUS_X86_PRESUME_SSE4_1)) || \ + (defined(OPUS_X86_MAY_HAVE_AVX) && 
!defined(OPUS_X86_PRESUME_AVX)) #include "x86/x86cpu.h" -/* We currently support 4 x86 variants: +/* We currently support 5 x86 variants: * arch[0] -> non-sse * arch[1] -> sse * arch[2] -> sse2 * arch[3] -> sse4.1 + * arch[4] -> avx */ -#define OPUS_ARCHMASK 3 +#define OPUS_ARCHMASK 7 int opus_select_arch(void); #else diff -Nru opus-1.1.1/celt/quant_bands.c opus-1.1.2/celt/quant_bands.c --- opus-1.1.1/celt/quant_bands.c 2015-11-26 13:26:00.000000000 +0000 +++ opus-1.1.2/celt/quant_bands.c 2016-01-12 22:27:45.000000000 +0000 @@ -292,7 +292,7 @@ #endif } if (lfe) - max_decay=3; + max_decay = QCONST16(3.f,DB_SHIFT); enc_start_state = *enc; ALLOC(oldEBands_intra, C*m->nbEBands, opus_val16); diff -Nru opus-1.1.1/celt/rate.c opus-1.1.2/celt/rate.c --- opus-1.1.1/celt/rate.c 2015-11-26 13:26:00.000000000 +0000 +++ opus-1.1.2/celt/rate.c 2016-01-12 22:27:45.000000000 +0000 @@ -131,7 +131,7 @@ for (i=0;iHW_SSE = (info[3] & (1 << 25)) != 0; cpu_feature->HW_SSE2 = (info[3] & (1 << 26)) != 0; cpu_feature->HW_SSE41 = (info[2] & (1 << 19)) != 0; + cpu_feature->HW_AVX = (info[2] & (1 << 28)) != 0; } else { cpu_feature->HW_SSE = 0; cpu_feature->HW_SSE2 = 0; cpu_feature->HW_SSE41 = 0; + cpu_feature->HW_AVX = 0; } } @@ -138,6 +143,12 @@ { return arch; } + arch++; + + if (!cpu_feature.HW_AVX) + { + return arch; + } arch++; return arch; diff -Nru opus-1.1.1/celt/x86/x86cpu.h opus-1.1.2/celt/x86/x86cpu.h --- opus-1.1.1/celt/x86/x86cpu.h 2015-11-26 13:26:00.000000000 +0000 +++ opus-1.1.2/celt/x86/x86cpu.h 2016-01-12 22:27:45.000000000 +0000 @@ -46,6 +46,12 @@ # define MAY_HAVE_SSE4_1(name) name ## _c # endif +# if defined(OPUS_X86_MAY_HAVE_AVX) +# define MAY_HAVE_AVX(name) name ## _avx +# else +# define MAY_HAVE_AVX(name) name ## _c +# endif + # if defined(OPUS_HAVE_RTCD) int opus_select_arch(void); # endif diff -Nru opus-1.1.1/config.h.in opus-1.1.2/config.h.in --- opus-1.1.1/config.h.in 2015-11-26 13:26:00.000000000 +0000 +++ opus-1.1.2/config.h.in 2016-01-12 22:27:45.000000000 +0000 
@@ -121,6 +121,9 @@ /* Use run-time CPU capabilities detection */ #undef OPUS_HAVE_RTCD +/* Compiler supports X86 AVX Intrinsics */ +#undef OPUS_X86_MAY_HAVE_AVX + /* Compiler supports X86 SSE Intrinsics */ #undef OPUS_X86_MAY_HAVE_SSE @@ -130,6 +133,9 @@ /* Compiler supports X86 SSE4.1 Intrinsics */ #undef OPUS_X86_MAY_HAVE_SSE4_1 +/* Define if binary requires AVX intrinsics support */ +#undef OPUS_X86_PRESUME_AVX + /* Define if binary requires SSE intrinsics support */ #undef OPUS_X86_PRESUME_SSE diff -Nru opus-1.1.1/configure opus-1.1.2/configure --- opus-1.1.1/configure 2015-11-26 13:26:00.000000000 +0000 +++ opus-1.1.2/configure 2016-01-12 22:27:45.000000000 +0000 @@ -649,6 +649,7 @@ OPUS_ARM_NEON_INTR_TRUE CPU_ARM_FALSE CPU_ARM_TRUE +OPUS_X86_AVX_CFLAGS OPUS_X86_SSE4_1_CFLAGS OPUS_X86_SSE2_CFLAGS OPUS_X86_SSE_CFLAGS @@ -657,9 +658,12 @@ HAVE_ARM_NE10 OPUS_ARM_NEON_INTR_CFLAGS ARM_NEON_INTR_CFLAGS +X86_AVX_CFLAGS X86_SSE4_1_CFLAGS X86_SSE2_CFLAGS X86_SSE_CFLAGS +HAVE_AVX_FALSE +HAVE_AVX_TRUE HAVE_SSE4_1_FALSE HAVE_SSE4_1_TRUE HAVE_SSE2_FALSE @@ -848,6 +852,7 @@ X86_SSE_CFLAGS X86_SSE2_CFLAGS X86_SSE4_1_CFLAGS +X86_AVX_CFLAGS ARM_NEON_INTR_CFLAGS' @@ -1532,6 +1537,8 @@ C compiler flags to compile SSE2 intrinsics [default=-msse2] X86_SSE4_1_CFLAGS C compiler flags to compile SSE4.1 intrinsics [default=-msse4.1] + X86_AVX_CFLAGS + C compiler flags to compile AVX intrinsics [default=-mavx] ARM_NEON_INTR_CFLAGS C compiler flags to compile ARM NEON intrinsics [default=-mfpu=neon / -mfpu=neon -mfloat-abi=softfp] @@ -2365,7 +2372,7 @@ # For libtool. 
OPUS_LT_CURRENT=5 -OPUS_LT_REVISION=1 +OPUS_LT_REVISION=2 OPUS_LT_AGE=5 @@ -12949,6 +12956,15 @@ HAVE_SSE4_1_FALSE= fi + if false; then + HAVE_AVX_TRUE= + HAVE_AVX_FALSE='#' +else + HAVE_AVX_TRUE='#' + HAVE_AVX_FALSE= +fi + + @@ -12975,6 +12991,7 @@ + if ${X86_SSE_CFLAGS+:} false; then : else @@ -12990,6 +13007,11 @@ else X86_SSE4_1_CFLAGS="-msse4.1" fi +if ${X86_AVX_CFLAGS+:} false; then : + +else + X86_AVX_CFLAGS="-mavx" +fi if ${ARM_NEON_INTR_CFLAGS+:} false; then : else @@ -13473,6 +13495,83 @@ fi + { $as_echo "$as_me:${as_lineno-$LINENO}: checking if compiler supports AVX intrinsics" >&5 +$as_echo_n "checking if compiler supports AVX intrinsics... " >&6; } + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ +#include + +int +main () +{ + + static __m256 mtest; + mtest = _mm256_setzero_ps(); + + + ; + return 0; +} +_ACEOF +if ac_fn_c_try_link "$LINENO"; then : + + OPUS_X86_MAY_HAVE_AVX=1 + OPUS_X86_PRESUME_AVX=1 + { $as_echo "$as_me:${as_lineno-$LINENO}: result: yes" >&5 +$as_echo "yes" >&6; } + +else + + OPUS_X86_PRESUME_AVX=0 + { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5 +$as_echo "no" >&6; } + { $as_echo "$as_me:${as_lineno-$LINENO}: checking if compiler supports AVX intrinsics with $X86_AVX_CFLAGS" >&5 +$as_echo_n "checking if compiler supports AVX intrinsics with $X86_AVX_CFLAGS... " >&6; } + save_CFLAGS="$CFLAGS"; CFLAGS="$X86_AVX_CFLAGS $CFLAGS" + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. 
*/ +#include + +int +main () +{ + + static __m256 mtest; + mtest = _mm256_setzero_ps(); + + + ; + return 0; +} +_ACEOF +if ac_fn_c_try_link "$LINENO"; then : + + { $as_echo "$as_me:${as_lineno-$LINENO}: result: yes" >&5 +$as_echo "yes" >&6; } + OPUS_X86_MAY_HAVE_AVX=1 + +else + + { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5 +$as_echo "no" >&6; } + OPUS_X86_MAY_HAVE_AVX=0 + +fi +rm -f core conftest.err conftest.$ac_objext \ + conftest$ac_exeext conftest.$ac_ext + CFLAGS="$save_CFLAGS" + +fi +rm -f core conftest.err conftest.$ac_objext \ + conftest$ac_exeext conftest.$ac_ext + + if test x"$OPUS_X86_MAY_HAVE_AVX" = x"1" && test x"$OPUS_X86_PRESUME_AVX" != x"1"; then : + + OPUS_X86_AVX_CFLAGS="$X86_AVX_CFLAGS" + + + +fi if test x"$rtcd_support" = x"no"; then : rtcd_support="" fi @@ -13541,6 +13640,28 @@ $as_echo "$as_me: WARNING: Compiler does not support SSE4.1 intrinsics" >&2;} fi + if test x"$OPUS_X86_MAY_HAVE_AVX" = x"1"; then : + + +$as_echo "#define OPUS_X86_MAY_HAVE_AVX 1" >>confdefs.h + + intrinsics_support="$intrinsics_support AVX" + + if test x"$OPUS_X86_PRESUME_AVX" = x"1"; then : + +$as_echo "#define OPUS_X86_PRESUME_AVX 1" >>confdefs.h + +else + rtcd_support="$rtcd_support AVX" +fi + +else + + { $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: Compiler does not support AVX intrinsics" >&5 +$as_echo "$as_me: WARNING: Compiler does not support AVX intrinsics" >&2;} + +fi + if test x"$intrinsics_support" = x""; then : intrinsics_support=no else @@ -13691,6 +13812,14 @@ HAVE_SSE4_1_FALSE= fi + if test x"$OPUS_X86_MAY_HAVE_AVX" = x"1"; then + HAVE_AVX_TRUE= + HAVE_AVX_FALSE='#' +else + HAVE_AVX_TRUE='#' + HAVE_AVX_FALSE= +fi + if test x"$enable_rtcd" = x"yes"; then : @@ -14091,6 +14220,10 @@ as_fn_error $? "conditional \"HAVE_SSE4_1\" was never defined. Usually this means the macro was only invoked conditionally." "$LINENO" 5 fi +if test -z "${HAVE_AVX_TRUE}" && test -z "${HAVE_AVX_FALSE}"; then + as_fn_error $? 
"conditional \"HAVE_AVX\" was never defined. +Usually this means the macro was only invoked conditionally." "$LINENO" 5 +fi if test -z "${CPU_ARM_TRUE}" && test -z "${CPU_ARM_FALSE}"; then as_fn_error $? "conditional \"CPU_ARM\" was never defined. Usually this means the macro was only invoked conditionally." "$LINENO" 5 @@ -14115,6 +14248,10 @@ as_fn_error $? "conditional \"HAVE_SSE4_1\" was never defined. Usually this means the macro was only invoked conditionally." "$LINENO" 5 fi +if test -z "${HAVE_AVX_TRUE}" && test -z "${HAVE_AVX_FALSE}"; then + as_fn_error $? "conditional \"HAVE_AVX\" was never defined. +Usually this means the macro was only invoked conditionally." "$LINENO" 5 +fi if test -z "${HAVE_DOXYGEN_TRUE}" && test -z "${HAVE_DOXYGEN_FALSE}"; then as_fn_error $? "conditional \"HAVE_DOXYGEN\" was never defined. Usually this means the macro was only invoked conditionally." "$LINENO" 5 diff -Nru opus-1.1.1/configure.ac opus-1.1.2/configure.ac --- opus-1.1.1/configure.ac 2015-11-26 13:26:00.000000000 +0000 +++ opus-1.1.2/configure.ac 2016-01-12 22:27:45.000000000 +0000 @@ -23,7 +23,7 @@ # For libtool. dnl Please update these for releases. OPUS_LT_CURRENT=5 -OPUS_LT_REVISION=1 +OPUS_LT_REVISION=2 OPUS_LT_AGE=5 AC_SUBST(OPUS_LT_CURRENT) @@ -351,10 +351,12 @@ AM_CONDITIONAL([HAVE_SSE], [false]) AM_CONDITIONAL([HAVE_SSE2], [false]) AM_CONDITIONAL([HAVE_SSE4_1], [false]) +AM_CONDITIONAL([HAVE_AVX], [false]) m4_define([DEFAULT_X86_SSE_CFLAGS], [-msse]) m4_define([DEFAULT_X86_SSE2_CFLAGS], [-msse2]) m4_define([DEFAULT_X86_SSE4_1_CFLAGS], [-msse4.1]) +m4_define([DEFAULT_X86_AVX_CFLAGS], [-mavx]) m4_define([DEFAULT_ARM_NEON_INTR_CFLAGS], [-mfpu=neon]) # With GCC on ARM32 softfp architectures (e.g. Android, or older Ubuntu) you need to specify # -mfloat-abi=softfp for -mfpu=neon to work. However, on ARM32 hardfp architectures (e.g. 
newer Ubuntu), @@ -371,11 +373,13 @@ AC_ARG_VAR([X86_SSE_CFLAGS], [C compiler flags to compile SSE intrinsics @<:@default=]DEFAULT_X86_SSE_CFLAGS[@:>@]) AC_ARG_VAR([X86_SSE2_CFLAGS], [C compiler flags to compile SSE2 intrinsics @<:@default=]DEFAULT_X86_SSE2_CFLAGS[@:>@]) AC_ARG_VAR([X86_SSE4_1_CFLAGS], [C compiler flags to compile SSE4.1 intrinsics @<:@default=]DEFAULT_X86_SSE4_1_CFLAGS[@:>@]) +AC_ARG_VAR([X86_AVX_CFLAGS], [C compiler flags to compile AVX intrinsics @<:@default=]DEFAULT_X86_AVX_CFLAGS[@:>@]) AC_ARG_VAR([ARM_NEON_INTR_CFLAGS], [C compiler flags to compile ARM NEON intrinsics @<:@default=]DEFAULT_ARM_NEON_INTR_CFLAGS / DEFAULT_ARM_NEON_SOFTFP_INTR_CFLAGS[@:>@]) AS_VAR_SET_IF([X86_SSE_CFLAGS], [], [AS_VAR_SET([X86_SSE_CFLAGS], "DEFAULT_X86_SSE_CFLAGS")]) AS_VAR_SET_IF([X86_SSE2_CFLAGS], [], [AS_VAR_SET([X86_SSE2_CFLAGS], "DEFAULT_X86_SSE2_CFLAGS")]) AS_VAR_SET_IF([X86_SSE4_1_CFLAGS], [], [AS_VAR_SET([X86_SSE4_1_CFLAGS], "DEFAULT_X86_SSE4_1_CFLAGS")]) +AS_VAR_SET_IF([X86_AVX_CFLAGS], [], [AS_VAR_SET([X86_AVX_CFLAGS], "DEFAULT_X86_AVX_CFLAGS")]) AS_VAR_SET_IF([ARM_NEON_INTR_CFLAGS], [], [AS_VAR_SET([ARM_NEON_INTR_CFLAGS], ["$RESOLVED_DEFAULT_ARM_NEON_INTR_CFLAGS"])]) AC_DEFUN([OPUS_PATH_NE10], @@ -566,7 +570,24 @@ AC_SUBST([OPUS_X86_SSE4_1_CFLAGS]) ] ) - + OPUS_CHECK_INTRINSICS( + [AVX], + [$X86_AVX_CFLAGS], + [OPUS_X86_MAY_HAVE_AVX], + [OPUS_X86_PRESUME_AVX], + [[#include + ]], + [[ + static __m256 mtest; + mtest = _mm256_setzero_ps(); + ]] + ) + AS_IF([test x"$OPUS_X86_MAY_HAVE_AVX" = x"1" && test x"$OPUS_X86_PRESUME_AVX" != x"1"], + [ + OPUS_X86_AVX_CFLAGS="$X86_AVX_CFLAGS" + AC_SUBST([OPUS_X86_AVX_CFLAGS]) + ] + ) AS_IF([test x"$rtcd_support" = x"no"], [rtcd_support=""]) AS_IF([test x"$OPUS_X86_MAY_HAVE_SSE" = x"1"], [ @@ -606,6 +627,19 @@ [ AC_MSG_WARN([Compiler does not support SSE4.1 intrinsics]) ]) + AS_IF([test x"$OPUS_X86_MAY_HAVE_AVX" = x"1"], + [ + AC_DEFINE([OPUS_X86_MAY_HAVE_AVX], 1, [Compiler supports X86 AVX Intrinsics]) + 
intrinsics_support="$intrinsics_support AVX" + + AS_IF([test x"$OPUS_X86_PRESUME_AVX" = x"1"], + [AC_DEFINE([OPUS_X86_PRESUME_AVX], 1, [Define if binary requires AVX intrinsics support])], + [rtcd_support="$rtcd_support AVX"]) + ], + [ + AC_MSG_WARN([Compiler does not support AVX intrinsics]) + ]) + AS_IF([test x"$intrinsics_support" = x""], [intrinsics_support=no], [intrinsics_support="x86$intrinsics_support"] @@ -672,6 +706,8 @@ [test x"$OPUS_X86_MAY_HAVE_SSE2" = x"1"]) AM_CONDITIONAL([HAVE_SSE4_1], [test x"$OPUS_X86_MAY_HAVE_SSE4_1" = x"1"]) +AM_CONDITIONAL([HAVE_AVX], + [test x"$OPUS_X86_MAY_HAVE_AVX" = x"1"]) AS_IF([test x"$enable_rtcd" = x"yes"],[ AS_IF([test x"$rtcd_support" != x"no"],[ diff -Nru opus-1.1.1/debian/changelog opus-1.1.2/debian/changelog --- opus-1.1.1/debian/changelog 2016-01-07 18:20:07.000000000 +0000 +++ opus-1.1.2/debian/changelog 2016-02-17 15:40:01.000000000 +0000 @@ -1,14 +1,39 @@ +opus (1.1.2-1ubuntu1) xenial; urgency=medium + + * Merge with Debian; remaining changes: + - Convert to Debhelper 9 with a simpler rules file. + - Build using dh-autoreconf. + - Add a watch file. + - debian/control: close some Lintian errors. + - Pre-depend on multiarch-support. + - Add ${misc:Depends} to package dependencies. + - Convert to debian source format 3.0 (quilt). + - Run the tests. + + -- Matthias Klose Wed, 17 Feb 2016 16:33:28 +0100 + +opus (1.1.2-1) unstable; urgency=medium + + * Fixes the transient detector on silence. + * Fixes discontinuities in background noise after extended PLC. + * Make the CELT background noise estimator adapt more quickly on DTX update. + * Fixes max_decay for LFE in fixed-point. + * Fixes patch_transient_decision() for hybrid mode. + * Don't reset the RTCD arch on encoder/decoder reset. + + -- Ron Lee Wed, 13 Jan 2016 07:26:14 +1030 + opus (1.1.1-1ubuntu1) xenial; urgency=medium * Merge with Debian; remaining changes: - * Convert to Debhelper 9 with a simpler rules file. - * Build using dh-autoreconf. 
- * Add a watch file. - * debian/control: close some Lintian errors. - - Pre-depend on multiarch-support. - - Add ${misc:Depends} to package dependencies. - * Convert to debian source format 3.0 (quilt). - * Run the tests. + - Convert to Debhelper 9 with a simpler rules file. + - Build using dh-autoreconf. + - Add a watch file. + - debian/control: close some Lintian errors. + - Pre-depend on multiarch-support. + - Add ${misc:Depends} to package dependencies. + - Convert to debian source format 3.0 (quilt). + - Run the tests. -- Matthias Klose Thu, 07 Jan 2016 18:43:43 +0100 diff -Nru opus-1.1.1/doc/draft-ietf-codec-oggopus.xml opus-1.1.2/doc/draft-ietf-codec-oggopus.xml --- opus-1.1.1/doc/draft-ietf-codec-oggopus.xml 2015-11-26 13:26:00.000000000 +0000 +++ opus-1.1.2/doc/draft-ietf-codec-oggopus.xml 2016-01-12 22:27:45.000000000 +0000 @@ -4,14 +4,17 @@ + + ]> - + Ogg Encapsulation for the Opus Audio Codec @@ -60,7 +63,7 @@ - + RAI codec @@ -70,14 +73,6 @@ audio codec. This allows data encoded in the Opus format to be stored in an Ogg logical bitstream. -Ogg encapsulation provides Opus with a long-term storage format supporting - all of the essential features, including metadata, fast and accurate seeking, - corruption detection, recapture after errors, low overhead, and the ability to - multiplex Opus with other codecs (including video) with minimal buffering. -It also provides a live streamable format, capable of delivery over a reliable - stream-oriented transport, without requiring all the data, or even the total - length of the data, up-front, in a form that is identical to the on-disk - storage format. @@ -90,6 +85,14 @@ See for technical details. This document defines the encapsulation of Opus in a continuous, logical Ogg bitstream . 
+Ogg encapsulation provides Opus with a long-term storage format supporting + all of the essential features, including metadata, fast and accurate seeking, + corruption detection, recapture after errors, low overhead, and the ability to + multiplex Opus with other codecs (including video) with minimal buffering. +It also provides a live streamable format, capable of delivery over a reliable + stream-oriented transport, without requiring all the data, or even the total + length of the data, up-front, in a form that is identical to the on-disk + storage format. Ogg bitstreams are made up of a series of 'pages', each of which contains data @@ -104,8 +107,8 @@ Packets can be split arbitrarily across pages, and continued from one page to the next (allowing packets much larger than would fit on a single page). Each page contains 'lacing values' that indicate how the data is partitioned - into packets, allowing a demuxer to recover the packet boundaries without - examining the encoded data. + into packets, allowing a demultiplexer (demuxer) to recover the packet + boundaries without examining the encoded data. A packet is said to 'complete' on a page when the page contains the final lacing value corresponding to that packet. @@ -127,14 +130,6 @@ document are to be interpreted as described in . - -Implementations that fail to satisfy one or more "MUST" requirements are - considered non-compliant. -Implementations that satisfy all "MUST" requirements, but fail to satisfy one - or more "SHOULD" requirements are said to be "conditionally compliant". -All other implementations are "unconditionally compliant". - -
@@ -143,21 +138,18 @@ There are two mandatory header packets. -The granule position of the pages on which these packets complete MUST be zero. - - The first packet in the logical Ogg bitstream MUST contain the identification (ID) header, which uniquely identifies a stream as Opus audio. The format of this header is defined in . -It MUST be placed alone (without any other packet data) on the first page of - the logical Ogg bitstream, and MUST complete on that page. -This page MUST have its 'beginning of stream' flag set. +It is placed alone (without any other packet data) on the first page of + the logical Ogg bitstream, and completes on that page. +This page has its 'beginning of stream' flag set. The second packet in the logical Ogg bitstream MUST contain the comment header, which contains user-supplied metadata. The format of this header is defined in . -It MAY span one or more pages, beginning on the second page of the logical +It MAY span multiple pages, beginning on the second page of the logical stream. However many pages it spans, the comment header packet MUST finish the page on which it completes. @@ -173,40 +165,59 @@ logical Ogg bitstream. -The first N-1 Opus packets, if any, are packed one after another into the Ogg - packet, using the self-delimiting framing from Appendix B of +The first (N - 1) Opus packets, if any, are packed one after another + into the Ogg packet, using the self-delimiting framing from Appendix B of . The remaining Opus packet is packed at the end of the Ogg packet using the regular, undelimited framing from Section 3 of . All of the Opus packets in a single Ogg packet MUST be constrained to have the same duration. -A decoder SHOULD treat any Opus packet whose duration is different from that of - the first Opus packet in an Ogg packet as if it were a malformed Opus packet - with an invalid TOC sequence. 
- - -The coding mode (SILK, Hybrid, or CELT), audio bandwidth, channel count, - duration (frame size), and number of frames per packet, are indicated in the - TOC (table of contents) sequence at the beginning of each Opus packet, as - described in Section 3.1 of . -The combination of mode, audio bandwidth, and frame size is referred to as - the configuration of an Opus packet. - - -The first audio data page SHOULD NOT have the 'continued packet' flag set - (which would indicate the first audio data packet is continued from a previous - page). -Packets MUST be placed into Ogg pages in order until the end of stream. -Audio packets MAY span page boundaries. -A decoder MUST treat a zero-octet audio data packet as if it were a malformed - Opus packet as described in Section 3.4 of . - - -The last page SHOULD have the 'end of stream' flag set, but implementations - need to be prepared to deal with truncated streams that do not have a page - marked 'end of stream'. -The final packet on the last page SHOULD NOT be a continued packet, i.e., the - final lacing value SHOULD be less than 255. +An implementation of this specification SHOULD treat any Opus packet whose + duration is different from that of the first Opus packet in an Ogg packet as + if it were a malformed Opus packet with an invalid Table Of Contents (TOC) + sequence. + + +The TOC sequence at the beginning of each Opus packet indicates the coding + mode, audio bandwidth, channel count, duration (frame size), and number of + frames per packet, as described in Section 3.1 + of . +The coding mode is one of SILK, Hybrid, or Constrained Energy Lapped Transform + (CELT). +The combination of coding mode, audio bandwidth, and frame size is referred to + as the configuration of an Opus packet. + + +Packets are placed into Ogg pages in order until the end of stream. +Audio data packets might span page boundaries. 
+The first audio data page could have the 'continued packet' flag set + (indicating the first audio data packet is continued from a previous page) if, + for example, it was a live stream joined mid-broadcast, with the headers + pasted on the front. +A demuxer SHOULD NOT attempt to decode the data for the first packet on a page + with the 'continued packet' flag set if the previous page with packet data + does not end in a continued packet (i.e., did not end with a lacing value of + 255) or if the page sequence numbers are not consecutive, unless the demuxer + has some special knowledge that would allow it to interpret this data + despite the missing pieces. +An implementation MUST treat a zero-octet audio data packet as if it were a + malformed Opus packet as described in + Section 3.4 of . + + +A logical stream ends with a page with the 'end of stream' flag set, but + implementations need to be prepared to deal with truncated streams that do not + have a page marked 'end of stream'. +There is no reason for the final packet on the last page to be a continued + packet, i.e., for the final lacing value to be 255. +However, demuxers might encounter such streams, possibly as the result of a + transfer that did not complete or of corruption. +A demuxer SHOULD NOT attempt to decode the data from a packet that continues + onto a subsequent page (i.e., when the page ends with a lacing value of 255) + if the next page with packet data does not have the 'continued packet' flag + set or does not exist, or if the page sequence numbers are not consecutive, + unless the demuxer has some special knowledge that would allow it to interpret + this data despite the missing pieces. There MUST NOT be any more pages in an Opus logical bitstream after a page marked 'end of stream'. @@ -214,12 +225,23 @@
+The granule position MUST be zero for the ID header page and the + page where the comment header completes. +That is, the first page in the logical stream, and the last header + page before the first audio data page both have a granule position of zero. + + The granule position of an audio data page encodes the total number of PCM samples in the stream up to and including the last fully-decodable sample from the last packet completed on that page. +The granule position of the first audio data page will usually be larger than + zero, as described in . + + + A page that is entirely spanned by a single packet (that completes on a - subsequent page) has no granule position, and the granule position field MUST - be set to the special value '-1' in two's complement. + subsequent page) has no granule position, and the granule position field is + set to the special value '-1' in two's complement. @@ -259,8 +281,14 @@
In order to support capturing a real-time stream that has lost or not - transmitted packets, a muxer SHOULD emit packets that explicitly request the - use of Packet Loss Concealment (PLC) in place of the missing packets. + transmitted packets, a multiplexer (muxer) SHOULD emit packets that explicitly + request the use of Packet Loss Concealment (PLC) in place of the missing + packets. +Implementations that fail to do so still MUST NOT increment the granule + position for a page by anything other than the number of samples contained in + packets that actually complete on that page. + + Only gaps that are a multiple of 2.5 ms are repairable, as these are the only durations that can be created by packet loss or discontinuous transmission. @@ -359,16 +387,17 @@ These samples need to be stored and decoded, as Opus is an asymptotically convergent predictive codec, meaning the decoded contents of each frame depend on the recent history of decoder inputs. -However, a decoder will want to skip these samples after decoding them. +However, a player will want to skip these samples after decoding them. A 'pre-skip' field in the ID header (see ) signals - the number of samples which SHOULD be skipped (decoded but discarded) at the - beginning of the stream. + the number of samples that SHOULD be skipped (decoded but discarded) at the + beginning of the stream, though some specific applications might have a reason + for looking at that data. This amount need not be a multiple of 2.5 ms, MAY be smaller than a single packet, or MAY span the contents of several packets. -These samples are not valid audio, and SHOULD NOT be played. +These samples are not valid audio. @@ -394,32 +423,30 @@
-
- The PCM sample position is determined from the granule position using the formula - + + 'PCM sample position' = 'granule position' - 'pre-skip' .
- For example, if the granule position of the first audio data page is 59,971, and the pre-skip is 11,971, then the PCM sample position of the last decoded sample from that page is 48,000. -
- + + This can be converted into a playback time using the formula - + + 'playback time' (in seconds) = 'PCM sample position' / 48000.0 .
- The initial PCM sample position before any samples are played is normally '0'. @@ -438,7 +465,7 @@ exactly two packets, in order to allow the decoder to perform PCM position adjustments before needing to return any PCM data. Opus uses the pre-skip mechanism for this purpose instead, since the encoder - MAY introduce more than a single packet's worth of latency, and since very + might introduce more than a single packet's worth of latency, and since very large packets in streams with a very large number of channels might not fit on a single page. @@ -485,10 +512,10 @@ decoded samples prevents a demuxer from working backwards to assign each packet or each individual sample a valid granule position, since granule positions are non-negative. -A decoder MUST reject as invalid any stream where the granule position is - smaller than the number of samples contained in packets that complete on the - first audio data page with a completed packet, unless that page has the 'end - of stream' flag set. +An implementation MUST reject as invalid any stream where the granule position + is smaller than the number of samples contained in packets that complete on + the first audio data page with a completed packet, unless that page has the + 'end of stream' flag set. It MAY defer this action until it decodes the last packet completed on that page. @@ -515,21 +542,21 @@ Seeking in Ogg files is best performed using a bisection search for a page whose granule position corresponds to a PCM position at or before the seek target. -With appropriately weighted bisection, accurate seeking can be performed with - just three or four bisections even in multi-gigabyte files. -See for general implementation guidance. +With appropriately weighted bisection, accurate seeking can be performed in + just one or two bisections on average, even in multi-gigabyte files. +See for an example of general implementation guidance. 
-When seeking within an Ogg Opus stream, the decoder SHOULD start decoding (and - discarding the output) at least 3840 samples (80 ms) prior to the - seek target in order to ensure that the output audio is correct by the time it - reaches the seek target. +When seeking within an Ogg Opus stream, an implementation SHOULD start decoding + (and discarding the output) at least 3840 samples (80 ms) prior to + the seek target in order to ensure that the output audio is correct by the + time it reaches the seek target. This 'pre-roll' is separate from, and unrelated to, the 'pre-skip' used at the beginning of the stream. If the point 80 ms prior to the seek target comes before the initial PCM - sample position, the decoder SHOULD start decoding from the beginning of the - stream, applying pre-skip as normal, regardless of whether the pre-skip is + sample position, an implementation SHOULD start decoding from the beginning of + the stream, applying pre-skip as normal, regardless of whether the pre-skip is larger or smaller than 80 ms, and then continue to discard samples to reach the seek target (if any). @@ -539,7 +566,7 @@
-An Opus stream contains exactly two mandatory header packets: +An Ogg Opus logical stream contains exactly two mandatory header packets: an identification header and a comment header. @@ -570,7 +597,7 @@ The fields in the identification (ID) header have the following meaning: -Magic Signature: +Magic Signature: This is an 8-octet (64-bit) field that allows codec identification and is human-readable. @@ -589,7 +616,7 @@ invalid TOC sequence. -Version (8 bits, unsigned): +Version (8 bits, unsigned): The version number MUST always be '1' for this version of the encapsulation specification. @@ -599,14 +626,14 @@ That is, the version number can be split into "major" and "minor" version sub-fields, with changes to the "minor" sub-field (in the lower four bits) signaling compatible changes. -For example, a decoder implementing this specification SHOULD accept any stream +For example, an implementation of this specification SHOULD accept any stream with a version number of '15' or less, and SHOULD assume any stream with a version number '16' or greater is incompatible. The initial version '1' was chosen to keep implementations from relying on this octet as a null terminator for the "OpusHead" string. -Output Channel Count 'C' (8 bits, unsigned): +Output Channel Count 'C' (8 bits, unsigned): This is the number of output channels. This might be different than the number of encoded channels, which can change @@ -617,7 +644,7 @@ See for details. -Pre-skip (16 bits, unsigned, little +Pre-skip (16 bits, unsigned, little endian): This is the number of samples (at 48 kHz) to discard from the decoder @@ -628,9 +655,10 @@ convergence in the decoder. -Input Sample Rate (32 bits, unsigned, little +Input Sample Rate (32 bits, unsigned, little endian): +This is the sample rate of the original input (before encoding), in Hz. This field is not the sample rate to use for playback of the encoded data. @@ -639,8 +667,8 @@ Each packet in the stream can have a different audio bandwidth. 
Regardless of the audio bandwidth, the reference decoder supports decoding any stream at a sample rate of 8, 12, 16, 24, or 48 kHz. -The original sample rate of the encoder input is not preserved by the lossy - compression. +The original sample rate of the audio passed to the encoder is not preserved + by the lossy compression. An Ogg Opus player SHOULD select the playback sample rate according to the following procedure: @@ -649,56 +677,52 @@ Otherwise, if the hardware's highest available sample rate is a supported rate, decode at this sample rate. Otherwise, if the hardware's highest available sample rate is less than - 48 kHz, decode at the next highest supported rate above this and - resample. + 48 kHz, decode at the next higher Opus supported rate above the highest + available hardware rate and resample. Otherwise, decode at 48 kHz and resample. -However, the 'Input Sample Rate' field allows the encoder to pass the sample +However, the 'Input Sample Rate' field allows the muxer to pass the sample rate of the original input stream as metadata. This is useful when the user requires the output sample rate to match the input sample rate. -For example, a non-player decoder writing PCM format samples to disk might - choose to resample the output audio back to the original input sample rate to - reduce surprise to the user, who might reasonably expect to get back a file - with the same sample rate as the one they fed to the encoder. +For example, when not playing the output, an implementation writing PCM format + samples to disk might choose to resample the audio back to the original input + sample rate to reduce surprise to the user, who might reasonably expect to get + back a file with the same sample rate. A value of zero indicates 'unspecified'. 
-Encoders SHOULD write the actual input sample rate or zero, but decoder - implementations which do something with this field SHOULD take care to behave - sanely if given crazy values (e.g., do not actually upsample the output to - 10 MHz if requested). -Input sample rates between 8 kHz and 192 kHz (inclusive) SHOULD be - supported. +Muxers SHOULD write the actual input sample rate or zero, but implementations + which do something with this field SHOULD take care to behave sanely if given + crazy values (e.g., do not actually upsample the output to 10 MHz if + requested). +Implementations SHOULD support input sample rates between 8 kHz and + 192 kHz (inclusive). Rates outside this range MAY be ignored by falling back to the default rate of 48 kHz instead. -Output Gain (16 bits, signed, little - endian): +Output Gain (16 bits, signed, little endian): -This is a gain to be applied by the decoder. -It is 20*log10 of the factor to scale the decoder output by to achieve the - desired playback volume, stored in a 16-bit, signed, two's complement +This is a gain to be applied when decoding. +It is 20*log10 of the factor by which to scale the decoder output to achieve + the desired playback volume, stored in a 16-bit, signed, two's complement fixed-point value with 8 fractional bits (i.e., Q7.8). + +To apply the gain, an implementation could use
- -To apply the gain, a decoder could use - - - where output_gain is the raw 16-bit value from the header. -
+ where output_gain is the raw 16-bit value from the header. -Virtually all players and media frameworks SHOULD apply it by default. +Players and media frameworks SHOULD apply it by default. If a player chooses to apply any volume adjustment or gain modification, such as the R128_TRACK_GAIN (see ), the adjustment MUST be applied in addition to this output gain in order to achieve playback at the normalized volume. -An encoder SHOULD set this field to zero, and instead apply any gain prior to +A muxer SHOULD set this field to zero, and instead apply any gain prior to encoding, when this is possible and does not conflict with the user's wishes. A nonzero output gain indicates the gain was adjusted after encoding, or that a user wished to adjust the gain for playback while preserving the ability @@ -712,19 +736,17 @@ saturating.
-Channel Mapping Family (8 bits, - unsigned): +Channel Mapping Family (8 bits, unsigned): This octet indicates the order and semantic meaning of the output channels. -Each possible value of this octet indicates a mapping family, which defines a - set of allowed channel counts, and the ordered set of channel names for each - allowed channel count. +Each currently specified value of this octet indicates a mapping family, which + defines a set of allowed channel counts, and the ordered set of channel names + for each allowed channel count. The details are described in . -Channel Mapping Table: +Channel Mapping Table: This table defines the mapping from encoded streams to output channels. -It is omitted when the channel mapping family is 0, but REQUIRED otherwise. Its contents are specified in . @@ -732,24 +754,23 @@ All fields in the ID headers are REQUIRED, except for the channel mapping - table, which is omitted when the channel mapping family is 0. -Implementations SHOULD reject ID headers which do not contain enough data for - these fields, even if they contain a valid Magic Signature. + table, which MUST be omitted when the channel mapping family is 0, but + is REQUIRED otherwise. +Implementations SHOULD reject streams with ID headers that do not contain + enough data for these fields, even if they contain a valid Magic Signature. Future versions of this specification, even backwards-compatible versions, might include additional fields in the ID header. If an ID header has a compatible major version, but a larger minor version, an implementation MUST NOT reject it for containing additional data not - specified here. -However, implementations MAY reject streams in which the ID header does not - complete on the first page. + specified here, provided it still completes on the first page.
An Ogg Opus stream allows mapping one number of Opus streams (N) to a possibly - larger number of decoded channels (M+N) to yet another number of output - channels (C), which might be larger or smaller than the number of decoded - channels. + larger number of decoded channels (M + N) to yet another number of + output channels (C), which might be larger or smaller than the number of + decoded channels. The order and meaning of these channels are defined by a channel mapping, which consists of the 'channel mapping family' octet and, for channel mapping families other than family 0, a channel mapping table, as illustrated in @@ -772,7 +793,7 @@ The fields in the channel mapping table have the following meaning: -Stream Count 'N' (8 bits, unsigned): +Stream Count 'N' (8 bits, unsigned): This is the total number of streams encoded in each Ogg packet. This value is necessary to correctly parse the packed Opus packets inside an @@ -783,51 +804,54 @@ For channel mapping family 0, this value defaults to 1, and is not coded. -Coupled Stream Count 'M' (8 bits, unsigned): +Coupled Stream Count 'M' (8 bits, unsigned): This is the number of streams whose decoders are to be configured to produce - two channels. + two channels (stereo). This MUST be no larger than the total number of streams, N. Each packet in an Opus stream has an internal channel count of 1 or 2, which can change from packet to packet. This is selected by the encoder depending on the bitrate and the audio being encoded. -The original channel count of the encoder input is not preserved by the lossy - compression. +The original channel count of the audio passed to the encoder is not + necessarily preserved by the lossy compression. Regardless of the internal channel count, any Opus stream can be decoded as mono (a single channel) or stereo (two channels) by appropriate initialization of the decoder. 
-The 'coupled stream count' field indicates that the first M Opus decoders are - to be initialized for stereo output, and the remaining N-M decoders are to be - initialized for mono only. -The total number of decoded channels, (M+N), MUST be no larger than 255, as - there is no way to index more channels than that in the channel mapping. +The 'coupled stream count' field indicates that the decoders for the first M + Opus streams are to be initialized for stereo (two-channel) output, and the + remaining (N - M) decoders are to be initialized for mono (a single + channel) only. +The total number of decoded channels, (M + N), MUST be no larger than + 255, as there is no way to index more channels than that in the channel + mapping. -For channel mapping family 0, this value defaults to C-1 (i.e., 0 for mono - and 1 for stereo), and is not coded. +For channel mapping family 0, this value defaults to (C - 1) + (i.e., 0 for mono and 1 for stereo), and is not coded. -Channel Mapping (8*C bits): +Channel Mapping (8*C bits): This contains one octet per output channel, indicating which decoded channel is to be used for each one. Let 'index' be the value of this octet for a particular output channel. -This value MUST either be smaller than (M+N), or be the special value 255. +This value MUST either be smaller than (M + N), or be the special + value 255. If 'index' is less than 2*M, the output MUST be taken from decoding stream ('index'/2) as stereo and selecting the left channel if 'index' is even, and the right channel if 'index' is odd. If 'index' is 2*M or larger, but less than 255, the output MUST be taken from - decoding stream ('index'-M) as mono. + decoding stream ('index' - M) as mono. If 'index' is 255, the corresponding output channel MUST contain pure silence. The number of output channels, C, is not constrained to match the number of - decoded channels (M+N). + decoded channels (M + N). 
A single index value MAY appear multiple times, i.e., the same decoded channel might be mapped to multiple output channels. Some decoded channels might not be assigned to any output channel, as well. -For channel mapping family 0, the first index defaults to 0, and if C==2, - the second index defaults to 1. +For channel mapping family 0, the first index defaults to 0, and if + C == 2, the second index defaults to 1. Neither index is coded. @@ -843,17 +867,18 @@ Allowed numbers of channels: 1 or 2. RTP mapping. +This is the same channel interpretation as . 1 channel: monophonic (mono). 2 channels: stereo (left, right). -Special mapping: This channel mapping value also +Special mapping: This channel mapping value also indicates that the contents consists of a single Opus stream that is stereo if - and only if C==2, with stream index 0 mapped to output channel 0 (mono, or - left channel) and stream index 1 mapped to output channel 1 (right channel) - if stereo. + and only if C == 2, with stream index 0 mapped to output + channel 0 (mono, or left channel) and stream index 1 mapped to + output channel 1 (right channel) if stereo. When the 'channel mapping family' octet has this value, the channel mapping table MUST be omitted from the ID header packet. @@ -862,13 +887,13 @@
Allowed numbers of channels: 1...8. -Vorbis channel order. +Vorbis channel order (see below). Each channel is assigned to a speaker location in a conventional surround arrangement. Specific locations depend on the number of channels, and are given below - in order of the corresponding channel indicies. + in order of the corresponding channel indices. 1 channel: monophonic (mono). 2 channels: stereo (left, right). @@ -885,11 +910,11 @@ as those used by the Vorbis codec . The ordering is different from the one used by the WAVE and - FLAC formats, + Free Lossless Audio Codec (FLAC) formats, so correct ordering requires permutation of the output channels when decoding to or encoding from those formats. -'LFE' here refers to a Low Frequency Effects, often mapped to a subwoofer - with no particular spatial position. +'LFE' here refers to a Low Frequency Effects channel, often mapped to a + subwoofer with no particular spatial position. Implementations SHOULD identify 'side' or 'rear' speaker locations with 'surround' and 'back' as appropriate when interfacing with audio formats or systems which prefer that terminology. @@ -904,10 +929,11 @@ Channels are unidentified. -General-purpose players SHOULD NOT attempt to play these streams, and offline - decoders MAY deinterleave the output into separate PCM files, one per channel. -Decoders SHOULD NOT produce output for channels mapped to stream index 255 - (pure silence) unless they have no other way to indicate the index of +General-purpose players SHOULD NOT attempt to play these streams. +Offline implementations MAY deinterleave the output into separate PCM files, + one per channel. +Implementations SHOULD NOT produce output for channels mapped to stream index + 255 (pure silence) unless they have no other way to indicate the index of non-silent channels.
@@ -916,8 +942,8 @@ title="Undefined Channel Mappings"> The remaining channel mapping families (2...254) are reserved. -A decoder encountering a reserved channel mapping family value SHOULD act as - though the value is 255. +A demuxer implementation encountering a reserved channel mapping family value + SHOULD act as though the value is 255.
@@ -931,16 +957,16 @@
-Implementations MAY use the following matricies to implement downmixing from +Implementations MAY use the following matrices to implement downmixing from multichannel files using Channel Mapping Family 1, which are known to give acceptable results for stereo. -Matricies for 3 and 4 channels are normalized so each coefficent row sums +Matrices for 3 and 4 channels are normalized so each coefficient row sums to 1 to avoid clipping. For 5 or more channels they are normalized to 2 as a compromise between clipping and dynamic range reduction. -In these matricies the front left and front right channels are generally +In these matrices the front left and front right channels are generally passed through directly. When a surround channel is split between both the left and right stereo channels, coefficients are chosen so their squares sum to 1, which @@ -958,7 +984,7 @@ ]]> Exact coefficient values are 1 and 1/sqrt(2), multiplied by - 1/(1 + 1/sqrt(2)) for normalization. + 1/(1 + 1/sqrt(2)) for normalization.
@@ -1027,8 +1053,8 @@ sqrt(3)/2/sqrt(2), multiplied by 2/(1 + 1/sqrt(2) + sqrt(3)/2 + 1/2 + sqrt(3)/2/sqrt(2) + 1/sqrt(2)) for normalization. -The coeffients are in the same order as in , - and the matricies above. +The coefficients are in the same order as in , + and the matrices above.
@@ -1044,8 +1070,8 @@ Exact coefficient values are 1, 1/sqrt(2), sqrt(3)/2 and 1/2, multiplied by 2/(2 + 2/sqrt(2) + sqrt(3)) for normalization. -The coeffients are in the same order as in , - and the matricies above. +The coefficients are in the same order as in , + and the matrices above. @@ -1093,7 +1119,7 @@ Vorbis, except (like Ogg Theora and Speex) the final "framing bit" specified in the Vorbis spec is not present. -Magic Signature: +Magic Signature: This is an 8-octet (64-bit) field that allows codec identification and is human-readable. @@ -1112,15 +1138,14 @@ invalid TOC sequence. -Vendor String Length (32 bits, unsigned, - little endian): +Vendor String Length (32 bits, unsigned, little endian): This field gives the length of the following vendor string, in octets. It MUST NOT indicate that the vendor string is longer than the rest of the packet. -Vendor String (variable length, UTF-8 vector): +Vendor String (variable length, UTF-8 vector): This is a simple human-readable tag for vendor information, encoded as a UTF-8 string . @@ -1128,12 +1153,11 @@ This tag is intended to identify the codec encoder and encapsulation implementations, for tracing differences in technical behavior. -User-facing encoding applications can use the 'ENCODER' user comment tag - to identify themselves. +User-facing applications can use the 'ENCODER' user comment tag to identify + themselves. -User Comment List Length (32 bits, unsigned, - little endian): +User Comment List Length (32 bits, unsigned, little endian): This field indicates the number of user-supplied comments. It MAY indicate there are zero user-supplied comments, in which case there are @@ -1142,8 +1166,7 @@ lengths would require more data than is available in the rest of the packet. -User Comment #i String Length (32 bits, - unsigned, little endian): +User Comment #i String Length (32 bits, unsigned, little endian): This field gives the length of the following user comment string, in octets. 
There is one for each user comment indicated by the 'user comment list length' @@ -1151,8 +1174,7 @@ It MUST NOT indicate that the string is longer than the rest of the packet. -User Comment #i String (variable length, UTF-8 - vector): +User Comment #i String (variable length, UTF-8 vector): This field contains a single user comment string. There is one for each user comment indicated by the 'user comment list length' @@ -1179,14 +1201,16 @@ SHOULD preserve the contents of this data when updating the tags, but if this bit is 0, all such data MAY be treated as padding, and truncated or discarded as desired. +This allows informal experimentation with the format of this binary data until + it can be specified later. The comment header can be arbitrarily large and might be spread over a large number of Ogg pages. -Decoders SHOULD avoid attempting to allocate excessive amounts of memory when - presented with a very large comment header. -To accomplish this, decoders MAY reject a comment header larger than +Implementations MUST avoid attempting to allocate excessive amounts of memory + when presented with a very large comment header. +To accomplish this, implementations MAY reject a comment header larger than 125,829,120 octets, and MAY ignore individual comments that are not fully contained within the first 61,440 octets of the comment header. @@ -1201,38 +1225,36 @@ Two new comment tags are introduced here: +First, an optional gain for track normalization:
- An optional gain for track nomalization - -representing the volume shift needed to normalize the track's volume +
+ + representing the volume shift needed to normalize the track's volume during isolated playback, in random shuffle, and so on. The gain is a Q7.8 fixed point number in dB, as in the ID header's 'output gain' field. - - - This tag is similar to the REPLAYGAIN_TRACK_GAIN tag in Vorbis , except that the normal volume reference is the standard. +Second, an optional gain for album normalization:
- An optional gain for album nomalization - -representing the volume shift needed to normalize the overall volume when +
+ + representing the volume shift needed to normalize the overall volume when played as part of a particular collection of tracks. The gain is also a Q7.8 fixed point number in dB, as in the ID header's 'output gain' field. - - + -An Ogg Opus stream MUST NOT have more than one of each tag, and if present - their values MUST be an integer from -32768 to 32767, inclusive, +An Ogg Opus stream MUST NOT have more than one of each of these tags, and if + present their values MUST be an integer from -32768 to 32767, inclusive, represented in ASCII as a base 10 number with no whitespace. A leading '+' or '-' character is valid. Leading zeros are also permitted, but the value MUST be represented by @@ -1248,13 +1270,14 @@ in addition to the 'output gain' value. If a tool modifies the ID header's 'output gain' field, it MUST also update or remove the R128_TRACK_GAIN and R128_ALBUM_GAIN comment tags if present. -An encoder SHOULD assume that by default tools will respect the 'output gain' - field, and not the comment tag. +A muxer SHOULD place the gain it wants other tools to use by default into the + 'output gain' field, and not the comment tag. To avoid confusion with multiple normalization schemes, an Opus comment header SHOULD NOT contain any of the REPLAYGAIN_TRACK_GAIN, REPLAYGAIN_TRACK_PEAK, - REPLAYGAIN_ALBUM_GAIN, or REPLAYGAIN_ALBUM_PEAK tags. + REPLAYGAIN_ALBUM_GAIN, or REPLAYGAIN_ALBUM_PEAK tags, unless they are only + to be used in some context where there is guaranteed to be no such confusion. normalization is preferred to the earlier REPLAYGAIN schemes because of its clear definition and adoption by industry. Peak normalizations are difficult to calculate reliably for lossy codecs @@ -1272,17 +1295,20 @@ Technically, valid Opus packets can be arbitrarily large due to the padding format, although the amount of non-padding data they can contain is bounded. These packets might be spread over a similarly enormous number of Ogg pages. 
-Encoders SHOULD limit the use of padding in audio data packets to no more than - is necessary to make a variable bitrate (VBR) stream constant bitrate (CBR). -Decoders SHOULD reject audio data packets larger than 61,440 octets per Opus - stream. -Such packets necessarily contain more padding than needed for this purpose. -Decoders SHOULD avoid attempting to allocate excessive amounts of memory when +When encoding, implementations SHOULD limit the use of padding in audio data + packets to no more than is necessary to make a variable bitrate (VBR) stream + constant bitrate (CBR), unless they have no reasonable way to determine what + is necessary. +Demuxers SHOULD reject audio data packets (treat them as if they were malformed + Opus packets with an invalid TOC sequence) larger than 61,440 octets per + Opus stream, unless they have a specific reason for allowing extra padding. +Such packets necessarily contain more padding than needed to make a stream CBR. +Demuxers MUST avoid attempting to allocate excessive amounts of memory when presented with a very large packet. -Decoders MAY reject or partially process audio data packets larger than +Demuxers MAY reject or partially process audio data packets larger than 61,440 octets in an Ogg Opus stream with channel mapping families 0 or 1. -Decoders MAY reject or partially process audio data packets in any Ogg Opus +Demuxers MAY reject or partially process audio data packets in any Ogg Opus stream if the packet is larger than 61,440 octets and also larger than 7,680 octets per Opus stream. The presence of an extremely large packet in the stream could indicate a @@ -1327,25 +1353,28 @@ When encoding Opus streams, Ogg muxers SHOULD take into account the algorithmic delay of the Opus encoder. + +In encoders derived from the reference + implementation , the number of samples can be + queried with: +
- -In encoders derived from the reference implementation, the number of - samples can be queried with: -
-To achieve good quality in the very first samples of a stream, the Ogg encoder - MAY use linear predictive coding (LPC) extrapolation - to generate at least 120 extra samples at - the beginning to avoid the Opus encoder having to encode a discontinuous - signal. -For an input file containing 'length' samples, the Ogg encoder SHOULD set the - pre-skip header value to delay_samples+extra_samples, encode at least - length+delay_samples+extra_samples samples, and set the granulepos of the last - page to length+delay_samples+extra_samples. +To achieve good quality in the very first samples of a stream, implementations + MAY use linear predictive coding (LPC) extrapolation to generate at least 120 + extra samples at the beginning to avoid the Opus encoder having to encode a + discontinuous signal. +For more information on linear prediction, see + . +For an input file containing 'length' samples, the implementation SHOULD set + the pre-skip header value to (delay_samples + extra_samples), encode + at least (length + delay_samples + extra_samples) + samples, and set the granule position of the last page to + (length + delay_samples + extra_samples). This ensures that the encoded file has the same duration as the original, with no time offset. The best way to pad the end of the stream is to also use LPC extrapolation, but zero-padding is also acceptable. @@ -1360,12 +1389,12 @@ The last N samples are used as memory to an infinite impulse response (IIR) filter. -
- + The filter is then applied on a zero input to extrapolate the end of the signal. Let a(k) be the kth LPC coefficient and x(n) be the nth sample of the signal, each new sample past the end of the signal is computed as: - + +
Encode the last frame of the first segment as an independent frame by turning off all forms of inter-frame prediction. De-emphasis is allowed. -Set the granulepos of the last page to a point near the end of the last - frame. +Set the granule position of the last page to a point near the end of the + last frame. Begin the second segment with a copy of the last frame of the first segment. Set the pre-skip value of the second stream in such a way as to properly @@ -1409,19 +1438,19 @@ the encoder. -
- + In encoders derived from the reference implementation, inter-frame prediction can be turned off by calling: - + +
- +
+ For best results, this implementation requires that prediction be explicitly enabled again before resuming normal encoding, even after a reset. - -
+
@@ -1431,11 +1460,12 @@ A brief summary of major implementations of this draft is available at , - along with their status. + along with their status. [Note to RFC Editor: please remove this entire section before - final publication per .] + final publication per , along with + its references.]
@@ -1444,14 +1474,14 @@ Implementations of the Opus codec need to take appropriate security considerations into account, as outlined in . This is just as much a problem for the container as it is for the codec itself. -It is extremely important for the decoder to be robust against malicious - payloads. -Malicious payloads MUST NOT cause the decoder to overrun its allocated memory - or to take an excessive amount of resources to decode. -Although problems in encoders are typically rarer, the same applies to the - encoder. -Malicious audio streams MUST NOT cause the encoder to misbehave because this - would allow an attacker to attack transcoding gateways. +Robustness against malicious payloads is extremely important. +Malicious payloads MUST NOT cause an implementation to overrun its allocated + memory or to take an excessive amount of resources to decode. +Although problems in encoding applications are typically rarer, the same + applies to the muxer. +Malicious audio input streams MUST NOT cause an implementation to overrun its + allocated memory or consume excessive resources because this would allow an + attacker to attack transcoding gateways.
@@ -1471,18 +1501,19 @@ The RECOMMENDED mime-type for Ogg Opus files is "audio/ogg". -
- + If more specificity is desired, one MAY indicate the presence of Opus streams - using the codecs parameter defined in , e.g., - + using the codecs parameter defined in and + , e.g., + +
- - for an Ogg Opus file. -
+ + for an Ogg Opus file. + The RECOMMENDED filename extension for Ogg Opus files is '.opus'. @@ -1497,30 +1528,83 @@ multiplexed segment, e.g. video or multiple audio tracks. In such cases the '.opus' filename extension is NOT RECOMMENDED. -
-
-This document has no actions for IANA. +In either case, this document updates + to add 'opus' as a codecs parameter value with char[8]: 'OpusHead' + as Codec Identifier. + +
+ +
+ +This document updates the IANA Media Types registry to add .opus + as a file extension for "audio/ogg", and to add itself as a reference + alongside for "audio/ogg", "video/ogg", and + "application/ogg" Media Types. + + +This document defines a new registry "Opus Channel Mapping Families" to + indicate how the semantic meanings of the channels in a multi-channel Opus + stream are described. +IANA SHALL create a new name space of "Opus Channel Mapping Families". +All maintenance within and additions to the contents of this name space MUST be + according to the "Specification Required with Expert Review" registration + policy as defined in . +Each registry entry consists of a Channel Mapping Family Number, which is + specified in decimal in the range 0 to 255, inclusive, and a Reference (or + list of references). +Each Reference must point to sufficient documentation to describe what + information is coded in the Opus identification header for this channel + mapping family, how a demuxer determines the Stream Count ('N') and Coupled + Stream Count ('M') from this information, and how it determines the proper + interpretation of each of the decoded channels. + + +This document defines three initial assignments for this registry. + + +ValueReference +0[RFCXXXX] +1[RFCXXXX] +255[RFCXXXX] + + +The designated expert will determine if the Reference points to a specification + that meets the requirements for permanence and ready availability laid out + in  and that it specifies the information + described above with sufficient clarity to allow interoperable + implementations.
-Thanks to Mark Harris, Greg Maxwell, Christopher "Monty" Montgomery, and - Jean-Marc Valin for their valuable contributions to this document. +Thanks to Ben Campbell, Mark Harris, Greg Maxwell, Christopher "Monty" + Montgomery, Jean-Marc Valin, and Mo Zanaty for their valuable contributions to + this document. Additional thanks to Andrew D'Addesio, Greg Maxwell, and Vincent Penquerc'h for their feedback based on early implementations.
-
+
-The authors agree to grant third parties the irrevocable right to copy, use, - and distribute the work, with or without modification, in any medium, without - royalty, provided that, unless separate permission is granted, redistributed - modified works do not contain misleading author, version, name of work, or - endorsement information. +In , "RFCXXXX" is to be replaced with the RFC number + assigned to this draft. + + +In the Copyright Notice at the start of the document, the following paragraph + is to be appended after the regular copyright notice text: + + +"The licenses granted by the IETF Trust to this RFC under Section 3.c of + the Trust Legal Provisions shall also include the right to extract text from + Sections 1 through 14 of this RFC and create derivative works from + these extracts, and to copy, publish, display, and distribute such derivative + works in any medium and for any purpose, provided that no such derivative work + shall be presented, displayed, or published in a manner that states or implies + that it is part of this RFC or any other IETF Document."
@@ -1530,6 +1614,8 @@ &rfc2119; &rfc3533; &rfc3629; + &rfc4732; + &rfc5226; &rfc5334; &rfc6381; &rfc6716; @@ -1560,8 +1646,8 @@ - &rfc4732; &rfc6982; + &rfc7587; diff -Nru opus-1.1.1/doc/Makefile.in opus-1.1.2/doc/Makefile.in --- opus-1.1.1/doc/Makefile.in 2015-11-26 13:26:00.000000000 +0000 +++ opus-1.1.2/doc/Makefile.in 2016-01-12 22:27:45.000000000 +0000 @@ -179,6 +179,7 @@ OPUS_LT_AGE = @OPUS_LT_AGE@ OPUS_LT_CURRENT = @OPUS_LT_CURRENT@ OPUS_LT_REVISION = @OPUS_LT_REVISION@ +OPUS_X86_AVX_CFLAGS = @OPUS_X86_AVX_CFLAGS@ OPUS_X86_SSE2_CFLAGS = @OPUS_X86_SSE2_CFLAGS@ OPUS_X86_SSE4_1_CFLAGS = @OPUS_X86_SSE4_1_CFLAGS@ OPUS_X86_SSE_CFLAGS = @OPUS_X86_SSE_CFLAGS@ @@ -199,6 +200,7 @@ SHELL = @SHELL@ STRIP = @STRIP@ VERSION = @VERSION@ +X86_AVX_CFLAGS = @X86_AVX_CFLAGS@ X86_SSE2_CFLAGS = @X86_SSE2_CFLAGS@ X86_SSE4_1_CFLAGS = @X86_SSE4_1_CFLAGS@ X86_SSE_CFLAGS = @X86_SSE_CFLAGS@ diff -Nru opus-1.1.1/include/opus_defines.h opus-1.1.2/include/opus_defines.h --- opus-1.1.1/include/opus_defines.h 2015-11-26 13:26:00.000000000 +0000 +++ opus-1.1.2/include/opus_defines.h 2016-01-12 22:27:45.000000000 +0000 @@ -523,10 +523,19 @@ * @hideinitializer */ #define OPUS_GET_DTX(x) OPUS_GET_DTX_REQUEST, __opus_check_int_ptr(x) /** Configures the depth of signal being encoded. + * * This is a hint which helps the encoder identify silence and near-silence. + * It represents the number of significant bits of linear intensity below + * which the signal contains ignorable quantization or other noise. + * + * For example, OPUS_SET_LSB_DEPTH(14) would be an appropriate setting + * for G.711 u-law input. OPUS_SET_LSB_DEPTH(16) would be appropriate + * for 16-bit linear pcm input with opus_encode_float(). + * * When using opus_encode() instead of opus_encode_float(), or when libopus * is compiled for fixed-point, the encoder uses the minimum of the value * set here and the value 16. + * * @see OPUS_GET_LSB_DEPTH * @param[in] x opus_int32: Input precision in bits, between 8 and 24 * (default: 24). 
diff -Nru opus-1.1.1/Makefile.in opus-1.1.2/Makefile.in --- opus-1.1.1/Makefile.in 2015-11-26 13:26:00.000000000 +0000 +++ opus-1.1.2/Makefile.in 2016-01-12 22:27:45.000000000 +0000 @@ -942,6 +942,7 @@ OPUS_LT_AGE = @OPUS_LT_AGE@ OPUS_LT_CURRENT = @OPUS_LT_CURRENT@ OPUS_LT_REVISION = @OPUS_LT_REVISION@ +OPUS_X86_AVX_CFLAGS = @OPUS_X86_AVX_CFLAGS@ OPUS_X86_SSE2_CFLAGS = @OPUS_X86_SSE2_CFLAGS@ OPUS_X86_SSE4_1_CFLAGS = @OPUS_X86_SSE4_1_CFLAGS@ OPUS_X86_SSE_CFLAGS = @OPUS_X86_SSE_CFLAGS@ @@ -962,6 +963,7 @@ SHELL = @SHELL@ STRIP = @STRIP@ VERSION = @VERSION@ +X86_AVX_CFLAGS = @X86_AVX_CFLAGS@ X86_SSE2_CFLAGS = @X86_SSE2_CFLAGS@ X86_SSE4_1_CFLAGS = @X86_SSE4_1_CFLAGS@ X86_SSE_CFLAGS = @X86_SSE_CFLAGS@ diff -Nru opus-1.1.1/Makefile.mips opus-1.1.2/Makefile.mips --- opus-1.1.1/Makefile.mips 2015-11-26 13:26:00.000000000 +0000 +++ opus-1.1.2/Makefile.mips 2016-01-12 22:27:45.000000000 +0000 @@ -108,11 +108,16 @@ OPUSCOMPARE_SRCS_C = src/opus_compare.c OPUSCOMPARE_OBJS := $(patsubst %.c,%$(OBJSUFFIX),$(OPUSCOMPARE_SRCS_C)) +TESTS := test_opus_api test_opus_decode test_opus_encode test_opus_padding + # Rules -all: lib opus_demo opus_compare test_opus_api test_opus_decode test_opus_encode test_opus_padding +all: lib opus_demo opus_compare $(TESTS) lib: $(TARGET) +check: all + for test in $(TESTS); do ./$$test; done + $(TARGET): $(OBJS) $(ARCHIVE.cmdline) @@ -153,4 +158,4 @@ $(OBJS) $(OPUSDEMO_OBJS) $(OPUSCOMPARE_OBJS) $(TESTOPUSAPI_OBJS) \ $(TESTOPUSDECODE_OBJS) $(TESTOPUSENCODE_OBJS) $(TESTOPUSPADDING_OBJS) -.PHONY: all lib clean +.PHONY: all lib clean force check diff -Nru opus-1.1.1/Makefile.unix opus-1.1.2/Makefile.unix --- opus-1.1.1/Makefile.unix 2015-11-26 13:26:00.000000000 +0000 +++ opus-1.1.2/Makefile.unix 2016-01-12 22:27:45.000000000 +0000 @@ -106,11 +106,16 @@ OPUSCOMPARE_SRCS_C = src/opus_compare.c OPUSCOMPARE_OBJS := $(patsubst %.c,%$(OBJSUFFIX),$(OPUSCOMPARE_SRCS_C)) +TESTS := test_opus_api test_opus_decode test_opus_encode test_opus_padding + # Rules -all: 
lib opus_demo opus_compare test_opus_api test_opus_decode test_opus_encode test_opus_padding +all: lib opus_demo opus_compare $(TESTS) lib: $(TARGET) +check: all + for test in $(TESTS); do ./$$test; done + $(TARGET): $(OBJS) $(ARCHIVE.cmdline) @@ -151,4 +156,4 @@ $(OBJS) $(OPUSDEMO_OBJS) $(OPUSCOMPARE_OBJS) $(TESTOPUSAPI_OBJS) \ $(TESTOPUSDECODE_OBJS) $(TESTOPUSENCODE_OBJS) $(TESTOPUSPADDING_OBJS) -.PHONY: all lib clean +.PHONY: all lib clean force check diff -Nru opus-1.1.1/package_version opus-1.1.2/package_version --- opus-1.1.1/package_version 2015-11-26 13:26:00.000000000 +0000 +++ opus-1.1.2/package_version 2016-01-12 22:27:45.000000000 +0000 @@ -1 +1 @@ -PACKAGE_VERSION="1.1.1" +PACKAGE_VERSION="1.1.2" diff -Nru opus-1.1.1/releases.sha2 opus-1.1.2/releases.sha2 --- opus-1.1.1/releases.sha2 2015-11-26 13:26:00.000000000 +0000 +++ opus-1.1.2/releases.sha2 1970-01-01 00:00:00.000000000 +0000 @@ -1,45 +0,0 @@ -b2f75c4ac5ab837845eb028413fae2a28754bfb0a6d76416e2af1441ef447649 opus-0.9.0.tar.gz -4e379a98ba95bbbfe9087ef10fdd05c8ac9060b6d695f587ea82a7b43a0df4fe opus-0.9.10.tar.gz -b1cad6846a8f819a141009fe3f8f10c946e8eff7e9c2339cd517bb136cc59eae opus-0.9.14.tar.gz -206221afc47b87496588013bd4523e1e9f556336c0813f4372773fc536dd4293 opus-0.9.1.tar.gz -6e85c1b57e1d7b7dfe2928bf92586b96b73a9067e054ede45bd8e6d24bd30582 opus-0.9.2.tar.gz -d916e34c18a396eb7dffc47af754f441af52a290b761e20db9aedb65928c699e opus-0.9.3.tar.gz -53801066fa97329768e7b871fd1495740269ec46802e1c9051aa7e78c6edee5b opus-0.9.5.tar.gz -3bfaeb25f4b4a625a0bc994d6fc6f6776a05193f60099e0a99f7530c6b256309 opus-0.9.6.tar.gz -1b69772c31c5cbaa43d1dfa5b1c495fc29712e8e0ff69d6f8ad46459e5c6715f opus-0.9.7.tar.gz -4aa30d2e0652ffb4a7a22cc8a29c4ce78267626f560a2d9213b1d2d4e618cf36 opus-0.9.8.tar.gz -2f62359f09151fa3b242040dc9b4c5b6bda15557c5daea59c8420f1a2ff328b7 opus-0.9.9.tar.gz -43bcea51afa531f32a6a5fdd9cba4bd496993e26a141217db3cccce6caa7cd74 opus-1.0.0-rc.tar.gz 
-9250fcc74472d45c1e14745542ec9c8d09982538aefed56962495614be3e0d2d opus-1.0.0.tar.gz -76bc0a31502a51dae9ab737b4db043b9ecfcd0b5861f0bfda41b662bd5b92227 opus-1.0.1-rc2.tar.gz -3de8d6809dac38971ebb305532d4ea532519d3bed08985f25d6c557f9ce5e8ff opus-1.0.1-rc3.tar.gz -8044397a6365a07117b08cbe8f9818bf7c93746908806ba74a2917187bbdda5f opus-1.0.1-rc.tar.gz -80fa5c3caf2ac0fd68f8a22cce1564fc46b368c773a17554887d0066fe1841ef opus-1.0.1.tar.gz -da615edbee5d019c1833071d69a4782c19f178cf9ca1401375036ecef25cd78a opus-1.0.2.tar.gz -191a089c92dbc403de6980463dd3604b65beb12d283c607e246c8076363cb49c opus-1.0.3.tar.gz -a8d40efe87f6c3e76725391457d46277878c7a816ae1642843261463133fa5c8 opus-1.1-alpha.tar.gz -ec1784287f385aef994b64734aaecae04860e61aa50fc6eef6643fa7e40dd193 opus-1.1-beta.tar.gz -8aa16360f59a94d3e38f38f28d24039f7663179682cbae82aa42f1dd9e52e6ed opus-1.1-rc.tar.gz -ebc87a086d4fe677c5e42d56888b1fd25af858e4179eae4f8656270410dffac3 opus-1.1-rc2.tar.gz -cbfd09c58cc10a4d3fcb727ad5d46d7bb549f8185ac922ee28b4581b52a7bee9 opus-1.1-rc3.tar.gz -b9727015a58affcf3db527322bf8c4d2fcf39f5f6b8f15dbceca20206cbe1d95 opus-1.1.tar.gz -0c668639dcd16b14709fc9dc49e6686606f5a256f2eaa1ebaa2f39a66f8626cd opus-1.1.1-beta.tar.gz -8071b968475c1a17f54b6840d6de9d9ee20f930e827b0401abe3c4cf4f3bf30a opusfile-0.1.tar.gz -b4a678b3b6c4adfb6aff1f67ef658becfe146ea7c7ff228e99543762171557f9 opusfile-0.2.tar.gz -94ac78ca4f74c4e43bc9fe4ec1ad0aa36f38ab90f45b0727c40dd1e96096e767 opus_testvectors-draft11.tar.gz -94ac78ca4f74c4e43bc9fe4ec1ad0aa36f38ab90f45b0727c40dd1e96096e767 opus_testvectors.tar.gz -5d2b99757bcb628bab2611f3ed27af6f35276ce3abc96c0ed4399d6c6463dda5 opus-tools-0.1.2.tar.gz -008317297d6ce84f84992abf8cc948a048a4fa135e1d1caf429fafde8965a792 opus-tools-0.1.3.tar.gz -de80485c5afa1fd83c0e16a0dd4860470c872997a7dd0a58e99b2ee8a93e5168 opus-tools-0.1.4.tar.gz -76678d0eb7a9b3d793bd0243f9ced9ab0ecdab263f5232ed940c8f5795fb0405 opus-tools-0.1.5.tar.gz -cc86dbc2a4d76da7e1ed9afee85448c8f798c465a5412233f178783220f3a2c1 
opus-tools-0.1.6.tar.gz -e0f08d301555dffc417604269b5a85d2bd197f259c7d6c957f370ffd33d6d9cd opus-tools-0.1.7.tar.gz -e4e188579ea1c4e4d5066460d4a7214a7eafe3539e9a4466fdc98af41ba4a2f6 opus-tools-0.1.8.tar.gz -4248927f2c4e316ea5b84fb02bd100bfec8fa4624a6910d77f0af7f0c6cb8baa opusfile-0.3.tar.gz -9836ea11706c44f36de92c4c9b1248e03a4c521e7fb2cff18a0cb4f8b0e79140 opusfile-0.4.tar.gz -f187906b1b35f7f0d7de6a759b4aab512a9279d23adb35d8009e7e33bd6a922a opusfile-0.4.zip -2ce52d006aeeec9f10260dbe3073c4636954a1ab19c82b8baafefe0180aa4a39 opusfile-0.5.tar.gz -b940d62beb15b5974764574b9f265481fe5b6ee16902fb705727546caf956261 opusfile-0.5.zip -2428717b356e139f18ed2fdb5ad990b5654a238907a0058200b39c46a7d03ea6 opusfile-0.6.tar.gz -753339225193df605372944889023b9b3c5378d672e8784d69fa241cd465278c opusfile-0.6.zip -b1873dd78c7fbc98cf65d6e10cfddb5c2c03b3af93f922139a2104baedb4643a opus-tools-0.1.9.tar.gz diff -Nru opus-1.1.1/silk/decode_pulses.c opus-1.1.2/silk/decode_pulses.c --- opus-1.1.1/silk/decode_pulses.c 2015-11-26 13:26:00.000000000 +0000 +++ opus-1.1.2/silk/decode_pulses.c 2016-01-12 22:27:45.000000000 +0000 @@ -69,9 +69,9 @@ sum_pulses[ i ] = ec_dec_icdf( psRangeDec, cdf_ptr, 8 ); /* LSB indication */ - while( sum_pulses[ i ] == MAX_PULSES + 1 ) { + while( sum_pulses[ i ] == SILK_MAX_PULSES + 1 ) { nLshifts[ i ]++; - /* When we've already got 10 LSBs, we shift the table to not allow (MAX_PULSES + 1) */ + /* When we've already got 10 LSBs, we shift the table to not allow (SILK_MAX_PULSES + 1) */ sum_pulses[ i ] = ec_dec_icdf( psRangeDec, silk_pulses_per_block_iCDF[ N_RATE_LEVELS - 1] + ( nLshifts[ i ] == 10 ), 8 ); } diff -Nru opus-1.1.1/silk/define.h opus-1.1.2/silk/define.h --- opus-1.1.1/silk/define.h 2015-11-26 13:26:00.000000000 +0000 +++ opus-1.1.2/silk/define.h 2016-01-12 22:27:45.000000000 +0000 @@ -169,7 +169,7 @@ #define N_RATE_LEVELS 10 /* Maximum sum of pulses per shell coding frame */ -#define MAX_PULSES 16 +#define SILK_MAX_PULSES 16 #define MAX_MATRIX_SIZE MAX_LPC_ORDER 
/* Max of LPC Order and LTP order */ diff -Nru opus-1.1.1/silk/encode_pulses.c opus-1.1.2/silk/encode_pulses.c --- opus-1.1.1/silk/encode_pulses.c 2015-11-26 13:26:00.000000000 +0000 +++ opus-1.1.2/silk/encode_pulses.c 2016-01-12 22:27:45.000000000 +0000 @@ -142,7 +142,7 @@ sumBits_Q5 = silk_rate_levels_BITS_Q5[ signalType >> 1 ][ k ]; for( i = 0; i < iter; i++ ) { if( nRshifts[ i ] > 0 ) { - sumBits_Q5 += nBits_ptr[ MAX_PULSES + 1 ]; + sumBits_Q5 += nBits_ptr[ SILK_MAX_PULSES + 1 ]; } else { sumBits_Q5 += nBits_ptr[ sum_pulses[ i ] ]; } @@ -162,9 +162,9 @@ if( nRshifts[ i ] == 0 ) { ec_enc_icdf( psRangeEnc, sum_pulses[ i ], cdf_ptr, 8 ); } else { - ec_enc_icdf( psRangeEnc, MAX_PULSES + 1, cdf_ptr, 8 ); + ec_enc_icdf( psRangeEnc, SILK_MAX_PULSES + 1, cdf_ptr, 8 ); for( k = 0; k < nRshifts[ i ] - 1; k++ ) { - ec_enc_icdf( psRangeEnc, MAX_PULSES + 1, silk_pulses_per_block_iCDF[ N_RATE_LEVELS - 1 ], 8 ); + ec_enc_icdf( psRangeEnc, SILK_MAX_PULSES + 1, silk_pulses_per_block_iCDF[ N_RATE_LEVELS - 1 ], 8 ); } ec_enc_icdf( psRangeEnc, sum_pulses[ i ], silk_pulses_per_block_iCDF[ N_RATE_LEVELS - 1 ], 8 ); } diff -Nru opus-1.1.1/silk/tables.h opus-1.1.2/silk/tables.h --- opus-1.1.1/silk/tables.h 2015-11-26 13:26:00.000000000 +0000 +++ opus-1.1.2/silk/tables.h 2016-01-12 22:27:45.000000000 +0000 @@ -47,8 +47,8 @@ extern const opus_uint8 silk_pitch_contour_10_ms_iCDF[ 12 ]; /* 12 */ extern const opus_uint8 silk_pitch_contour_10_ms_NB_iCDF[ 3 ]; /* 3 */ -extern const opus_uint8 silk_pulses_per_block_iCDF[ N_RATE_LEVELS ][ MAX_PULSES + 2 ]; /* 180 */ -extern const opus_uint8 silk_pulses_per_block_BITS_Q5[ N_RATE_LEVELS - 1 ][ MAX_PULSES + 2 ]; /* 162 */ +extern const opus_uint8 silk_pulses_per_block_iCDF[ N_RATE_LEVELS ][ SILK_MAX_PULSES + 2 ]; /* 180 */ +extern const opus_uint8 silk_pulses_per_block_BITS_Q5[ N_RATE_LEVELS - 1 ][ SILK_MAX_PULSES + 2 ]; /* 162 */ extern const opus_uint8 silk_rate_levels_iCDF[ 2 ][ N_RATE_LEVELS - 1 ]; /* 18 */ extern const opus_uint8 
silk_rate_levels_BITS_Q5[ 2 ][ N_RATE_LEVELS - 1 ]; /* 18 */ @@ -59,7 +59,7 @@ extern const opus_uint8 silk_shell_code_table1[ 152 ]; /* 152 */ extern const opus_uint8 silk_shell_code_table2[ 152 ]; /* 152 */ extern const opus_uint8 silk_shell_code_table3[ 152 ]; /* 152 */ -extern const opus_uint8 silk_shell_code_table_offsets[ MAX_PULSES + 1 ]; /* 17 */ +extern const opus_uint8 silk_shell_code_table_offsets[ SILK_MAX_PULSES + 1 ]; /* 17 */ extern const opus_uint8 silk_lsb_iCDF[ 2 ]; /* 2 */ diff -Nru opus-1.1.1/silk/x86/x86_silk_map.c opus-1.1.2/silk/x86/x86_silk_map.c --- opus-1.1.1/silk/x86/x86_silk_map.c 2015-11-26 13:26:00.000000000 +0000 +++ opus-1.1.2/silk/x86/x86_silk_map.c 2016-01-12 22:27:45.000000000 +0000 @@ -50,6 +50,7 @@ silk_inner_prod16_aligned_64_c, silk_inner_prod16_aligned_64_c, MAY_HAVE_SSE4_1( silk_inner_prod16_aligned_64 ), /* sse4.1 */ + MAY_HAVE_SSE4_1( silk_inner_prod16_aligned_64 ) /* avx */ }; #endif @@ -62,6 +63,7 @@ silk_VAD_GetSA_Q8_c, silk_VAD_GetSA_Q8_c, MAY_HAVE_SSE4_1( silk_VAD_GetSA_Q8 ), /* sse4.1 */ + MAY_HAVE_SSE4_1( silk_VAD_GetSA_Q8 ) /* avx */ }; void (*const SILK_NSQ_IMPL[ OPUS_ARCHMASK + 1 ] )( @@ -85,6 +87,7 @@ silk_NSQ_c, silk_NSQ_c, MAY_HAVE_SSE4_1( silk_NSQ ), /* sse4.1 */ + MAY_HAVE_SSE4_1( silk_NSQ ) /* avx */ }; void (*const SILK_VQ_WMAT_EC_IMPL[ OPUS_ARCHMASK + 1 ] )( @@ -104,6 +107,7 @@ silk_VQ_WMat_EC_c, silk_VQ_WMat_EC_c, MAY_HAVE_SSE4_1( silk_VQ_WMat_EC ), /* sse4.1 */ + MAY_HAVE_SSE4_1( silk_VQ_WMat_EC ) /* avx */ }; void (*const SILK_NSQ_DEL_DEC_IMPL[ OPUS_ARCHMASK + 1 ] )( @@ -127,6 +131,7 @@ silk_NSQ_del_dec_c, silk_NSQ_del_dec_c, MAY_HAVE_SSE4_1( silk_NSQ_del_dec ), /* sse4.1 */ + MAY_HAVE_SSE4_1( silk_NSQ_del_dec ) /* avx */ }; #if defined(FIXED_POINT) @@ -144,6 +149,7 @@ silk_warped_LPC_analysis_filter_FIX_c, silk_warped_LPC_analysis_filter_FIX_c, MAY_HAVE_SSE4_1( silk_warped_LPC_analysis_filter_FIX ), /* sse4.1 */ + MAY_HAVE_SSE4_1( silk_warped_LPC_analysis_filter_FIX ) /* avx */ }; void (*const 
SILK_BURG_MODIFIED_IMPL[ OPUS_ARCHMASK + 1 ] )( @@ -161,6 +167,7 @@ silk_burg_modified_c, silk_burg_modified_c, MAY_HAVE_SSE4_1( silk_burg_modified ), /* sse4.1 */ + MAY_HAVE_SSE4_1( silk_burg_modified ) /* avx */ }; #endif diff -Nru opus-1.1.1/src/analysis.c opus-1.1.2/src/analysis.c --- opus-1.1.1/src/analysis.c 2015-11-26 13:26:00.000000000 +0000 +++ opus-1.1.2/src/analysis.c 2016-01-12 22:27:45.000000000 +0000 @@ -138,6 +138,21 @@ } } +void tonality_analysis_init(TonalityAnalysisState *tonal) +{ + /* Initialize reusable fields. */ + tonal->arch = opus_select_arch(); + /* Clear remaining fields. */ + tonality_analysis_reset(tonal); +} + +void tonality_analysis_reset(TonalityAnalysisState *tonal) +{ + /* Clear non-reusable fields. */ + char *start = (char*)&tonal->TONALITY_ANALYSIS_RESET_START; + OPUS_CLEAR(start, sizeof(TonalityAnalysisState) - (start - (char*)tonal)); +} + void tonality_get_info(TonalityAnalysisState *tonal, AnalysisInfo *info_out, int len) { int pos; @@ -187,7 +202,7 @@ info_out->music_prob = psum; } -static void tonality_analysis(TonalityAnalysisState *tonal, const CELTMode *celt_mode, const void *x, int len, int offset, int c1, int c2, int C, int lsb_depth, downmix_func downmix, int arch) +static void tonality_analysis(TonalityAnalysisState *tonal, const CELTMode *celt_mode, const void *x, int len, int offset, int c1, int c2, int C, int lsb_depth, downmix_func downmix) { int i, b; const kiss_fft_state *kfft; @@ -260,7 +275,7 @@ remaining = len - (ANALYSIS_BUF_SIZE-tonal->mem_fill); downmix(x, &tonal->inmem[240], remaining, offset+ANALYSIS_BUF_SIZE-tonal->mem_fill, c1, c2, C); tonal->mem_fill = 240 + remaining; - opus_fft(kfft, in, out, arch); + opus_fft(kfft, in, out, tonal->arch); #ifndef FIXED_POINT /* If there's any NaN on the input, the entire output will be NaN, so we only need to check one value. 
*/ if (celt_isnan(out[0].r)) @@ -633,7 +648,7 @@ void run_analysis(TonalityAnalysisState *analysis, const CELTMode *celt_mode, const void *analysis_pcm, int analysis_frame_size, int frame_size, int c1, int c2, int C, opus_int32 Fs, - int lsb_depth, downmix_func downmix, AnalysisInfo *analysis_info, int arch) + int lsb_depth, downmix_func downmix, AnalysisInfo *analysis_info) { int offset; int pcm_len; @@ -646,7 +661,7 @@ pcm_len = analysis_frame_size - analysis->analysis_offset; offset = analysis->analysis_offset; do { - tonality_analysis(analysis, celt_mode, analysis_pcm, IMIN(480, pcm_len), offset, c1, c2, C, lsb_depth, downmix, arch); + tonality_analysis(analysis, celt_mode, analysis_pcm, IMIN(480, pcm_len), offset, c1, c2, C, lsb_depth, downmix); offset += 480; pcm_len -= 480; } while (pcm_len>0); diff -Nru opus-1.1.1/src/analysis.h opus-1.1.2/src/analysis.h --- opus-1.1.1/src/analysis.h 2015-11-26 13:26:00.000000000 +0000 +++ opus-1.1.2/src/analysis.h 2016-01-12 22:27:45.000000000 +0000 @@ -39,6 +39,8 @@ #define DETECT_SIZE 200 typedef struct { + int arch; +#define TONALITY_ANALYSIS_RESET_START angle float angle[240]; float d_angle[240]; float d2_angle[240]; @@ -78,10 +80,24 @@ AnalysisInfo info[DETECT_SIZE]; } TonalityAnalysisState; +/** Initialize a TonalityAnalysisState struct. + * + * This performs some possibly slow initialization steps which should + * not be repeated every analysis step. No allocated memory is retained + * by the state struct, so no cleanup call is required. + */ +void tonality_analysis_init(TonalityAnalysisState *analysis); + +/** Reset a TonalityAnalysisState struct. + * + * Call this when there's a discontinuity in the data. 
+ */ +void tonality_analysis_reset(TonalityAnalysisState *analysis); + void tonality_get_info(TonalityAnalysisState *tonal, AnalysisInfo *info_out, int len); void run_analysis(TonalityAnalysisState *analysis, const CELTMode *celt_mode, const void *analysis_pcm, int analysis_frame_size, int frame_size, int c1, int c2, int C, opus_int32 Fs, - int lsb_depth, downmix_func downmix, AnalysisInfo *analysis_info, int arch); + int lsb_depth, downmix_func downmix, AnalysisInfo *analysis_info); #endif diff -Nru opus-1.1.1/src/opus_decoder.c opus-1.1.2/src/opus_decoder.c --- opus-1.1.1/src/opus_decoder.c 2015-11-26 13:26:00.000000000 +0000 +++ opus-1.1.2/src/opus_decoder.c 2016-01-12 22:27:45.000000000 +0000 @@ -59,6 +59,7 @@ opus_int32 Fs; /** Sampling rate (at the API level) */ silk_DecControlStruct DecControl; int decode_gain; + int arch; /* Everything beyond this point gets cleared on a reset */ #define OPUS_DECODER_RESET_START stream_channels @@ -75,7 +76,6 @@ #endif opus_uint32 rangeFinal; - int arch; }; diff -Nru opus-1.1.1/src/opus_demo.c opus-1.1.2/src/opus_demo.c --- opus-1.1.1/src/opus_demo.c 2015-11-26 13:26:00.000000000 +0000 +++ opus-1.1.2/src/opus_demo.c 2016-01-12 22:27:45.000000000 +0000 @@ -866,7 +866,7 @@ 1e-3*bits_max*sampling_rate/frame_size); if (!decode_only) fprintf (stderr, "active bitrate: %7.3f kb/s\n", - 1e-3*bits_act*sampling_rate/(frame_size*(double)count_act)); + 1e-3*bits_act*sampling_rate/(1e-15+frame_size*(double)count_act)); fprintf (stderr, "bitrate standard deviation: %7.3f kb/s\n", 1e-3*sqrt(bits2/count - bits*bits/(count*(double)count))*sampling_rate/frame_size); /* Close any files to which intermediate results were stored */ diff -Nru opus-1.1.1/src/opus_encoder.c opus-1.1.2/src/opus_encoder.c --- opus-1.1.1/src/opus_encoder.c 2015-11-26 13:26:00.000000000 +0000 +++ opus-1.1.2/src/opus_encoder.c 2016-01-12 22:27:45.000000000 +0000 @@ -81,6 +81,10 @@ int lsb_depth; int encoder_buffer; int lfe; + int arch; +#ifndef DISABLE_FLOAT_API + 
TonalityAnalysisState analysis; +#endif #define OPUS_ENCODER_RESET_START stream_channels int stream_channels; @@ -100,12 +104,9 @@ StereoWidthState width_mem; opus_val16 delay_buffer[MAX_ENCODER_BUFFER*2]; #ifndef DISABLE_FLOAT_API - TonalityAnalysisState analysis; int detected_bandwidth; - int analysis_offset; #endif opus_uint32 rangeFinal; - int arch; }; /* Transition tables for the voice and music. First column is the @@ -243,6 +244,10 @@ st->mode = MODE_HYBRID; st->bandwidth = OPUS_BANDWIDTH_FULLBAND; +#ifndef DISABLE_FLOAT_API + tonality_analysis_init(&st->analysis); +#endif + return OPUS_OK; } @@ -1006,7 +1011,7 @@ analysis_read_subframe_bak = st->analysis.read_subframe; run_analysis(&st->analysis, celt_mode, analysis_pcm, analysis_size, frame_size, c1, c2, analysis_channels, st->Fs, - lsb_depth, downmix, &analysis_info, st->arch); + lsb_depth, downmix, &analysis_info); } #else (void)analysis_pcm; @@ -2449,11 +2454,14 @@ { void *silk_enc; silk_EncControlStruct dummy; + char *start; silk_enc = (char*)st+st->silk_enc_offset; +#ifndef DISABLE_FLOAT_API + tonality_analysis_reset(&st->analysis); +#endif - OPUS_CLEAR((char*)&st->OPUS_ENCODER_RESET_START, - sizeof(OpusEncoder)- - ((char*)&st->OPUS_ENCODER_RESET_START - (char*)st)); + start = (char*)&st->OPUS_ENCODER_RESET_START; + OPUS_CLEAR(start, sizeof(OpusEncoder) - (start - (char*)st)); celt_encoder_ctl(celt_enc, OPUS_RESET_STATE); silk_InitEncoder( silk_enc, st->arch, &dummy );