diff -Nru aom-3.6.0/aom/aomcx.h aom-3.6.1/aom/aomcx.h --- aom-3.6.0/aom/aomcx.h 2023-02-04 07:43:20.000000000 +0000 +++ aom-3.6.1/aom/aomcx.h 2023-05-08 18:17:52.000000000 +0000 @@ -616,11 +616,14 @@ * point (OP), int parameter * Possible values are in the form of "ABxy". * - AB: OP index. - * - xy: Target level index for the OP. Can be values 0~27 (corresponding to - * level 2.0 ~ 8.3, note levels 2.2, 2.3, 3.2, 3.3, 4.2 & 4.3 are - * undefined, and that levels 7.x and 8.x are in draft status), 31 - * (maximum parameters level, no level-based constraints) or 32 (keep - * level stats only for level monitoring). + * - xy: Target level index for the OP. Possible values are: + * + 0~27: corresponding to level 2.0 ~ 8.3. Note: + * > Levels 2.2 (2), 2.3 (3), 3.2 (6), 3.3 (7), 4.2 (10) & 4.3 (11) are + * undefined. + * > Levels 7.x and 8.x (20~27) are in draft status, available under the + * config flag CONFIG_CWG_C013. + * + 31: maximum parameters level, no level-based constraints. + * + 32: keep level stats only for level monitoring. 
* * E.g.: * - "0" means target level index 0 (2.0) for the 0th OP; diff -Nru aom-3.6.0/aom_dsp/noise_model.c aom-3.6.1/aom_dsp/noise_model.c --- aom-3.6.0/aom_dsp/noise_model.c 2023-02-04 07:43:20.000000000 +0000 +++ aom-3.6.1/aom_dsp/noise_model.c 2023-05-08 18:17:52.000000000 +0000 @@ -387,7 +387,7 @@ max_output_points = solver->num_bins; } - double *residual = aom_malloc(solver->num_bins * sizeof(*residual)); + double *residual = (double *)aom_malloc(solver->num_bins * sizeof(*residual)); if (!residual) { aom_noise_strength_lut_free(lut); return 0; @@ -1532,11 +1532,11 @@ ctx->bit_depth = bit_depth; ctx->noise_psd[0] = - aom_malloc(sizeof(*ctx->noise_psd[0]) * block_size * block_size); + (float *)aom_malloc(sizeof(*ctx->noise_psd[0]) * block_size * block_size); ctx->noise_psd[1] = - aom_malloc(sizeof(*ctx->noise_psd[1]) * block_size * block_size); + (float *)aom_malloc(sizeof(*ctx->noise_psd[1]) * block_size * block_size); ctx->noise_psd[2] = - aom_malloc(sizeof(*ctx->noise_psd[2]) * block_size * block_size); + (float *)aom_malloc(sizeof(*ctx->noise_psd[2]) * block_size * block_size); if (!ctx->noise_psd[0] || !ctx->noise_psd[1] || !ctx->noise_psd[2]) { fprintf(stderr, "Unable to allocate noise PSD buffers\n"); aom_denoise_and_model_free(ctx); @@ -1576,16 +1576,20 @@ aom_free(ctx->flat_blocks); ctx->flat_blocks = NULL; - ctx->denoised[0] = aom_malloc((sd->y_stride * sd->y_height) << use_highbd); - ctx->denoised[1] = aom_malloc((sd->uv_stride * sd->uv_height) << use_highbd); - ctx->denoised[2] = aom_malloc((sd->uv_stride * sd->uv_height) << use_highbd); + ctx->denoised[0] = + (uint8_t *)aom_malloc((sd->y_stride * sd->y_height) << use_highbd); + ctx->denoised[1] = + (uint8_t *)aom_malloc((sd->uv_stride * sd->uv_height) << use_highbd); + ctx->denoised[2] = + (uint8_t *)aom_malloc((sd->uv_stride * sd->uv_height) << use_highbd); if (!ctx->denoised[0] || !ctx->denoised[1] || !ctx->denoised[2]) { fprintf(stderr, "Unable to allocate denoise buffers\n"); return 0; } 
ctx->num_blocks_w = (sd->y_width + ctx->block_size - 1) / ctx->block_size; ctx->num_blocks_h = (sd->y_height + ctx->block_size - 1) / ctx->block_size; - ctx->flat_blocks = aom_malloc(ctx->num_blocks_w * ctx->num_blocks_h); + ctx->flat_blocks = + (uint8_t *)aom_malloc(ctx->num_blocks_w * ctx->num_blocks_h); if (!ctx->flat_blocks) { fprintf(stderr, "Unable to allocate flat_blocks buffer\n"); return 0; diff -Nru aom-3.6.0/aom_ports/aom_once.h aom-3.6.1/aom_ports/aom_once.h --- aom-3.6.0/aom_ports/aom_once.h 2023-02-04 07:43:20.000000000 +0000 +++ aom-3.6.1/aom_ports/aom_once.h 2023-05-08 18:17:52.000000000 +0000 @@ -39,6 +39,8 @@ */ #if CONFIG_MULTITHREAD && defined(_WIN32) +#undef WIN32_LEAN_AND_MEAN +#define WIN32_LEAN_AND_MEAN #include <windows.h> /* Declare a per-compilation-unit state variable to track the progress * of calling func() only once. This must be at global scope because diff -Nru aom-3.6.0/aom_ports/aom_timer.h aom-3.6.1/aom_ports/aom_timer.h --- aom-3.6.0/aom_ports/aom_timer.h 2023-02-04 07:43:20.000000000 +0000 +++ aom-3.6.1/aom_ports/aom_timer.h 2023-05-08 18:17:52.000000000 +0000 @@ -22,9 +22,10 @@ /* * Win32 specific includes */ -#ifndef WIN32_LEAN_AND_MEAN +#undef NOMINMAX +#define NOMINMAX +#undef WIN32_LEAN_AND_MEAN #define WIN32_LEAN_AND_MEAN -#endif #include <windows.h> #else /* diff -Nru aom-3.6.0/aom_ports/arm_cpudetect.c aom-3.6.1/aom_ports/arm_cpudetect.c --- aom-3.6.0/aom_ports/arm_cpudetect.c 2023-02-04 07:43:20.000000000 +0000 +++ aom-3.6.1/aom_ports/arm_cpudetect.c 2023-05-08 18:17:52.000000000 +0000 @@ -58,7 +58,9 @@ #elif defined(_MSC_VER) /* end !CONFIG_RUNTIME_CPU_DETECT || __APPLE__ */ /*For GetExceptionCode() and EXCEPTION_ILLEGAL_INSTRUCTION.*/ +#undef WIN32_LEAN_AND_MEAN #define WIN32_LEAN_AND_MEAN +#undef WIN32_EXTRA_LEAN #define WIN32_EXTRA_LEAN #include <windows.h> diff -Nru aom-3.6.0/aom_ports/x86.h aom-3.6.1/aom_ports/x86.h --- aom-3.6.0/aom_ports/x86.h 2023-02-04 07:43:20.000000000 +0000 +++ aom-3.6.1/aom_ports/x86.h 2023-05-08 18:17:52.000000000 +0000 @@
-148,6 +148,10 @@ #endif #if defined(_MSC_VER) && _MSC_VER >= 1700 +#undef NOMINMAX +#define NOMINMAX +#undef WIN32_LEAN_AND_MEAN +#define WIN32_LEAN_AND_MEAN #include <windows.h> #if WINAPI_FAMILY_PARTITION(WINAPI_FAMILY_APP) #define getenv(x) NULL diff -Nru aom-3.6.0/aom_util/aom_thread.h aom-3.6.1/aom_util/aom_thread.h --- aom-3.6.0/aom_util/aom_thread.h 2023-02-04 07:43:20.000000000 +0000 +++ aom-3.6.1/aom_util/aom_thread.h 2023-05-08 18:17:52.000000000 +0000 @@ -28,6 +28,11 @@ #if CONFIG_MULTITHREAD #if defined(_WIN32) && !HAVE_PTHREAD_H +// Prevent leaking max/min macros. +#undef NOMINMAX +#define NOMINMAX +#undef WIN32_LEAN_AND_MEAN +#define WIN32_LEAN_AND_MEAN #include <errno.h> // NOLINT #include <process.h> // NOLINT #include <windows.h> // NOLINT diff -Nru aom-3.6.0/AUTHORS aom-3.6.1/AUTHORS --- aom-3.6.0/AUTHORS 2023-02-04 07:43:20.000000000 +0000 +++ aom-3.6.1/AUTHORS 2023-05-08 18:17:52.000000000 +0000 @@ -147,6 +147,7 @@ Lauren Partin Lawrence Velázquez leolzhao +L. E. Segovia Lester Lu liang zhao Linfeng Zhang diff -Nru aom-3.6.0/av1/av1_cx_iface.c aom-3.6.1/av1/av1_cx_iface.c --- aom-3.6.0/av1/av1_cx_iface.c 2023-02-04 07:43:20.000000000 +0000 +++ aom-3.6.1/av1/av1_cx_iface.c 2023-05-08 18:17:52.000000000 +0000 @@ -2930,7 +2930,7 @@ AV1EncoderConfig *oxcf = &cpi->oxcf; const BLOCK_SIZE sb_size = av1_select_sb_size( oxcf, oxcf->frm_dim_cfg.width, oxcf->frm_dim_cfg.height, - cpi->svc.number_spatial_layers); + ppi->number_spatial_layers); oxcf->border_in_pixels = av1_get_enc_border_size(av1_is_resize_needed(oxcf), oxcf->kf_cfg.key_freq_max == 0, sb_size); diff -Nru aom-3.6.0/av1/common/arm/highbd_inv_txfm_neon.c aom-3.6.1/av1/common/arm/highbd_inv_txfm_neon.c --- aom-3.6.0/av1/common/arm/highbd_inv_txfm_neon.c 2023-02-04 07:43:20.000000000 +0000 +++ aom-3.6.1/av1/common/arm/highbd_inv_txfm_neon.c 2023-05-08 18:17:52.000000000 +0000 @@ -72,29 +72,12 @@ } } -static INLINE void av1_round_shift_rect_array_32_neon(int32x4_t *input, - int32x4_t *output, - const int size, - const int bit, - const int
val) { - const int32x4_t sqrt2 = vdupq_n_s32(val); - const int32x4_t v_bit = vdupq_n_s32(-bit); - const int32x4_t rnding = vdupq_n_s32(1 << (bit - 1)); - const int32x4_t rnding2 = vdupq_n_s32(1 << (NewSqrt2Bits - 1)); - int i; - if (bit > 0) { - for (i = 0; i < size; i++) { - int32x4_t vradd = vshlq_s32(input[i], rnding); - const int32x4_t r0 = vshlq_s32(vradd, v_bit); - const int32x4_t r1 = vmlaq_s32(rnding2, sqrt2, r0); - output[i] = vshrq_n_s32(r1, NewSqrt2Bits); - } - } else { - for (i = 0; i < size; i++) { - const int32x4_t r0 = vshlq_s32(input[i], v_bit); - const int32x4_t r1 = vmlaq_s32(rnding2, sqrt2, r0); - output[i] = vshrq_n_s32(r1, NewSqrt2Bits); - } +static INLINE void round_shift_rect_array_32_neon(int32x4_t *input, + int32x4_t *output, + const int size) { + for (int i = 0; i < size; i++) { + const int32x4_t r0 = vmulq_n_s32(input[i], NewInvSqrt2); + output[i] = vrshrq_n_s32(r0, NewSqrt2Bits); } } @@ -180,18 +163,13 @@ return vreinterpretq_u16_s16(clamped); } -static INLINE void round_shift_4x4(int32x4_t *in, int shift, - const int32x4_t *rnding) { +static INLINE void round_shift_4x4(int32x4_t *in, int shift) { if (shift != 0) { const int32x4_t v_shift = vdupq_n_s32(-shift); - int32x4_t vradd = vaddq_s32(in[0], *rnding); - in[0] = vshlq_s32(vradd, v_shift); - vradd = vaddq_s32(in[1], *rnding); - in[1] = vshlq_s32(vradd, v_shift); - vradd = vaddq_s32(in[2], *rnding); - in[2] = vshlq_s32(vradd, v_shift); - vradd = vaddq_s32(in[3], *rnding); - in[3] = vshlq_s32(vradd, v_shift); + in[0] = vrshlq_s32(in[0], v_shift); + in[1] = vrshlq_s32(in[1], v_shift); + in[2] = vrshlq_s32(in[2], v_shift); + in[3] = vrshlq_s32(in[3], v_shift); } } @@ -549,10 +527,9 @@ const int log_range_out = AOMMAX(16, bd + 6); const int32x4_t clamp_lo_out = vdupq_n_s32(-(1 << (log_range_out - 1))); const int32x4_t clamp_hi_out = vdupq_n_s32((1 << (log_range_out - 1)) - 1); - const int32x4_t rnding = vdupq_n_s32(1 << (out_shift - 1)); for (int i = 0; i < 32; i += 8) { - 
round_shift_4x4(out + i, out_shift, &rnding); - round_shift_4x4(out + i + 4, out_shift, &rnding); + round_shift_4x4(out + i, out_shift); + round_shift_4x4(out + i + 4, out_shift); } highbd_clamp_s32_neon(out, out, &clamp_lo_out, &clamp_hi_out, 32); } @@ -771,8 +748,7 @@ const int log_range = AOMMAX(16, bd + 6); const int32x4_t clamp_lo = vdupq_n_s32(-(1 << (log_range - 1))); const int32x4_t clamp_hi = vdupq_n_s32((1 << (log_range - 1)) - 1); - const int32x4_t rnding32 = vdupq_n_s32(1 << (out_shift - 1)); - round_shift_4x4(out, out_shift, &rnding32); + round_shift_4x4(out, out_shift); highbd_clamp_s32_neon(out, out, &clamp_lo, &clamp_hi, 4); } } @@ -781,8 +757,7 @@ int fliplr, int flipud, int shift, int bd) { uint32x4_t u0, u1, u2, u3; uint16x4_t v0, v1, v2, v3; - const int32x4_t rnding = vdupq_n_s32(1 << (shift - 1)); - round_shift_4x4(in, shift, &rnding); + round_shift_4x4(in, shift); v0 = vld1_u16(output + 0 * stride); v1 = vld1_u16(output + 1 * stride); @@ -857,8 +832,7 @@ const int log_range = AOMMAX(16, bd + 6); const int32x4_t clamp_lo = vdupq_n_s32(-(1 << (log_range - 1))); const int32x4_t clamp_hi = vdupq_n_s32((1 << (log_range - 1)) - 1); - const int32x4_t rnding32 = vdupq_n_s32(1 << (out_shift - 1)); - round_shift_4x4(out, out_shift, &rnding32); + round_shift_4x4(out, out_shift); highbd_clamp_s32_neon(out, out, &clamp_lo, &clamp_hi, 4); } v[0] = out[0]; @@ -1372,9 +1346,8 @@ const int log_range = AOMMAX(16, bd + 6); const int32x4_t clamp_lo = vdupq_n_s32(-(1 << (log_range - 1))); const int32x4_t clamp_hi = vdupq_n_s32((1 << (log_range - 1)) - 1); - const int32x4_t rnding = vdupq_n_s32(1 << (out_shift - 1)); - round_shift_4x4(out, out_shift, &rnding); - round_shift_4x4(out + 4, out_shift, &rnding); + round_shift_4x4(out, out_shift); + round_shift_4x4(out + 4, out_shift); highbd_clamp_s32_neon(out, out, &clamp_lo, &clamp_hi, 8); } } @@ -1651,9 +1624,8 @@ const int log_range_out = AOMMAX(16, bd + 6); const int32x4_t clamp_lo_out = vdupq_n_s32(-(1 << 
(log_range_out - 1))); const int32x4_t clamp_hi_out = vdupq_n_s32((1 << (log_range_out - 1)) - 1); - const int32x4_t rnding32 = vdupq_n_s32(1 << (out_shift - 1)); - round_shift_4x4(out, out_shift, &rnding32); - round_shift_4x4(out + 4, out_shift, &rnding32); + round_shift_4x4(out, out_shift); + round_shift_4x4(out + 4, out_shift); highbd_clamp_s32_neon(out, out, &clamp_lo_out, &clamp_hi_out, 8); } } @@ -3053,9 +3025,8 @@ const int log_range_out = AOMMAX(16, bd + 6); const int32x4_t clamp_lo_out = vdupq_n_s32(-(1 << (log_range_out - 1))); const int32x4_t clamp_hi_out = vdupq_n_s32((1 << (log_range_out - 1)) - 1); - const int32x4_t rnding = vdupq_n_s32(1 << (out_shift - 1)); for (int i = 0; i < 64; i += 4) { - round_shift_4x4(out + i, out_shift, &rnding); + round_shift_4x4(out + i, out_shift); highbd_clamp_s32_neon(out + i, out + i, &clamp_lo_out, &clamp_hi_out, 4); } } @@ -4187,9 +4158,8 @@ const int32x4_t clamp_lo_out = vdupq_n_s32(-(1 << (log_range_out - 1))); const int32x4_t clamp_hi_out = vdupq_n_s32((1 << (log_range_out - 1)) - 1); - const int32x4_t rnding32 = vdupq_n_s32(1 << (out_shift - 1)); for (i = 0; i < 64; i += 4) { - round_shift_4x4(out + i, out_shift, &rnding32); + round_shift_4x4(out + i, out_shift); highbd_clamp_s32_neon(out + i, out + i, &clamp_lo_out, &clamp_hi_out, 4); } @@ -4833,7 +4803,7 @@ const int32_t *input_row = input; int32x4_t *buf0_cur = buf0; load_buffer_32bit_input(input_row, input_stride, buf0_cur, txfm_size_row); - av1_round_shift_rect_array_32_neon(buf0, buf0, txfm_size_row, 0, NewInvSqrt2); + round_shift_rect_array_32_neon(buf0, buf0, txfm_size_row); row_txfm(buf0, buf0, INV_COS_BIT, 0, bd, -shift[0]); row_txfm(buf0 + 4, buf0 + 4, INV_COS_BIT, 0, bd, -shift[0]); @@ -4890,7 +4860,7 @@ TRANSPOSE_4X4(buf0[1], buf0[3], buf0[5], buf0[7], buf1[4], buf1[5], buf1[6], buf1[7]); - av1_round_shift_rect_array_32_neon(buf1, buf0, txfm_size_col, 0, NewInvSqrt2); + round_shift_rect_array_32_neon(buf1, buf0, txfm_size_col); row_txfm(buf0, buf0, 
INV_COS_BIT, 0, bd, -shift[0]); int32x4_t *buf1_ptr; @@ -5311,8 +5281,7 @@ load_buffer_32bit_input(input_row + j * 4, input_stride, buf0_cur, 4); } if (rect_type == 1 || rect_type == -1) { - av1_round_shift_rect_array_32_neon(buf0, buf0, input_stride, 0, - NewInvSqrt2); + round_shift_rect_array_32_neon(buf0, buf0, input_stride); } row_txfm(buf0, buf0, INV_COS_BIT, 0, bd, -shift[0]); @@ -5377,8 +5346,7 @@ buf0_cur[0], buf0_cur[1], buf0_cur[2], buf0_cur[3]); } if (rect_type == 1 || rect_type == -1) { - av1_round_shift_rect_array_32_neon( - buf0, buf0, (buf_size_nonzero_w_div8 << 3), 0, NewInvSqrt2); + round_shift_rect_array_32_neon(buf0, buf0, buf_size_nonzero_w_div8 << 3); } row_txfm(buf0, buf0, INV_COS_BIT, 0, bd, -shift[0]); @@ -5442,8 +5410,7 @@ load_buffer_32bit_input(input_row + j * 4, input_stride, buf0_cur, 4); } if (rect_type == 1 || rect_type == -1) { - av1_round_shift_rect_array_32_neon(buf0, buf0, input_stride, 0, - NewInvSqrt2); + round_shift_rect_array_32_neon(buf0, buf0, input_stride); } row_txfm(buf0, buf0, INV_COS_BIT, 0, bd, -shift[0]); @@ -5512,8 +5479,7 @@ buf0_cur[0], buf0_cur[1], buf0_cur[2], buf0_cur[3]); } if (rect_type == 1 || rect_type == -1) { - av1_round_shift_rect_array_32_neon( - buf0, buf0, buf_size_nonzero_w_div8 << 3, 0, NewInvSqrt2); + round_shift_rect_array_32_neon(buf0, buf0, buf_size_nonzero_w_div8 << 3); } row_txfm(buf0, buf0, INV_COS_BIT, 0, bd, -shift[0]); @@ -5597,8 +5563,7 @@ buf0_cur[0], buf0_cur[1], buf0_cur[2], buf0_cur[3]); } if (rect_type == 1 || rect_type == -1) { - av1_round_shift_rect_array_32_neon( - buf0, buf0, buf_size_nonzero_w_div8 << 3, 0, NewInvSqrt2); + round_shift_rect_array_32_neon(buf0, buf0, buf_size_nonzero_w_div8 << 3); } row_txfm(buf0, buf0, INV_COS_BIT, 0, bd, -shift[0]); diff -Nru aom-3.6.0/av1/common/av1_common_int.h aom-3.6.1/av1/common/av1_common_int.h --- aom-3.6.0/av1/common/av1_common_int.h 2023-02-04 07:43:20.000000000 +0000 +++ aom-3.6.1/av1/common/av1_common_int.h 2023-05-08 
18:17:52.000000000 +0000 @@ -1871,7 +1871,14 @@ // The following levels are currently undefined. seq_level_idx != SEQ_LEVEL_2_2 && seq_level_idx != SEQ_LEVEL_2_3 && seq_level_idx != SEQ_LEVEL_3_2 && seq_level_idx != SEQ_LEVEL_3_3 && - seq_level_idx != SEQ_LEVEL_4_2 && seq_level_idx != SEQ_LEVEL_4_3); + seq_level_idx != SEQ_LEVEL_4_2 && seq_level_idx != SEQ_LEVEL_4_3 +#if !CONFIG_CWG_C013 + && seq_level_idx != SEQ_LEVEL_7_0 && seq_level_idx != SEQ_LEVEL_7_1 && + seq_level_idx != SEQ_LEVEL_7_2 && seq_level_idx != SEQ_LEVEL_7_3 && + seq_level_idx != SEQ_LEVEL_8_0 && seq_level_idx != SEQ_LEVEL_8_1 && + seq_level_idx != SEQ_LEVEL_8_2 && seq_level_idx != SEQ_LEVEL_8_3 +#endif + ); } /*!\endcond */ diff -Nru aom-3.6.0/av1/common/tile_common.c aom-3.6.1/av1/common/tile_common.c --- aom-3.6.0/av1/common/tile_common.c 2023-02-04 07:43:20.000000000 +0000 +++ aom-3.6.1/av1/common/tile_common.c 2023-05-08 18:17:52.000000000 +0000 @@ -40,6 +40,7 @@ const int sb_size_log2 = seq_params->mib_size_log2 + MI_SIZE_LOG2; tiles->max_width_sb = MAX_TILE_WIDTH >> sb_size_log2; +#if CONFIG_CWG_C013 bool use_level_7_above = false; for (int i = 0; i < seq_params->operating_points_cnt_minus_1 + 1; i++) { if (seq_params->seq_level_idx[i] >= SEQ_LEVEL_7_0 && @@ -57,6 +58,9 @@ const int max_tile_area_sb = (use_level_7_above ? 
MAX_TILE_AREA_LEVEL_7_AND_ABOVE : MAX_TILE_AREA) >> (2 * sb_size_log2); +#else + const int max_tile_area_sb = MAX_TILE_AREA >> (2 * sb_size_log2); +#endif tiles->min_log2_cols = tile_log2(tiles->max_width_sb, sb_cols); tiles->max_log2_cols = tile_log2(1, AOMMIN(sb_cols, MAX_TILE_COLS)); diff -Nru aom-3.6.0/av1/common/tile_common.h aom-3.6.1/av1/common/tile_common.h --- aom-3.6.0/av1/common/tile_common.h 2023-02-04 07:43:20.000000000 +0000 +++ aom-3.6.1/av1/common/tile_common.h 2023-05-08 18:17:52.000000000 +0000 @@ -52,7 +52,9 @@ // The minimum tile width or height is fixed at one superblock #define MAX_TILE_WIDTH (4096) // Max Tile width in pixels #define MAX_TILE_AREA (4096 * 2304) // Maximum tile area in pixels +#if CONFIG_CWG_C013 #define MAX_TILE_AREA_LEVEL_7_AND_ABOVE (4096 * 4608) +#endif void av1_get_uniform_tile_size(const struct AV1Common *cm, int *w, int *h); void av1_get_tile_limits(struct AV1Common *const cm); diff -Nru aom-3.6.0/av1/encoder/arm/neon/picksrt_neon.c aom-3.6.1/av1/encoder/arm/neon/picksrt_neon.c --- aom-3.6.0/av1/encoder/arm/neon/picksrt_neon.c 2023-02-04 07:43:20.000000000 +0000 +++ aom-3.6.1/av1/encoder/arm/neon/picksrt_neon.c 2023-05-08 18:17:52.000000000 +0000 @@ -96,10 +96,10 @@ int32x4_t v0 = vmulq_n_s32(flt_16b_lo, xq_active); v0 = vmlsq_n_s32(v0, vreinterpretq_s32_u16(d0w.val[0]), - xq_active << SGRPROJ_RST_BITS); + xq_active * (1 << SGRPROJ_RST_BITS)); int32x4_t v1 = vmulq_n_s32(flt_16b_hi, xq_active); v1 = vmlsq_n_s32(v1, vreinterpretq_s32_u16(d0w.val[1]), - xq_active << SGRPROJ_RST_BITS); + xq_active * (1 << SGRPROJ_RST_BITS)); const int16x4_t vr0 = vqrshrn_n_s32(v0, 11); const int16x4_t vr1 = vqrshrn_n_s32(v1, 11); const int16x8_t e0 = diff -Nru aom-3.6.0/av1/encoder/encoder.c aom-3.6.1/av1/encoder/encoder.c --- aom-3.6.0/av1/encoder/encoder.c 2023-02-04 07:43:20.000000000 +0000 +++ aom-3.6.1/av1/encoder/encoder.c 2023-05-08 18:17:52.000000000 +0000 @@ -342,7 +342,7 @@ if (!cpi->ppi->seq_params_locked) 
set_sb_size(cm->seq_params, av1_select_sb_size(&cpi->oxcf, cm->width, cm->height, - cpi->svc.number_spatial_layers)); + cpi->ppi->number_spatial_layers)); set_tile_info(cm, &cpi->oxcf.tile_cfg); } @@ -363,6 +363,9 @@ static void set_bitstream_level_tier(AV1_PRIMARY *const ppi, int width, int height, double init_framerate) { SequenceHeader *const seq_params = &ppi->seq_params; +#if CONFIG_CWG_C013 + const AV1LevelParams *const level_params = &ppi->level_params; +#endif // TODO(any): This is a placeholder function that only addresses dimensions // and max display sample rates. // Need to add checks for max bit rate, max decoded luma sample rate, header @@ -403,25 +406,33 @@ } else if (does_level_match(width, height, init_framerate, 8192, 4352, 120.0, 2)) { level = SEQ_LEVEL_6_2; - } else if (does_level_match(width, height, init_framerate, 16384, 8704, 30.0, - 2)) { - level = SEQ_LEVEL_7_0; - } else if (does_level_match(width, height, init_framerate, 16384, 8704, 60.0, - 2)) { - level = SEQ_LEVEL_7_1; - } else if (does_level_match(width, height, init_framerate, 16384, 8704, 120.0, - 2)) { - level = SEQ_LEVEL_7_2; - } else if (does_level_match(width, height, init_framerate, 32768, 17408, 30.0, - 2)) { - level = SEQ_LEVEL_8_0; - } else if (does_level_match(width, height, init_framerate, 32768, 17408, 60.0, - 2)) { - level = SEQ_LEVEL_8_1; - } else if (does_level_match(width, height, init_framerate, 32768, 17408, - 120.0, 2)) { - level = SEQ_LEVEL_8_2; } +#if CONFIG_CWG_C013 + // TODO(bohanli): currently target level is only working for the 0th operating + // point, so scalable coding is not supported. + else if (level_params->target_seq_level_idx[0] >= SEQ_LEVEL_7_0 && + level_params->target_seq_level_idx[0] <= SEQ_LEVEL_8_3) { + // Only use level 7.x to 8.x when explicitly asked to. 
+ if (does_level_match(width, height, init_framerate, 16384, 8704, 30.0, 2)) { + level = SEQ_LEVEL_7_0; + } else if (does_level_match(width, height, init_framerate, 16384, 8704, + 60.0, 2)) { + level = SEQ_LEVEL_7_1; + } else if (does_level_match(width, height, init_framerate, 16384, 8704, + 120.0, 2)) { + level = SEQ_LEVEL_7_2; + } else if (does_level_match(width, height, init_framerate, 32768, 17408, + 30.0, 2)) { + level = SEQ_LEVEL_8_0; + } else if (does_level_match(width, height, init_framerate, 32768, 17408, + 60.0, 2)) { + level = SEQ_LEVEL_8_1; + } else if (does_level_match(width, height, init_framerate, 32768, 17408, + 120.0, 2)) { + level = SEQ_LEVEL_8_2; + } + } +#endif for (int i = 0; i < MAX_NUM_OPERATING_POINTS; ++i) { seq_params->seq_level_idx[i] = level; diff -Nru aom-3.6.0/av1/encoder/encoder_utils.c aom-3.6.1/av1/encoder/encoder_utils.c --- aom-3.6.0/av1/encoder/encoder_utils.c 2023-02-04 07:43:20.000000000 +0000 +++ aom-3.6.1/av1/encoder/encoder_utils.c 2023-05-08 18:17:52.000000000 +0000 @@ -847,7 +847,7 @@ if (!cpi->ppi->seq_params_locked) { set_sb_size(cm->seq_params, av1_select_sb_size(&cpi->oxcf, cm->width, cm->height, - cpi->svc.number_spatial_layers)); + cpi->ppi->number_spatial_layers)); } } else { const RefCntBuffer *const primary_ref_buf = get_primary_ref_frame_buf(cm); diff -Nru aom-3.6.0/av1/encoder/ethread.c aom-3.6.1/av1/encoder/ethread.c --- aom-3.6.0/av1/encoder/ethread.c 2023-02-04 07:43:20.000000000 +0000 +++ aom-3.6.1/av1/encoder/ethread.c 2023-05-08 18:17:52.000000000 +0000 @@ -1314,16 +1314,10 @@ int num_workers) { MultiThreadInfo *const mt_info = &cpi->mt_info; AV1_COMMON *const cm = &cpi->common; - MACROBLOCKD *xd = &cpi->td.mb.e_mbd; for (int i = num_workers - 1; i >= 0; i--) { AVxWorker *const worker = &mt_info->workers[i]; EncWorkerData *const thread_data = &mt_info->tile_thr_data[i]; - // Initialize loopfilter data - thread_data->lf_sync = &mt_info->lf_row_sync; - thread_data->lf_data = &thread_data->lf_sync->lfdata[i]; 
- loop_filter_data_reset(thread_data->lf_data, &cm->cur_frame->buf, cm, xd); - worker->hook = hook; worker->data1 = thread_data; worker->data2 = NULL; @@ -1613,7 +1607,7 @@ } #endif -static void lpf_pipeline_mt_init(AV1_COMP *cpi) { +static void lpf_pipeline_mt_init(AV1_COMP *cpi, int num_workers) { // Pipelining of loop-filtering after encoding is enabled when loop-filter // level is chosen based on quantizer and frame type. It is disabled in case // of 'LOOPFILTER_SELECTIVELY' as the stats collected during encoding stage @@ -1624,18 +1618,20 @@ const int use_superres = av1_superres_scaled(cm); const int use_cdef = is_cdef_used(cm); const int use_restoration = is_restoration_used(cm); + MultiThreadInfo *const mt_info = &cpi->mt_info; + MACROBLOCKD *xd = &cpi->td.mb.e_mbd; const unsigned int skip_apply_postproc_filters = derive_skip_apply_postproc_filters(cpi, use_loopfilter, use_cdef, use_superres, use_restoration); - cpi->mt_info.pipeline_lpf_mt_with_enc = + mt_info->pipeline_lpf_mt_with_enc = (cpi->oxcf.mode == REALTIME) && (cpi->oxcf.speed >= 5) && (cpi->sf.lpf_sf.lpf_pick == LPF_PICK_FROM_Q) && (cpi->oxcf.algo_cfg.loopfilter_control != LOOPFILTER_SELECTIVELY) && !cpi->ppi->rtc_ref.non_reference_frame && !cm->features.allow_intrabc && ((skip_apply_postproc_filters & SKIP_APPLY_LOOPFILTER) == 0); - if (!cpi->mt_info.pipeline_lpf_mt_with_enc) return; + if (!mt_info->pipeline_lpf_mt_with_enc) return; set_postproc_filter_default_params(cm); @@ -1661,12 +1657,20 @@ av1_loop_filter_frame_init(cm, plane_start, plane_end); - assert(cpi->mt_info.num_mod_workers[MOD_ENC] == - cpi->mt_info.num_mod_workers[MOD_LPF]); + assert(mt_info->num_mod_workers[MOD_ENC] == + mt_info->num_mod_workers[MOD_LPF]); loop_filter_frame_mt_init(cm, start_mi_row, end_mi_row, planes_to_lf, - cpi->mt_info.num_mod_workers[MOD_LPF], - &cpi->mt_info.lf_row_sync, lpf_opt_level, + mt_info->num_mod_workers[MOD_LPF], + &mt_info->lf_row_sync, lpf_opt_level, cm->seq_params->mib_size_log2); + + for (int i 
= num_workers - 1; i >= 0; i--) { + EncWorkerData *const thread_data = &mt_info->tile_thr_data[i]; + // Initialize loopfilter data + thread_data->lf_sync = &mt_info->lf_row_sync; + thread_data->lf_data = &thread_data->lf_sync->lfdata[i]; + loop_filter_data_reset(thread_data->lf_data, &cm->cur_frame->buf, cm, xd); + } } } @@ -1701,7 +1705,8 @@ cpi->oxcf.algo_cfg.cdf_update_mode); } - lpf_pipeline_mt_init(cpi); + num_workers = AOMMIN(num_workers, mt_info->num_workers); + lpf_pipeline_mt_init(cpi, num_workers); av1_init_tile_data(cpi); @@ -1731,8 +1736,6 @@ } } - num_workers = AOMMIN(num_workers, mt_info->num_workers); - assign_tile_to_thread(thread_id_to_tile_id, tile_cols * tile_rows, num_workers); prepare_enc_workers(cpi, enc_row_mt_worker_hook, num_workers); diff -Nru aom-3.6.0/av1/encoder/gop_structure.c aom-3.6.1/av1/encoder/gop_structure.c --- aom-3.6.0/av1/encoder/gop_structure.c 2023-02-04 07:43:20.000000000 +0000 +++ aom-3.6.1/av1/encoder/gop_structure.c 2023-05-08 18:17:52.000000000 +0000 @@ -813,7 +813,7 @@ } gf_group->layer_depth[gf_index] = AOMMAX(log_gop_length - count, 0); } - gf_group->max_layer_depth = log_gop_length; + gf_group->max_layer_depth = AOMMIN(log_gop_length, MAX_ARF_LAYERS); } void av1_gop_setup_structure(AV1_COMP *cpi) { diff -Nru aom-3.6.0/av1/encoder/level.c aom-3.6.1/av1/encoder/level.c --- aom-3.6.0/av1/encoder/level.c 2023-02-04 07:43:20.000000000 +0000 +++ aom-3.6.1/av1/encoder/level.c 2023-05-08 18:17:52.000000000 +0000 @@ -209,6 +209,7 @@ .high_cr = 4.0, .max_tiles = 128, .max_tile_cols = 16 }, +#if CONFIG_CWG_C013 { .level = SEQ_LEVEL_7_0, .max_picture_size = 142606336, .max_h_size = 32768, @@ -313,6 +314,16 @@ .high_cr = 4.0, .max_tiles = 512, .max_tile_cols = 64 }, +#else // !CONFIG_CWG_C013 + UNDEFINED_LEVEL, + UNDEFINED_LEVEL, + UNDEFINED_LEVEL, + UNDEFINED_LEVEL, + UNDEFINED_LEVEL, + UNDEFINED_LEVEL, + UNDEFINED_LEVEL, + UNDEFINED_LEVEL, +#endif // CONFIG_CWG_C013 }; typedef enum { @@ -1016,9 +1027,13 @@ break; } +#if 
CONFIG_CWG_C013 const int max_tile_size = (level >= SEQ_LEVEL_7_0 && level <= SEQ_LEVEL_8_3) ? MAX_TILE_AREA_LEVEL_7_AND_ABOVE : MAX_TILE_AREA; +#else + const int max_tile_size = MAX_TILE_AREA; +#endif if (level_stats->max_tile_size > max_tile_size) { fail_id = TILE_TOO_LARGE; break; diff -Nru aom-3.6.0/av1/encoder/mcomp.c aom-3.6.1/av1/encoder/mcomp.c --- aom-3.6.0/av1/encoder/mcomp.c 2023-02-04 07:43:20.000000000 +0000 +++ aom-3.6.1/av1/encoder/mcomp.c 2023-05-08 18:17:52.000000000 +0000 @@ -204,17 +204,23 @@ } void av1_set_mv_search_range(FullMvLimits *mv_limits, const MV *mv) { - int col_min = - GET_MV_RAWPEL(mv->col) - MAX_FULL_PEL_VAL + (mv->col & 7 ? 1 : 0); - int row_min = - GET_MV_RAWPEL(mv->row) - MAX_FULL_PEL_VAL + (mv->row & 7 ? 1 : 0); - int col_max = GET_MV_RAWPEL(mv->col) + MAX_FULL_PEL_VAL; - int row_max = GET_MV_RAWPEL(mv->row) + MAX_FULL_PEL_VAL; - - col_min = AOMMAX(col_min, GET_MV_RAWPEL(MV_LOW) + 1); - row_min = AOMMAX(row_min, GET_MV_RAWPEL(MV_LOW) + 1); - col_max = AOMMIN(col_max, GET_MV_RAWPEL(MV_UPP) - 1); - row_max = AOMMIN(row_max, GET_MV_RAWPEL(MV_UPP) - 1); + // Calculate the outermost full-pixel MVs which are inside the limits set by + // av1_set_subpel_mv_search_range(). + // + // The subpel limits are simply mv->col +/- 8*MAX_FULL_PEL_VAL, and similar + // for mv->row. We can then divide by 8 to find the fullpel MV limits. But + // we have to be careful about the rounding. We want these bounds to be + // at least as tight as the subpel limits, which means that we must round + // the minimum values up and the maximum values down when dividing. 
+ int col_min = ((mv->col + 7) >> 3) - MAX_FULL_PEL_VAL; + int row_min = ((mv->row + 7) >> 3) - MAX_FULL_PEL_VAL; + int col_max = (mv->col >> 3) + MAX_FULL_PEL_VAL; + int row_max = (mv->row >> 3) + MAX_FULL_PEL_VAL; + + col_min = AOMMAX(col_min, (MV_LOW >> 3) + 1); + row_min = AOMMAX(row_min, (MV_LOW >> 3) + 1); + col_max = AOMMIN(col_max, (MV_UPP >> 3) - 1); + row_max = AOMMIN(row_max, (MV_UPP >> 3) - 1); // Get intersection of UMV window and valid MV window to reduce # of checks // in diamond search. @@ -2088,11 +2094,11 @@ best_sad = tmp_sad; } - convert_fullmv_to_mv(best_int_mv); + FullMvLimits mv_limits = x->mv_limits; + av1_set_mv_search_range(&mv_limits, ref_mv); + clamp_fullmv(&best_int_mv->as_fullmv, &mv_limits); - SubpelMvLimits subpel_mv_limits; - av1_set_subpel_mv_search_range(&subpel_mv_limits, &x->mv_limits, ref_mv); - clamp_mv(&best_int_mv->as_mv, &subpel_mv_limits); + convert_fullmv_to_mv(best_int_mv); if (scaled_ref_frame) { int i; diff -Nru aom-3.6.0/av1/encoder/motion_search_facade.c aom-3.6.1/av1/encoder/motion_search_facade.c --- aom-3.6.0/av1/encoder/motion_search_facade.c 2023-02-04 07:43:20.000000000 +0000 +++ aom-3.6.1/av1/encoder/motion_search_facade.c 2023-05-08 18:17:52.000000000 +0000 @@ -356,6 +356,7 @@ av1_make_default_subpel_ms_params(&ms_params, cpi, x, bsize, &ref_mv, cost_list); MV subpel_start_mv = get_mv_from_fullmv(&best_mv->as_fullmv); + assert(av1_is_subpelmv_in_range(&ms_params.mv_limits, subpel_start_mv)); switch (mbmi->motion_mode) { case SIMPLE_TRANSLATION: @@ -655,6 +656,7 @@ mask, mask_stride, id); ms_params.forced_stop = EIGHTH_PEL; MV start_mv = get_mv_from_fullmv(&best_mv.as_fullmv); + assert(av1_is_subpelmv_in_range(&ms_params.mv_limits, start_mv)); bestsme = cpi->mv_search_params.find_fractional_mv_step( xd, cm, &ms_params, start_mv, &best_mv.as_mv, &dis, &sse, NULL); @@ -783,6 +785,7 @@ mask, mask_stride, ref_idx); ms_params.forced_stop = EIGHTH_PEL; MV start_mv = get_mv_from_fullmv(&best_mv.as_fullmv); + 
assert(av1_is_subpelmv_in_range(&ms_params.mv_limits, start_mv)); bestsme = cpi->mv_search_params.find_fractional_mv_step( xd, cm, &ms_params, start_mv, &best_mv.as_mv, &dis, &sse, NULL); } @@ -990,6 +993,7 @@ ms_params.forced_stop = cpi->sf.mv_sf.simple_motion_subpel_force_stop; MV subpel_start_mv = get_mv_from_fullmv(&best_mv.as_fullmv); + assert(av1_is_subpelmv_in_range(&ms_params.mv_limits, subpel_start_mv)); cpi->mv_search_params.find_fractional_mv_step( xd, cm, &ms_params, subpel_start_mv, &best_mv.as_mv, &not_used, diff -Nru aom-3.6.0/av1/encoder/nonrd_pickmode.c aom-3.6.1/av1/encoder/nonrd_pickmode.c --- aom-3.6.0/av1/encoder/nonrd_pickmode.c 2023-02-04 07:43:20.000000000 +0000 +++ aom-3.6.1/av1/encoder/nonrd_pickmode.c 2023-05-08 18:17:52.000000000 +0000 @@ -141,7 +141,7 @@ // The original scan order (default_scan_8x8) is modified according to the extra // transpose in hadamard c implementation, i.e., aom_hadamard_lp_8x8_c and // aom_hadamard_8x8_c. -static const int16_t default_scan_8x8_transpose[64] = { +DECLARE_ALIGNED(16, static const int16_t, default_scan_8x8_transpose[64]) = { 0, 8, 1, 2, 9, 16, 24, 17, 10, 3, 4, 11, 18, 25, 32, 40, 33, 26, 19, 12, 5, 6, 13, 20, 27, 34, 41, 48, 56, 49, 42, 35, 28, 21, 14, 7, 15, 22, 29, 36, 43, 50, 57, 58, 51, 44, 37, 30, @@ -155,7 +155,8 @@ // guaranteed to scan low coefficients first, therefore we modify the scan order // accordingly. // Note that this one has to be used together with default_scan_8x8_transpose.
-static const int16_t av1_default_iscan_8x8_transpose[64] = { +DECLARE_ALIGNED(16, static const int16_t, + av1_default_iscan_8x8_transpose[64]) = { 0, 2, 3, 9, 10, 20, 21, 35, 1, 4, 8, 11, 19, 22, 34, 36, 5, 7, 12, 18, 23, 33, 37, 48, 6, 13, 17, 24, 32, 38, 47, 49, 14, 16, 25, 31, 39, 46, 50, 57, 15, 26, 30, 40, 45, 51, 56, 58, @@ -165,7 +166,8 @@ // The original scan order (default_scan_16x16) is modified according to the // extra transpose in hadamard c implementation in lp case, i.e., // aom_hadamard_lp_16x16_c. -static const int16_t default_scan_lp_16x16_transpose[256] = { +DECLARE_ALIGNED(16, static const int16_t, + default_scan_lp_16x16_transpose[256]) = { 0, 8, 2, 4, 10, 16, 24, 18, 12, 6, 64, 14, 20, 26, 32, 40, 34, 28, 22, 72, 66, 68, 74, 80, 30, 36, 42, 48, 56, 50, 44, 38, 88, 82, 76, 70, 128, 78, 84, 90, 96, 46, 52, 58, 1, @@ -191,7 +193,8 @@ // extra shift in hadamard c implementation in fp case, i.e., // aom_hadamard_16x16_c. Note that 16x16 lp and fp hadamard generate different // outputs, so we handle them separately. -static const int16_t default_scan_fp_16x16_transpose[256] = { +DECLARE_ALIGNED(16, static const int16_t, + default_scan_fp_16x16_transpose[256]) = { 0, 4, 2, 8, 6, 16, 20, 18, 12, 10, 64, 14, 24, 22, 32, 36, 34, 28, 26, 68, 66, 72, 70, 80, 30, 40, 38, 48, 52, 50, 44, 42, 84, 82, 76, 74, 128, 78, 88, 86, 96, 46, 56, 54, 1, @@ -219,7 +222,8 @@ // such that the normal scan order is no longer guaranteed to scan low // coefficients first, therefore we modify the scan order accordingly. Note that // this one has to be used together with default_scan_lp_16x16_transpose. 
-static const int16_t av1_default_iscan_lp_16x16_transpose[256] = { +DECLARE_ALIGNED(16, static const int16_t, + av1_default_iscan_lp_16x16_transpose[256]) = { 0, 44, 2, 46, 3, 63, 9, 69, 1, 45, 4, 64, 8, 68, 11, 87, 5, 65, 7, 67, 12, 88, 18, 94, 6, 66, 13, 89, 17, 93, 24, 116, 14, 90, 16, 92, 25, 117, 31, 123, 15, 91, 26, 118, 30, @@ -247,7 +251,8 @@ // such that the normal scan order is no longer guaranteed to scan low // coefficients first, therefore we modify the scan order accordingly. Note that // this one has to be used together with default_scan_fp_16x16_transpose. -static const int16_t av1_default_iscan_fp_16x16_transpose[256] = { +DECLARE_ALIGNED(16, static const int16_t, + av1_default_iscan_fp_16x16_transpose[256]) = { 0, 44, 2, 46, 1, 45, 4, 64, 3, 63, 9, 69, 8, 68, 11, 87, 5, 65, 7, 67, 6, 66, 13, 89, 12, 88, 18, 94, 17, 93, 24, 116, 14, 90, 16, 92, 15, 91, 26, 118, 25, 117, 31, 123, 30, @@ -483,6 +488,7 @@ start_mv, fullpel_performed_well); MV subpel_start_mv = get_mv_from_fullmv(&tmp_mv->as_fullmv); + assert(av1_is_subpelmv_in_range(&ms_params.mv_limits, subpel_start_mv)); // adaptively downgrade subpel search method based on block properties if (use_aggressive_subpel_search_method( x, sf->rt_sf.use_adaptive_subpel_search, fullpel_performed_well)) @@ -574,6 +580,7 @@ subpel_select(cpi, x, bsize, &best_mv, ref_mv, start_mv, false); } MV start_mv = get_mv_from_fullmv(&best_mv.as_fullmv); + assert(av1_is_subpelmv_in_range(&ms_params.mv_limits, start_mv)); cpi->mv_search_params.find_fractional_mv_step( xd, cm, &ms_params, start_mv, &best_mv.as_mv, &dis, &x->pred_sse[ref_frame], NULL); diff -Nru aom-3.6.0/av1/encoder/pass2_strategy.c aom-3.6.1/av1/encoder/pass2_strategy.c --- aom-3.6.0/av1/encoder/pass2_strategy.c 2023-02-04 07:43:20.000000000 +0000 +++ aom-3.6.1/av1/encoder/pass2_strategy.c 2023-05-08 18:17:52.000000000 +0000 @@ -17,6 +17,7 @@ */ /*! 
@} - end defgroup gf_group_algo */ +#include #include #include "config/aom_config.h" @@ -938,6 +939,7 @@ // Allocate extra bits to each ARF layer int i; int layer_extra_bits[MAX_ARF_LAYERS + 1] = { 0 }; + assert(max_arf_layer <= MAX_ARF_LAYERS); for (i = 1; i <= max_arf_layer; ++i) { double fraction = (i == max_arf_layer) ? 1.0 : layer_fraction[i]; layer_extra_bits[i] = diff -Nru aom-3.6.0/av1/encoder/rd.c aom-3.6.1/av1/encoder/rd.c --- aom-3.6.0/av1/encoder/rd.c 2023-02-04 07:43:20.000000000 +0000 +++ aom-3.6.1/av1/encoder/rd.c 2023-05-08 18:17:52.000000000 +0000 @@ -1180,6 +1180,46 @@ get_entropy_contexts_plane(plane_bsize, pd, t_above, t_left); } +// Special clamping used in the encoder when calculating a prediction +// +// Logically, all pixel fetches used for prediction are clamped against the +// edges of the frame. But doing this directly is slow, so instead we allocate +// a finite border around the frame and fill it with copies of the outermost +// pixels. +// +// Since this border is finite, we need to clamp the motion vector before +// prediction in order to avoid out-of-bounds reads. At the same time, this +// clamp must not change the prediction result. +// +// We can balance both of these concerns by calculating how far we would have +// to go in each direction before the extended prediction region (the current +// block + AOM_INTERP_EXTEND many pixels around the block) would be mapped +// so that it touches the frame only at one row or column. This is a special +// point because any more extreme MV will always lead to the same prediction. +// So it is safe to clamp at that point. +// +// In the worst case, this requires a border of +// max_block_width + 2*AOM_INTERP_EXTEND = 128 + 2*4 = 136 pixels +// around the frame edges. 
+static INLINE void enc_clamp_mv(const AV1_COMMON *cm, const MACROBLOCKD *xd, + MV *mv) { + int bw = xd->width << MI_SIZE_LOG2; + int bh = xd->height << MI_SIZE_LOG2; + + int px_to_left_edge = xd->mi_col << MI_SIZE_LOG2; + int px_to_right_edge = (cm->mi_params.mi_cols - xd->mi_col) << MI_SIZE_LOG2; + int px_to_top_edge = xd->mi_row << MI_SIZE_LOG2; + int px_to_bottom_edge = (cm->mi_params.mi_rows - xd->mi_row) << MI_SIZE_LOG2; + + const SubpelMvLimits mv_limits = { + .col_min = -GET_MV_SUBPEL(px_to_left_edge + bw + AOM_INTERP_EXTEND), + .col_max = GET_MV_SUBPEL(px_to_right_edge + AOM_INTERP_EXTEND), + .row_min = -GET_MV_SUBPEL(px_to_top_edge + bh + AOM_INTERP_EXTEND), + .row_max = GET_MV_SUBPEL(px_to_bottom_edge + AOM_INTERP_EXTEND) + }; + clamp_mv(mv, &mv_limits); +} + void av1_mv_pred(const AV1_COMP *cpi, MACROBLOCK *x, uint8_t *ref_y_buffer, int ref_y_stride, int ref_frame, BLOCK_SIZE block_size) { const MV_REFERENCE_FRAME ref_frames[2] = { ref_frame, NONE_FRAME }; @@ -1202,7 +1242,9 @@ int max_mv = 0; // Get the sad for each candidate reference mv. 
for (int i = 0; i < num_mv_refs; ++i) { - const MV *this_mv = &pred_mv[i]; + MV *this_mv = &pred_mv[i]; + enc_clamp_mv(&cpi->common, &x->e_mbd, this_mv); + const int fp_row = (this_mv->row + 3 + (this_mv->row >= 0)) >> 3; const int fp_col = (this_mv->col + 3 + (this_mv->col >= 0)) >> 3; max_mv = AOMMAX(max_mv, AOMMAX(abs(this_mv->row), abs(this_mv->col)) >> 3); diff -Nru aom-3.6.0/av1/encoder/temporal_filter.c aom-3.6.1/av1/encoder/temporal_filter.c --- aom-3.6.0/av1/encoder/temporal_filter.c 2023-02-04 07:43:20.000000000 +0000 +++ aom-3.6.1/av1/encoder/temporal_filter.c 2023-05-08 18:17:52.000000000 +0000 @@ -182,6 +182,7 @@ ms_params.mv_cost_params.mv_cost_type = MV_COST_NONE; MV subpel_start_mv = get_mv_from_fullmv(&best_mv.as_fullmv); + assert(av1_is_subpelmv_in_range(&ms_params.mv_limits, subpel_start_mv)); error = cpi->mv_search_params.find_fractional_mv_step( &mb->e_mbd, &cpi->common, &ms_params, subpel_start_mv, &best_mv.as_mv, &distortion, &sse, NULL); @@ -229,6 +230,7 @@ ms_params.mv_cost_params.mv_cost_type = MV_COST_NONE; subpel_start_mv = get_mv_from_fullmv(&best_mv.as_fullmv); + assert(av1_is_subpelmv_in_range(&ms_params.mv_limits, subpel_start_mv)); error = cpi->mv_search_params.find_fractional_mv_step( &mb->e_mbd, &cpi->common, &ms_params, subpel_start_mv, &best_mv.as_mv, &distortion, &sse, NULL); diff -Nru aom-3.6.0/av1/encoder/tpl_model.c aom-3.6.1/av1/encoder/tpl_model.c --- aom-3.6.0/av1/encoder/tpl_model.c 2023-02-04 07:43:20.000000000 +0000 +++ aom-3.6.1/av1/encoder/tpl_model.c 2023-05-08 18:17:52.000000000 +0000 @@ -293,6 +293,7 @@ ms_params.var_params.subpel_search_type = USE_2_TAPS; ms_params.mv_cost_params.mv_cost_type = MV_COST_NONE; MV subpel_start_mv = get_mv_from_fullmv(&best_mv->as_fullmv); + assert(av1_is_subpelmv_in_range(&ms_params.mv_limits, subpel_start_mv)); bestsme = cpi->mv_search_params.find_fractional_mv_step( xd, cm, &ms_params, subpel_start_mv, &best_mv->as_mv, &distortion, &sse, NULL); diff -Nru 
aom-3.6.0/av1/encoder/var_based_part.c aom-3.6.1/av1/encoder/var_based_part.c --- aom-3.6.0/av1/encoder/var_based_part.c 2023-02-04 07:43:20.000000000 +0000 +++ aom-3.6.1/av1/encoder/var_based_part.c 2023-05-08 18:17:52.000000000 +0000 @@ -1013,10 +1013,10 @@ AV1_COMP *cpi, MACROBLOCK *x, VP128x128 *vt, VP16x16 *vt2, PART_EVAL_STATUS *force_split, int avg_16x16[][4], int maxvar_16x16[][4], int minvar_16x16[][4], int *variance4x4downsample, int64_t *thresholds, - uint8_t *src, int src_stride, const uint8_t *dst, int dst_stride) { + uint8_t *src, int src_stride, const uint8_t *dst, int dst_stride, + bool is_key_frame) { AV1_COMMON *cm = &cpi->common; MACROBLOCKD *xd = &x->e_mbd; - const int is_key_frame = frame_is_intra_only(cm); const int is_small_sb = (cm->seq_params->sb_size == BLOCK_64X64); const int num_64x64_blocks = is_small_sb ? 1 : 4; // TODO(kyslov) Bring back compute_minmax_variance with content type detection @@ -1461,7 +1461,7 @@ // for splits. fill_variance_tree_leaves(cpi, x, vt, vt2, force_split, avg_16x16, maxvar_16x16, minvar_16x16, variance4x4downsample, - thresholds, s, sp, d, dp); + thresholds, s, sp, d, dp, is_key_frame); avg_64x64 = 0; for (m = 0; m < num_64x64_blocks; ++m) { diff -Nru aom-3.6.0/av1/encoder/x86/pickrst_avx2.c aom-3.6.1/av1/encoder/x86/pickrst_avx2.c --- aom-3.6.0/av1/encoder/x86/pickrst_avx2.c 2023-02-04 07:43:20.000000000 +0000 +++ aom-3.6.1/av1/encoder/x86/pickrst_avx2.c 2023-05-08 18:17:52.000000000 +0000 @@ -789,7 +789,7 @@ } else if (params->r[0] > 0 || params->r[1] > 0) { const int xq_active = (params->r[0] > 0) ? xq[0] : xq[1]; const __m256i xq_coeff = - pair_set_epi16(xq_active, (-xq_active * (1 << SGRPROJ_RST_BITS))); + pair_set_epi16(xq_active, -xq_active * (1 << SGRPROJ_RST_BITS)); const int32_t *flt = (params->r[0] > 0) ? flt0 : flt1; const int flt_stride = (params->r[0] > 0) ? 
flt0_stride : flt1_stride; for (i = 0; i < height; ++i) { diff -Nru aom-3.6.0/av1/encoder/x86/pickrst_sse4.c aom-3.6.1/av1/encoder/x86/pickrst_sse4.c --- aom-3.6.0/av1/encoder/x86/pickrst_sse4.c 2023-02-04 07:43:20.000000000 +0000 +++ aom-3.6.1/av1/encoder/x86/pickrst_sse4.c 2023-05-08 18:17:52.000000000 +0000 @@ -780,7 +780,7 @@ } else if (params->r[0] > 0 || params->r[1] > 0) { const int xq_active = (params->r[0] > 0) ? xq[0] : xq[1]; const __m128i xq_coeff = - pair_set_epi16(xq_active, -(xq_active << SGRPROJ_RST_BITS)); + pair_set_epi16(xq_active, -xq_active * (1 << SGRPROJ_RST_BITS)); const int32_t *flt = (params->r[0] > 0) ? flt0 : flt1; const int flt_stride = (params->r[0] > 0) ? flt0_stride : flt1_stride; for (i = 0; i < height; ++i) { diff -Nru aom-3.6.0/build/cmake/aom_config_defaults.cmake aom-3.6.1/build/cmake/aom_config_defaults.cmake --- aom-3.6.0/build/cmake/aom_config_defaults.cmake 2023-02-04 07:43:20.000000000 +0000 +++ aom-3.6.1/build/cmake/aom_config_defaults.cmake 2023-05-08 18:17:52.000000000 +0000 @@ -155,6 +155,8 @@ "AV1 experiment: Enable tensorflow lite library.") set_aom_config_var(CONFIG_THREE_PASS 0 "AV1 experiment: Enable three-pass encoding.") +set_aom_config_var(CONFIG_CWG_C013 0 + "AV1 experiment: Support for 7.x and 8.x levels.") # # Variables in this section control optional features of the build system. diff -Nru aom-3.6.0/build/cmake/aom_configure.cmake aom-3.6.1/build/cmake/aom_configure.cmake --- aom-3.6.0/build/cmake/aom_configure.cmake 2023-02-04 07:43:20.000000000 +0000 +++ aom-3.6.1/build/cmake/aom_configure.cmake 2023-05-08 18:17:52.000000000 +0000 @@ -230,9 +230,6 @@ # The default _WIN32_WINNT value in MinGW is 0x0502 (Windows XP with SP2). Set # it to 0x0601 (Windows 7). add_compiler_flag_if_supported("-D_WIN32_WINNT=0x0601") - # Prevent windows.h from defining the min and max macros. This allows us to - # use std::min and std::max. 
- add_compiler_flag_if_supported("-DNOMINMAX") endif() # @@ -248,7 +245,7 @@ set(HAVE_PTHREAD_H ${CMAKE_USE_PTHREADS_INIT}) aom_check_source_compiles("unistd_check" "#include " HAVE_UNISTD_H) -if(NOT MSVC) +if(NOT WIN32) aom_push_var(CMAKE_REQUIRED_LIBRARIES "m") aom_check_c_compiles("fenv_check" "#define _GNU_SOURCE #include @@ -300,7 +297,16 @@ endif() else() require_c_flag("-std=c99" YES) - require_cxx_flag_nomsvc("-std=c++11" YES) + if(CMAKE_CXX_COMPILER_ID STREQUAL "Clang" + AND CMAKE_CXX_COMPILER_FRONTEND_VARIANT STREQUAL "GNU" + AND CMAKE_CXX_SIMULATE_ID STREQUAL "MSVC") + # Microsoft's C++ Standard Library requires C++14 as it's MSVC's default and + # minimum supported C++ version. If Clang is using this Standard Library + # implementation, it cannot target C++11. + require_cxx_flag_nomsvc("-std=c++14" YES) + else() + require_cxx_flag_nomsvc("-std=c++11" YES) + endif() add_compiler_flag_if_supported("-Wall") add_compiler_flag_if_supported("-Wdisabled-optimization") add_compiler_flag_if_supported("-Wextra") diff -Nru aom-3.6.0/build/cmake/aom_optimization.cmake aom-3.6.1/build/cmake/aom_optimization.cmake --- aom-3.6.0/build/cmake/aom_optimization.cmake 2023-02-04 07:43:20.000000000 +0000 +++ aom-3.6.1/build/cmake/aom_optimization.cmake 2023-05-08 18:17:52.000000000 +0000 @@ -46,7 +46,11 @@ add_library(${target_name} OBJECT ${${sources}}) set_property(TARGET ${target_name} PROPERTY FOLDER ${AOM_TARGET_CPU}) - if(MSVC) + # MSVC does not need flags for intrinsics flavors other than AVX/AVX2. + # However, for clang-cl, the default is SSE2, and the MSVC frontend does not + # provide any flags to enable SSE3 up to SSE4.1. So we need to restrict the + # usage of MSVC-style flags to only the real MSVC. 
+ if(CMAKE_C_COMPILER_ID STREQUAL "MSVC") get_msvc_intrinsic_flag("${flag}" "flag") endif() diff -Nru aom-3.6.0/build/cmake/exports.cmake aom-3.6.1/build/cmake/exports.cmake --- aom-3.6.0/build/cmake/exports.cmake 2023-02-04 07:43:20.000000000 +0000 +++ aom-3.6.1/build/cmake/exports.cmake 2023-05-08 18:17:52.000000000 +0000 @@ -61,11 +61,13 @@ APPEND_STRING PROPERTY LINK_FLAGS "/DEF:${aom_sym_file}") else() - target_sources(aom PRIVATE "${aom_sym_file}") + # For MinGW and MSYS compilers, you can use either version scripts or + # module definition files. If the latter, it must be supplied as an + # "object". + set_property(TARGET aom + APPEND_STRING + PROPERTY LINK_FLAGS "${aom_sym_file}") endif() - - # TODO(tomfinegan): Sort out the import lib situation and flags for MSVC. - else() set_property(TARGET aom APPEND_STRING diff -Nru aom-3.6.0/CHANGELOG aom-3.6.1/CHANGELOG --- aom-3.6.0/CHANGELOG 2023-02-04 07:43:20.000000000 +0000 +++ aom-3.6.1/CHANGELOG 2023-05-08 18:17:52.000000000 +0000 @@ -1,3 +1,27 @@ +2023-05-08 v3.6.1 + This release includes several bug fixes. This release is ABI + compatible with the last release. See + https://aomedia.googlesource.com/aom/+log/v3.6.0..v3.6.1 for all the + commits in this release. + + - Bug Fixes + * aomedia:2871: Guard the support of the 7.x and 8.x levels for AV1 + under the CONFIG_CWG_C013 config flag, and only output the 7.x and + 8.x levels when explicitly requested. + * aomedia:3382: Choose sb_size by ppi instead of svc. + * aomedia:3384: Fix fullpel search limits. + * aomedia:3388: Replace left shift of xq_active by multiplication. + * aomedia:3389: Fix MV clamping in av1_mv_pred. + * aomedia:3390: set_ld_layer_depth: cap max_layer_depth to + MAX_ARF_LAYERS. + * aomedia:3418: Fix MV clamping in av1_int_pro_motion_estimation. + * aomedia:3429: Move lpf thread data init to lpf_pipeline_mt_init(). + * b:266719111: Fix undefined behavior in Arm Neon code. + * b:269840681: nonrd_opt: align scan tables. 
+ * rtc: Fix is_key_frame setting in variance partition. + * Build: Fix build with clang-cl and Visual Studio. + * Build: Fix module definition file for MinGW/MSYS. + 2023-02-03 v3.6.0 This release includes compression efficiency and perceptual quality improvements, speedup and memory optimizations, and some new features. diff -Nru aom-3.6.0/CMakeLists.txt aom-3.6.1/CMakeLists.txt --- aom-3.6.0/CMakeLists.txt 2023-02-04 07:43:20.000000000 +0000 +++ aom-3.6.1/CMakeLists.txt 2023-05-08 18:17:52.000000000 +0000 @@ -52,7 +52,7 @@ # # We set SO_FILE_VERSION = [c-a].a.r set(LT_CURRENT 9) -set(LT_REVISION 0) +set(LT_REVISION 1) set(LT_AGE 6) math(EXPR SO_VERSION "${LT_CURRENT} - ${LT_AGE}") set(SO_FILE_VERSION "${SO_VERSION}.${LT_AGE}.${LT_REVISION}") @@ -297,7 +297,7 @@ endif() endif() -if(NOT MSVC AND NOT APPLE) +if(NOT WIN32 AND NOT APPLE) target_link_libraries(aom ${AOM_LIB_LINK_TYPE} m) if(BUILD_SHARED_LIBS) target_link_libraries(aom_static ${AOM_LIB_LINK_TYPE} m) @@ -309,7 +309,7 @@ "${AOM_ROOT}/av1/ratectrl_rtc.cc") add_library(aom_av1_rc ${AOM_AV1_RC_SOURCES}) target_link_libraries(aom_av1_rc ${AOM_LIB_LINK_TYPE} aom) - if(NOT MSVC AND NOT APPLE) + if(NOT WIN32 AND NOT APPLE) target_link_libraries(aom_av1_rc ${AOM_LIB_LINK_TYPE} m) endif() endif() diff -Nru aom-3.6.0/debian/changelog aom-3.6.1/debian/changelog --- aom-3.6.0/debian/changelog 2023-02-11 15:25:25.000000000 +0000 +++ aom-3.6.1/debian/changelog 2023-06-21 17:13:04.000000000 +0000 @@ -1,3 +1,15 @@ +aom (3.6.1-1) unstable; urgency=medium + + * New upstream release. + * debian/control: + + Build-depends on system libwebm-dev. + + Restrict libwebm-dev version requirement to 1.0.0.30 or higher + due to requirement of new header file layout. + * debian/patches: Add patch to use system libwebm instead of + bundled one. (Closes: #1030891) + + -- Boyuan Yang Wed, 21 Jun 2023 13:13:04 -0400 + aom (3.6.0-1) unstable; urgency=medium * New upstream release. 
diff -Nru aom-3.6.0/debian/control aom-3.6.1/debian/control --- aom-3.6.0/debian/control 2022-06-19 14:43:27.000000000 +0000 +++ aom-3.6.1/debian/control 2023-06-21 17:13:04.000000000 +0000 @@ -7,11 +7,12 @@ cmake (>= 3.6), debhelper-compat (= 13), libyuv-dev, + libwebm-dev (>= 1.0.0.30-2~), yasm [any-amd64 any-i386], Build-Depends-Indep: doxygen, Rules-Requires-Root: no -Standards-Version: 4.6.1 +Standards-Version: 4.6.2 Homepage: https://aomedia.googlesource.com/aom/ Vcs-Git: https://salsa.debian.org/multimedia-team/aom.git Vcs-Browser: https://salsa.debian.org/multimedia-team/aom diff -Nru aom-3.6.0/debian/patches/0002-use-system-libyuv.patch aom-3.6.1/debian/patches/0002-use-system-libyuv.patch --- aom-3.6.0/debian/patches/0002-use-system-libyuv.patch 2023-02-11 15:25:25.000000000 +0000 +++ aom-3.6.1/debian/patches/0002-use-system-libyuv.patch 2023-06-21 17:13:04.000000000 +0000 @@ -11,7 +11,7 @@ 5 files changed, 12 insertions(+), 16 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt -index 8324401..14950de 100644 +index 87d88fa..68e146a 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -459,11 +459,12 @@ if(CONFIG_AV1_DECODER AND ENABLE_EXAMPLES) diff -Nru aom-3.6.0/debian/patches/0003-use-system-libwebm.patch aom-3.6.1/debian/patches/0003-use-system-libwebm.patch --- aom-3.6.0/debian/patches/0003-use-system-libwebm.patch 1970-01-01 00:00:00.000000000 +0000 +++ aom-3.6.1/debian/patches/0003-use-system-libwebm.patch 2023-06-21 17:13:04.000000000 +0000 @@ -0,0 +1,99 @@ +From: Boyuan Yang +Date: Wed, 21 Jun 2023 13:10:50 -0400 +Subject: use system libwebm + +--- + CMakeLists.txt | 19 +++++++++++-------- + common/webmdec.cc | 4 ++-- + common/webmenc.cc | 6 +++--- + test/test.cmake | 3 ++- + 4 files changed, 18 insertions(+), 14 deletions(-) + +diff --git a/CMakeLists.txt b/CMakeLists.txt +index 68e146a..c08b980 100644 +--- a/CMakeLists.txt ++++ b/CMakeLists.txt +@@ -736,26 +736,29 @@ if(ENABLE_EXAMPLES OR ENABLE_TESTS OR ENABLE_TOOLS) + endif() + + 
if(CONFIG_WEBM_IO) +- add_library(webm OBJECT ${AOM_LIBWEBM_SOURCES}) +- include_directories("${AOM_ROOT}/third_party/libwebm") +- target_compile_definitions(webm PRIVATE __STDC_CONSTANT_MACROS) +- target_compile_definitions(webm PRIVATE __STDC_LIMIT_MACROS) +- +- if(NOT MSVC) +- target_compile_options(webm PRIVATE -Wno-shadow) ++ target_link_libraries(aom ${AOM_LIB_LINK_TYPE} webm) ++ target_include_directories(aom PRIVATE /usr/include/webm) ++ if(BUILD_SHARED_LIBS) ++ target_link_libraries(aom_static ${AOM_LIB_LINK_TYPE} webm) ++ target_include_directories(aom_static PRIVATE /usr/include/webm) + endif() + + # Add to existing targets. + if(CONFIG_AV1_DECODER) ++ target_link_libraries(aom_decoder_app_util ${AOM_LIB_LINK_TYPE} webm) ++ target_include_directories(aom_decoder_app_util PRIVATE /usr/include/webm) + target_sources(aom_decoder_app_util PRIVATE ${AOM_WEBM_DECODER_SOURCES}) + endif() + + if(CONFIG_AV1_ENCODER) ++ target_link_libraries(aom_encoder_app_util ${AOM_LIB_LINK_TYPE} webm) ++ target_include_directories(aom_encoder_app_util PRIVATE /usr/include/webm) + target_sources(aom_encoder_app_util PRIVATE ${AOM_WEBM_ENCODER_SOURCES}) + endif() + + foreach(aom_app ${AOM_APP_TARGETS}) +- target_sources(${aom_app} PRIVATE $) ++ target_include_directories(${aom_app} PRIVATE /usr/include/webm) ++ target_link_libraries(${aom_app} PRIVATE webm) + set_property(TARGET ${aom_app} PROPERTY LINKER_LANGUAGE CXX) + endforeach() + endif() +diff --git a/common/webmdec.cc b/common/webmdec.cc +index 33bda59..27483c5 100644 +--- a/common/webmdec.cc ++++ b/common/webmdec.cc +@@ -15,8 +15,8 @@ + #include + #include + +-#include "third_party/libwebm/mkvparser/mkvparser.h" +-#include "third_party/libwebm/mkvparser/mkvreader.h" ++#include ++#include + + namespace { + +diff --git a/common/webmenc.cc b/common/webmenc.cc +index bb754e8..72f48d5 100644 +--- a/common/webmenc.cc ++++ b/common/webmenc.cc +@@ -19,9 +19,9 @@ + #include + + #include "common/av1_config.h" +-#include 
"third_party/libwebm/mkvmuxer/mkvmuxer.h" +-#include "third_party/libwebm/mkvmuxer/mkvmuxerutil.h" +-#include "third_party/libwebm/mkvmuxer/mkvwriter.h" ++#include ++#include ++#include + + namespace { + const uint64_t kDebugTrackUid = 0xDEADBEEF; +diff --git a/test/test.cmake b/test/test.cmake +index b3c99b9..fb452c3 100644 +--- a/test/test.cmake ++++ b/test/test.cmake +@@ -482,7 +482,8 @@ function(setup_aom_test_targets) + # target_sources(test_libaom PRIVATE $) + # endif() + if(CONFIG_WEBM_IO) +- target_sources(test_libaom PRIVATE $) ++ target_include_directories(test_libaom PRIVATE /usr/include/webm) ++ target_link_libraries(test_libaom ${AOM_LIB_LINK_TYPE} webm) + endif() + if(HAVE_SSE2) + add_intrinsics_source_to_target("-msse2" "test_libaom" diff -Nru aom-3.6.0/debian/patches/series aom-3.6.1/debian/patches/series --- aom-3.6.0/debian/patches/series 2023-02-11 15:25:25.000000000 +0000 +++ aom-3.6.1/debian/patches/series 2023-06-21 17:13:04.000000000 +0000 @@ -1,2 +1,3 @@ 0001-doc-Use-libjs-mathjax-rather-than-cloudflare-copy.patch 0002-use-system-libyuv.patch +0003-use-system-libwebm.patch diff -Nru aom-3.6.0/debian/rules aom-3.6.1/debian/rules --- aom-3.6.0/debian/rules 2023-02-01 02:29:31.000000000 +0000 +++ aom-3.6.1/debian/rules 2023-06-21 17:13:04.000000000 +0000 @@ -38,9 +38,10 @@ %: dh $@ -Bbuild-debian -# Force building with system libyuv +# Force building with system libyuv and webm execute_before_dh_auto_configure: rm -rf $(CURDIR)/third_party/libyuv + rm -rf $(CURDIR)/third_party/libwebm # Examples and tools not built because they require various private # symbols from libaom.so which we filter out. 
diff -Nru aom-3.6.0/test/avif_progressive_test.cc aom-3.6.1/test/avif_progressive_test.cc --- aom-3.6.0/test/avif_progressive_test.cc 2023-02-04 07:43:20.000000000 +0000 +++ aom-3.6.1/test/avif_progressive_test.cc 2023-05-08 18:17:52.000000000 +0000 @@ -181,4 +181,88 @@ EXPECT_EQ(AOM_CODEC_OK, aom_codec_destroy(&enc)); } +TEST(AVIFProgressiveTest, DimensionChangeLargeImageMultiThread) { + constexpr int kWidth = 1920; + constexpr int kHeight = 1080; + // Dummy buffer of neutral gray samples. + constexpr size_t kBufferSize = 2 * kWidth * kHeight; + std::vector buffer(kBufferSize, + static_cast(128)); + + aom_image_t img; + EXPECT_EQ(&img, aom_img_wrap(&img, AOM_IMG_FMT_I420, kWidth, kHeight, 1, + buffer.data())); + img.cp = AOM_CICP_CP_UNSPECIFIED; + img.tc = AOM_CICP_TC_UNSPECIFIED; + img.mc = AOM_CICP_MC_UNSPECIFIED; + img.range = AOM_CR_FULL_RANGE; + + aom_codec_iface_t *iface = aom_codec_av1_cx(); + aom_codec_enc_cfg_t cfg; + EXPECT_EQ(AOM_CODEC_OK, + aom_codec_enc_config_default(iface, &cfg, AOM_USAGE_GOOD_QUALITY)); + cfg.g_profile = 0; + cfg.g_w = img.w; + cfg.g_h = img.h; + cfg.g_bit_depth = AOM_BITS_8; + cfg.g_input_bit_depth = 8; + cfg.g_lag_in_frames = 0; + cfg.g_threads = 2; // MultiThread + cfg.rc_end_usage = AOM_Q; + cfg.rc_min_quantizer = 0; + cfg.rc_max_quantizer = 63; + aom_codec_ctx_t enc; + EXPECT_EQ(AOM_CODEC_OK, aom_codec_enc_init(&enc, iface, &cfg, 0)); + EXPECT_EQ(AOM_CODEC_OK, aom_codec_control(&enc, AOME_SET_CQ_LEVEL, 31)); + EXPECT_EQ(AOM_CODEC_OK, + aom_codec_control(&enc, AOME_SET_NUMBER_SPATIAL_LAYERS, 2)); + EXPECT_EQ(AOM_CODEC_OK, aom_codec_control(&enc, AOME_SET_CPUUSED, 6)); + EXPECT_EQ(AOM_CODEC_OK, + aom_codec_control(&enc, AV1E_SET_ROW_MT, 1)); // MultiThread + EXPECT_EQ(AOM_CODEC_OK, + aom_codec_control(&enc, AV1E_SET_COLOR_RANGE, AOM_CR_FULL_RANGE)); + EXPECT_EQ(AOM_CODEC_OK, + aom_codec_control(&enc, AOME_SET_TUNING, AOM_TUNE_SSIM)); + + // First frame (layer 0) + EXPECT_EQ(AOM_CODEC_OK, + aom_codec_control(&enc, 
AOME_SET_SPATIAL_LAYER_ID, 0)); + aom_scaling_mode_t scaling_mode = { AOME_ONETWO, AOME_ONETWO }; + EXPECT_EQ(AOM_CODEC_OK, + aom_codec_control(&enc, AOME_SET_SCALEMODE, &scaling_mode)); + EXPECT_EQ(AOM_CODEC_OK, aom_codec_encode(&enc, &img, 0, 1, 0)); + aom_codec_iter_t iter = nullptr; + const aom_codec_cx_pkt_t *pkt = aom_codec_get_cx_data(&enc, &iter); + EXPECT_NE(pkt, nullptr); + EXPECT_EQ(pkt->kind, AOM_CODEC_CX_FRAME_PKT); + // pkt->data.frame.flags is 0x1f0011. + EXPECT_EQ(pkt->data.frame.flags & AOM_FRAME_IS_KEY, AOM_FRAME_IS_KEY); + pkt = aom_codec_get_cx_data(&enc, &iter); + EXPECT_EQ(pkt, nullptr); + + // Second frame (layer 1) + EXPECT_EQ(AOM_CODEC_OK, + aom_codec_control(&enc, AOME_SET_SPATIAL_LAYER_ID, 1)); + aom_enc_frame_flags_t encode_flags = + AOM_EFLAG_NO_REF_GF | AOM_EFLAG_NO_REF_ARF | AOM_EFLAG_NO_REF_BWD | + AOM_EFLAG_NO_REF_ARF2 | AOM_EFLAG_NO_UPD_GF | AOM_EFLAG_NO_UPD_ARF; + EXPECT_EQ(AOM_CODEC_OK, aom_codec_encode(&enc, &img, 0, 1, encode_flags)); + iter = nullptr; + pkt = aom_codec_get_cx_data(&enc, &iter); + EXPECT_NE(pkt, nullptr); + EXPECT_EQ(pkt->kind, AOM_CODEC_CX_FRAME_PKT); + // pkt->data.frame.flags is 0. 
+ EXPECT_EQ(pkt->data.frame.flags & AOM_FRAME_IS_KEY, 0u); + pkt = aom_codec_get_cx_data(&enc, &iter); + EXPECT_EQ(pkt, nullptr); + + // Flush encoder + EXPECT_EQ(AOM_CODEC_OK, aom_codec_encode(&enc, nullptr, 0, 1, 0)); + iter = nullptr; + pkt = aom_codec_get_cx_data(&enc, &iter); + EXPECT_EQ(pkt, nullptr); + + EXPECT_EQ(AOM_CODEC_OK, aom_codec_destroy(&enc)); +} + } // namespace diff -Nru aom-3.6.0/test/invalid_file_test.cc aom-3.6.1/test/invalid_file_test.cc --- aom-3.6.0/test/invalid_file_test.cc 2023-02-04 07:43:20.000000000 +0000 +++ aom-3.6.1/test/invalid_file_test.cc 2023-05-08 18:17:52.000000000 +0000 @@ -146,7 +146,11 @@ { 1, "invalid-oss-fuzz-10227.ivf", nullptr }, { 4, "invalid-oss-fuzz-10555.ivf", nullptr }, { 1, "invalid-oss-fuzz-10705.ivf", nullptr }, +#if CONFIG_CWG_C013 { 1, "invalid-oss-fuzz-10723.ivf", "invalid-oss-fuzz-10723.ivf.res.3" }, +#else + { 1, "invalid-oss-fuzz-10723.ivf", "invalid-oss-fuzz-10723.ivf.res.2" }, +#endif { 1, "invalid-oss-fuzz-10779.ivf", nullptr }, { 1, "invalid-oss-fuzz-11477.ivf", nullptr }, { 1, "invalid-oss-fuzz-11479.ivf", "invalid-oss-fuzz-11479.ivf.res.2" }, diff -Nru aom-3.6.0/test/level_test.cc aom-3.6.1/test/level_test.cc --- aom-3.6.0/test/level_test.cc 2023-02-04 07:43:20.000000000 +0000 +++ aom-3.6.1/test/level_test.cc 2023-05-08 18:17:52.000000000 +0000 @@ -87,8 +87,8 @@ for (int operating_point = 0; operating_point <= 32; ++operating_point) { for (int level = 0; level <= 32; ++level) { const int target_level = operating_point * 100 + level; - if ((level < 28 && level != 2 && level != 3 && level != 6 && level != 7 && - level != 10 && level != 11) || + if ((level < (CONFIG_CWG_C013 ? 
28 : 20) && level != 2 && level != 3 && + level != 6 && level != 7 && level != 10 && level != 11) || level == kLevelMax || level == kLevelKeepStats || operating_point > 31) { EXPECT_EQ(AOM_CODEC_OK, diff -Nru aom-3.6.0/test/register_state_check.h aom-3.6.1/test/register_state_check.h --- aom-3.6.0/test/register_state_check.h 2023-02-04 07:43:20.000000000 +0000 +++ aom-3.6.1/test/register_state_check.h 2023-05-08 18:17:52.000000000 +0000 @@ -30,6 +30,7 @@ #undef NOMINMAX #define NOMINMAX +#undef WIN32_LEAN_AND_MEAN #define WIN32_LEAN_AND_MEAN #include #include diff -Nru aom-3.6.0/test/resize_test.cc aom-3.6.1/test/resize_test.cc --- aom-3.6.0/test/resize_test.cc 2023-02-04 07:43:20.000000000 +0000 +++ aom-3.6.1/test/resize_test.cc 2023-05-08 18:17:52.000000000 +0000 @@ -377,13 +377,15 @@ ::testing::Values(::libaom_test::kOnePassGood)); #endif +// Parameters: test mode, speed, threads class ResizeRealtimeTest - : public ::libaom_test::CodecTestWith2Params, + : public ::libaom_test::CodecTestWith3Params, public ::libaom_test::EncoderTest { protected: ResizeRealtimeTest() - : EncoderTest(GET_PARAM(0)), set_scale_mode_(false), - set_scale_mode2_(false) {} + : EncoderTest(GET_PARAM(0)), num_threads_(GET_PARAM(3)), + set_scale_mode_(false), set_scale_mode2_(false) {} virtual ~ResizeRealtimeTest() {} virtual void PreEncodeFrameHook(libaom_test::VideoSource *video, @@ -457,6 +459,7 @@ cfg_.rc_dropframe_thresh = 1; // Disable error_resilience mode. cfg_.g_error_resilient = 0; + cfg_.g_threads = num_threads_; // Run at low bitrate. 
cfg_.rc_target_bitrate = 200; // We use max(kInitialWidth, kInitialHeight) because during the test @@ -472,6 +475,7 @@ std::vector frame_info_list_; int set_cpu_used_; + int num_threads_; bool change_bitrate_; unsigned int frame_change_bitrate_; double mismatch_psnr_; @@ -864,7 +868,7 @@ ::testing::Values(::libaom_test::kRealTime)); AV1_INSTANTIATE_TEST_SUITE(ResizeRealtimeTest, ::testing::Values(::libaom_test::kRealTime), - ::testing::Range(6, 10)); + ::testing::Range(6, 10), ::testing::Values(1, 2, 4)); AV1_INSTANTIATE_TEST_SUITE(ResizeCspTest, ::testing::Values(::libaom_test::kRealTime)); diff -Nru aom-3.6.0/test/test-data.sha1 aom-3.6.1/test/test-data.sha1 --- aom-3.6.0/test/test-data.sha1 2023-02-04 07:43:20.000000000 +0000 +++ aom-3.6.1/test/test-data.sha1 2023-05-08 18:17:52.000000000 +0000 @@ -22,6 +22,7 @@ cf5945085fe85456a1f74bf4cc7998b88b3f4b62 *invalid-oss-fuzz-10705.ivf 758671858368ffd2a2c0727898de5661f7cf7d68 *invalid-oss-fuzz-10705.ivf.res 88e29851122cca3f336824f7fa4d9f757f91110c *invalid-oss-fuzz-10723.ivf +64f8a208dec7f1580fbe0371aa15e62bb1262715 *invalid-oss-fuzz-10723.ivf.res.2 1af486cd2cc83ebeddc76ca7a1c512cc0ec568d5 *invalid-oss-fuzz-10723.ivf.res.3 0784acc8931090ec24eba752d6c27e359e68fe7d *invalid-oss-fuzz-10779.ivf 5d9474c0309b7ca09a182d888f73b37a8fe1362c *invalid-oss-fuzz-10779.ivf.res diff -Nru aom-3.6.0/test/test_data_util.cmake aom-3.6.1/test/test_data_util.cmake --- aom-3.6.0/test/test_data_util.cmake 2023-02-04 07:43:20.000000000 +0000 +++ aom-3.6.1/test/test_data_util.cmake 2023-05-08 18:17:52.000000000 +0000 @@ -552,6 +552,7 @@ "invalid-oss-fuzz-10705.ivf" "invalid-oss-fuzz-10705.ivf.res" "invalid-oss-fuzz-10723.ivf" + "invalid-oss-fuzz-10723.ivf.res.2" "invalid-oss-fuzz-10723.ivf.res.3" "invalid-oss-fuzz-10779.ivf" "invalid-oss-fuzz-10779.ivf.res" diff -Nru aom-3.6.0/test/video_source.h aom-3.6.1/test/video_source.h --- aom-3.6.0/test/video_source.h 2023-02-04 07:43:20.000000000 +0000 +++ aom-3.6.1/test/video_source.h 2023-05-08 
18:17:52.000000000 +0000 @@ -14,6 +14,7 @@ #if defined(_WIN32) #undef NOMINMAX #define NOMINMAX +#undef WIN32_LEAN_AND_MEAN #define WIN32_LEAN_AND_MEAN #include #endif