diff -Nru libvpx-0.9.5/args.c libvpx-0.9.6/args.c --- libvpx-0.9.5/args.c 2010-10-28 13:14:14.000000000 +0000 +++ libvpx-0.9.6/args.c 2011-03-04 20:40:37.000000000 +0000 @@ -135,6 +135,17 @@ def->long_name, long_val); fprintf(fp, " %-37s\t%s\n", option_text, def->desc); + + if(def->enums) + { + const struct arg_enum_list *listptr; + + fprintf(fp, " %-37s\t ", ""); + + for(listptr = def->enums; listptr->name; listptr++) + fprintf(fp, "%s%s", listptr->name, + listptr[1].name ? ", " : "\n"); + } } } @@ -218,3 +229,37 @@ return rat; } + + +int arg_parse_enum(const struct arg *arg) +{ + const struct arg_enum_list *listptr; + long int rawval; + char *endptr; + + /* First see if the value can be parsed as a raw value */ + rawval = strtol(arg->val, &endptr, 10); + if (arg->val[0] != '\0' && endptr[0] == '\0') + { + /* Got a raw value, make sure it's valid */ + for(listptr = arg->def->enums; listptr->name; listptr++) + if(listptr->val == rawval) + return rawval; + } + + /* Next see if it can be parsed as a string */ + for(listptr = arg->def->enums; listptr->name; listptr++) + if(!strcmp(arg->val, listptr->name)) + return listptr->val; + + die("Option %s: Invalid value '%s'\n", arg->name, arg->val); + return 0; +} + + +int arg_parse_enum_or_int(const struct arg *arg) +{ + if(arg->def->enums) + return arg_parse_enum(arg); + return arg_parse_int(arg); +} diff -Nru libvpx-0.9.5/args.h libvpx-0.9.6/args.h --- libvpx-0.9.5/args.h 2010-10-28 13:14:14.000000000 +0000 +++ libvpx-0.9.6/args.h 2011-03-04 20:40:37.000000000 +0000 @@ -22,14 +22,23 @@ const struct arg_def *def; }; +struct arg_enum_list +{ + const char *name; + int val; +}; +#define ARG_ENUM_LIST_END {0} + typedef struct arg_def { const char *short_name; const char *long_name; int has_val; const char *desc; + const struct arg_enum_list *enums; } arg_def_t; -#define ARG_DEF(s,l,v,d) {s,l,v,d} +#define ARG_DEF(s,l,v,d) {s,l,v,d, NULL} +#define ARG_DEF_ENUM(s,l,v,d,e) {s,l,v,d,e} #define ARG_DEF_LIST_END {0} struct arg 
arg_init(char **argv); @@ -41,4 +50,5 @@ unsigned int arg_parse_uint(const struct arg *arg); int arg_parse_int(const struct arg *arg); struct vpx_rational arg_parse_rational(const struct arg *arg); +int arg_parse_enum_or_int(const struct arg *arg); #endif diff -Nru libvpx-0.9.5/AUTHORS libvpx-0.9.6/AUTHORS --- libvpx-0.9.5/AUTHORS 2010-10-28 13:14:14.000000000 +0000 +++ libvpx-0.9.6/AUTHORS 2011-03-04 20:40:37.000000000 +0000 @@ -4,13 +4,18 @@ Aaron Watry Adrian Grange Alex Converse +Andoni Morales Alastruey Andres Mejia +Attila Nagy Fabio Pedretti Frank Galligan Fredrik Söderquist Fritz Koenig +Gaute Strokkenes Giuseppe Scrivano Guillermo Ballester Valor +Henrik Lundin +James Berry James Zern Jan Kratochvil Jeff Muizelaar @@ -23,10 +28,14 @@ Makoto Kato Martin Ettl Michael Kohler +Mikhal Shemer +Pascal Massimino +Patrik Westin Paul Wilkins Pavol Rusnak Philip Jägenstedt Scott LaVarnway +Tero Rintaluoma Timothy B. Terriberry Tom Finegan Yaowu Xu diff -Nru libvpx-0.9.5/build/arm-wince-vs8/armasmv5.rules libvpx-0.9.6/build/arm-wince-vs8/armasmv5.rules --- libvpx-0.9.5/build/arm-wince-vs8/armasmv5.rules 2010-10-28 13:14:14.000000000 +0000 +++ libvpx-0.9.6/build/arm-wince-vs8/armasmv5.rules 1970-01-01 00:00:00.000000000 +0000 @@ -1,20 +0,0 @@ - - - - - - - - - diff -Nru libvpx-0.9.5/build/arm-wince-vs8/armasmv6.rules libvpx-0.9.6/build/arm-wince-vs8/armasmv6.rules --- libvpx-0.9.5/build/arm-wince-vs8/armasmv6.rules 2010-10-28 13:14:14.000000000 +0000 +++ libvpx-0.9.6/build/arm-wince-vs8/armasmv6.rules 1970-01-01 00:00:00.000000000 +0000 @@ -1,20 +0,0 @@ - - - - - - - - - diff -Nru libvpx-0.9.5/build/arm-wince-vs8/armasmxscale.rules libvpx-0.9.6/build/arm-wince-vs8/armasmxscale.rules --- libvpx-0.9.5/build/arm-wince-vs8/armasmxscale.rules 2010-10-28 13:14:14.000000000 +0000 +++ libvpx-0.9.6/build/arm-wince-vs8/armasmxscale.rules 1970-01-01 00:00:00.000000000 +0000 @@ -1,20 +0,0 @@ - - - - - - - - - diff -Nru libvpx-0.9.5/build/arm-wince-vs8/obj_int_extract.bat 
libvpx-0.9.6/build/arm-wince-vs8/obj_int_extract.bat --- libvpx-0.9.5/build/arm-wince-vs8/obj_int_extract.bat 2010-10-28 13:14:14.000000000 +0000 +++ libvpx-0.9.6/build/arm-wince-vs8/obj_int_extract.bat 1970-01-01 00:00:00.000000000 +0000 @@ -1,13 +0,0 @@ -@echo off -REM Copyright (c) 2010 The WebM project authors. All Rights Reserved. -REM -REM Use of this source code is governed by a BSD-style license -REM that can be found in the LICENSE file in the root of the source -REM tree. An additional intellectual property rights grant can be found -REM in the file PATENTS. All contributing project authors may -REM be found in the AUTHORS file in the root of the source tree. -echo on - - -cl /I ".\\" /I "..\vp6_decoder_sdk" /I "..\vp6_decoder_sdk\vpx_ports" /D "NDEBUG" /D "_WIN32_WCE=0x420" /D "UNDER_CE" /D "WIN32_PLATFORM_PSPC" /D "WINCE" /D "_LIB" /D "ARM" /D "_ARM_" /D "_UNICODE" /D "UNICODE" /FD /EHsc /MT /GS- /fp:fast /GR- /Fo"Pocket_PC_2003__ARMV4_\%1/" /Fd"Pocket_PC_2003__ARMV4_\%1/vc80.pdb" /W3 /nologo /c /TC ..\vp6_decoder_sdk\vp6_decoder\algo\common\arm\dec_asm_offsets_arm.c -obj_int_extract.exe rvds "Pocket_PC_2003__ARMV4_\%1/dec_asm_offsets_arm.obj" diff -Nru libvpx-0.9.5/build/arm-wince-vs8/vpx.sln libvpx-0.9.6/build/arm-wince-vs8/vpx.sln --- libvpx-0.9.5/build/arm-wince-vs8/vpx.sln 2010-10-28 13:14:14.000000000 +0000 +++ libvpx-0.9.6/build/arm-wince-vs8/vpx.sln 1970-01-01 00:00:00.000000000 +0000 @@ -1,88 +0,0 @@ -Microsoft Visual Studio Solution File, Format Version 9.00 -# Visual Studio 2005 -Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "example", "example.vcproj", "{BA5FE66F-38DD-E034-F542-B1578C5FB950}" - ProjectSection(ProjectDependencies) = postProject - {DCE19DAF-69AC-46DB-B14A-39F0FAA5DB74} = {DCE19DAF-69AC-46DB-B14A-39F0FAA5DB74} - {E1360C65-D375-4335-8057-7ED99CC3F9B2} = {E1360C65-D375-4335-8057-7ED99CC3F9B2} - EndProjectSection -EndProject -Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "obj_int_extract", "obj_int_extract.vcproj", 
"{E1360C65-D375-4335-8057-7ED99CC3F9B2}" -EndProject -Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "vpx", "vpx.vcproj", "{DCE19DAF-69AC-46DB-B14A-39F0FAA5DB74}" - ProjectSection(ProjectDependencies) = postProject - {E1360C65-D375-4335-8057-7ED99CC3F9B2} = {E1360C65-D375-4335-8057-7ED99CC3F9B2} - EndProjectSection -EndProject -Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "xma", "xma.vcproj", "{A955FC4A-73F1-44F7-135E-30D84D32F022}" - ProjectSection(ProjectDependencies) = postProject - {E1360C65-D375-4335-8057-7ED99CC3F9B2} = {E1360C65-D375-4335-8057-7ED99CC3F9B2} - {DCE19DAF-69AC-46DB-B14A-39F0FAA5DB74} = {DCE19DAF-69AC-46DB-B14A-39F0FAA5DB74} - EndProjectSection -EndProject -Global - GlobalSection(SolutionConfigurationPlatforms) = preSolution - Debug|Mixed Platforms = Debug|Mixed Platforms - Debug|Pocket PC 2003 (ARMV4) = Debug|Pocket PC 2003 (ARMV4) - Debug|Win32 = Debug|Win32 - Release|Mixed Platforms = Release|Mixed Platforms - Release|Pocket PC 2003 (ARMV4) = Release|Pocket PC 2003 (ARMV4) - Release|Win32 = Release|Win32 - EndGlobalSection - GlobalSection(ProjectConfigurationPlatforms) = postSolution - {BA5FE66F-38DD-E034-F542-B1578C5FB950}.Debug|Mixed Platforms.ActiveCfg = Debug|Pocket PC 2003 (ARMV4) - {BA5FE66F-38DD-E034-F542-B1578C5FB950}.Debug|Mixed Platforms.Build.0 = Debug|Pocket PC 2003 (ARMV4) - {BA5FE66F-38DD-E034-F542-B1578C5FB950}.Debug|Mixed Platforms.Deploy.0 = Debug|Pocket PC 2003 (ARMV4) - {BA5FE66F-38DD-E034-F542-B1578C5FB950}.Debug|Pocket PC 2003 (ARMV4).ActiveCfg = Debug|Pocket PC 2003 (ARMV4) - {BA5FE66F-38DD-E034-F542-B1578C5FB950}.Debug|Pocket PC 2003 (ARMV4).Build.0 = Debug|Pocket PC 2003 (ARMV4) - {BA5FE66F-38DD-E034-F542-B1578C5FB950}.Debug|Pocket PC 2003 (ARMV4).Deploy.0 = Debug|Pocket PC 2003 (ARMV4) - {BA5FE66F-38DD-E034-F542-B1578C5FB950}.Debug|Win32.ActiveCfg = Debug|Pocket PC 2003 (ARMV4) - {BA5FE66F-38DD-E034-F542-B1578C5FB950}.Release|Mixed Platforms.ActiveCfg = Release|Pocket PC 2003 (ARMV4) - 
{BA5FE66F-38DD-E034-F542-B1578C5FB950}.Release|Mixed Platforms.Build.0 = Release|Pocket PC 2003 (ARMV4) - {BA5FE66F-38DD-E034-F542-B1578C5FB950}.Release|Mixed Platforms.Deploy.0 = Release|Pocket PC 2003 (ARMV4) - {BA5FE66F-38DD-E034-F542-B1578C5FB950}.Release|Pocket PC 2003 (ARMV4).ActiveCfg = Release|Pocket PC 2003 (ARMV4) - {BA5FE66F-38DD-E034-F542-B1578C5FB950}.Release|Pocket PC 2003 (ARMV4).Build.0 = Release|Pocket PC 2003 (ARMV4) - {BA5FE66F-38DD-E034-F542-B1578C5FB950}.Release|Pocket PC 2003 (ARMV4).Deploy.0 = Release|Pocket PC 2003 (ARMV4) - {BA5FE66F-38DD-E034-F542-B1578C5FB950}.Release|Win32.ActiveCfg = Release|Pocket PC 2003 (ARMV4) - {E1360C65-D375-4335-8057-7ED99CC3F9B2}.Debug|Mixed Platforms.ActiveCfg = Release|Win32 - {E1360C65-D375-4335-8057-7ED99CC3F9B2}.Debug|Mixed Platforms.Build.0 = Release|Win32 - {E1360C65-D375-4335-8057-7ED99CC3F9B2}.Debug|Pocket PC 2003 (ARMV4).ActiveCfg = Release|Win32 - {E1360C65-D375-4335-8057-7ED99CC3F9B2}.Debug|Win32.ActiveCfg = Release|Win32 - {E1360C65-D375-4335-8057-7ED99CC3F9B2}.Debug|Win32.Build.0 = Release|Win32 - {E1360C65-D375-4335-8057-7ED99CC3F9B2}.Release|Mixed Platforms.ActiveCfg = Release|Win32 - {E1360C65-D375-4335-8057-7ED99CC3F9B2}.Release|Mixed Platforms.Build.0 = Release|Win32 - {E1360C65-D375-4335-8057-7ED99CC3F9B2}.Release|Pocket PC 2003 (ARMV4).ActiveCfg = Release|Win32 - {E1360C65-D375-4335-8057-7ED99CC3F9B2}.Release|Win32.ActiveCfg = Release|Win32 - {E1360C65-D375-4335-8057-7ED99CC3F9B2}.Release|Win32.Build.0 = Release|Win32 - {DCE19DAF-69AC-46DB-B14A-39F0FAA5DB74}.Debug|Mixed Platforms.ActiveCfg = Debug|Pocket PC 2003 (ARMV4) - {DCE19DAF-69AC-46DB-B14A-39F0FAA5DB74}.Debug|Mixed Platforms.Build.0 = Debug|Pocket PC 2003 (ARMV4) - {DCE19DAF-69AC-46DB-B14A-39F0FAA5DB74}.Debug|Mixed Platforms.Deploy.0 = Debug|Pocket PC 2003 (ARMV4) - {DCE19DAF-69AC-46DB-B14A-39F0FAA5DB74}.Debug|Pocket PC 2003 (ARMV4).ActiveCfg = Debug|Pocket PC 2003 (ARMV4) - {DCE19DAF-69AC-46DB-B14A-39F0FAA5DB74}.Debug|Pocket PC 2003 
(ARMV4).Build.0 = Debug|Pocket PC 2003 (ARMV4) - {DCE19DAF-69AC-46DB-B14A-39F0FAA5DB74}.Debug|Pocket PC 2003 (ARMV4).Deploy.0 = Debug|Pocket PC 2003 (ARMV4) - {DCE19DAF-69AC-46DB-B14A-39F0FAA5DB74}.Debug|Win32.ActiveCfg = Debug|Pocket PC 2003 (ARMV4) - {DCE19DAF-69AC-46DB-B14A-39F0FAA5DB74}.Release|Mixed Platforms.ActiveCfg = Release|Pocket PC 2003 (ARMV4) - {DCE19DAF-69AC-46DB-B14A-39F0FAA5DB74}.Release|Mixed Platforms.Build.0 = Release|Pocket PC 2003 (ARMV4) - {DCE19DAF-69AC-46DB-B14A-39F0FAA5DB74}.Release|Mixed Platforms.Deploy.0 = Release|Pocket PC 2003 (ARMV4) - {DCE19DAF-69AC-46DB-B14A-39F0FAA5DB74}.Release|Pocket PC 2003 (ARMV4).ActiveCfg = Release|Pocket PC 2003 (ARMV4) - {DCE19DAF-69AC-46DB-B14A-39F0FAA5DB74}.Release|Pocket PC 2003 (ARMV4).Build.0 = Release|Pocket PC 2003 (ARMV4) - {DCE19DAF-69AC-46DB-B14A-39F0FAA5DB74}.Release|Pocket PC 2003 (ARMV4).Deploy.0 = Release|Pocket PC 2003 (ARMV4) - {DCE19DAF-69AC-46DB-B14A-39F0FAA5DB74}.Release|Win32.ActiveCfg = Release|Pocket PC 2003 (ARMV4) - {A955FC4A-73F1-44F7-135E-30D84D32F022}.Debug|Mixed Platforms.ActiveCfg = Debug|Pocket PC 2003 (ARMV4) - {A955FC4A-73F1-44F7-135E-30D84D32F022}.Debug|Mixed Platforms.Build.0 = Debug|Pocket PC 2003 (ARMV4) - {A955FC4A-73F1-44F7-135E-30D84D32F022}.Debug|Mixed Platforms.Deploy.0 = Debug|Pocket PC 2003 (ARMV4) - {A955FC4A-73F1-44F7-135E-30D84D32F022}.Debug|Pocket PC 2003 (ARMV4).ActiveCfg = Debug|Pocket PC 2003 (ARMV4) - {A955FC4A-73F1-44F7-135E-30D84D32F022}.Debug|Pocket PC 2003 (ARMV4).Build.0 = Debug|Pocket PC 2003 (ARMV4) - {A955FC4A-73F1-44F7-135E-30D84D32F022}.Debug|Pocket PC 2003 (ARMV4).Deploy.0 = Debug|Pocket PC 2003 (ARMV4) - {A955FC4A-73F1-44F7-135E-30D84D32F022}.Debug|Win32.ActiveCfg = Debug|Pocket PC 2003 (ARMV4) - {A955FC4A-73F1-44F7-135E-30D84D32F022}.Release|Mixed Platforms.ActiveCfg = Release|Pocket PC 2003 (ARMV4) - {A955FC4A-73F1-44F7-135E-30D84D32F022}.Release|Mixed Platforms.Build.0 = Release|Pocket PC 2003 (ARMV4) - 
{A955FC4A-73F1-44F7-135E-30D84D32F022}.Release|Mixed Platforms.Deploy.0 = Release|Pocket PC 2003 (ARMV4) - {A955FC4A-73F1-44F7-135E-30D84D32F022}.Release|Pocket PC 2003 (ARMV4).ActiveCfg = Release|Pocket PC 2003 (ARMV4) - {A955FC4A-73F1-44F7-135E-30D84D32F022}.Release|Pocket PC 2003 (ARMV4).Build.0 = Release|Pocket PC 2003 (ARMV4) - {A955FC4A-73F1-44F7-135E-30D84D32F022}.Release|Pocket PC 2003 (ARMV4).Deploy.0 = Release|Pocket PC 2003 (ARMV4) - {A955FC4A-73F1-44F7-135E-30D84D32F022}.Release|Win32.ActiveCfg = Release|Pocket PC 2003 (ARMV4) - EndGlobalSection - GlobalSection(SolutionProperties) = preSolution - HideSolutionNode = FALSE - EndGlobalSection -EndGlobal diff -Nru libvpx-0.9.5/build/make/armlink_adapter.sh libvpx-0.9.6/build/make/armlink_adapter.sh --- libvpx-0.9.5/build/make/armlink_adapter.sh 2010-10-28 13:14:14.000000000 +0000 +++ libvpx-0.9.6/build/make/armlink_adapter.sh 2011-03-04 20:40:37.000000000 +0000 @@ -17,15 +17,17 @@ on_of=1 elif [ "$i" == "-v" ]; then verbose=1 + elif [ "$i" == "-g" ]; then + args="${args} --debug" elif [ "$on_of" == "1" ]; then outfile=$i - on_of=0 + on_of=0 elif [ -f "$i" ]; then infiles="$infiles $i" elif [ "${i:0:2}" == "-l" ]; then libs="$libs ${i#-l}" elif [ "${i:0:2}" == "-L" ]; then - libpaths="${libpaths} ${i#-L}" + libpaths="${libpaths} ${i#-L}" else args="${args} ${i}" fi diff -Nru libvpx-0.9.5/build/make/configure.sh libvpx-0.9.6/build/make/configure.sh --- libvpx-0.9.5/build/make/configure.sh 2010-10-28 13:14:14.000000000 +0000 +++ libvpx-0.9.6/build/make/configure.sh 2011-03-04 20:40:37.000000000 +0000 @@ -78,6 +78,7 @@ --log=yes|no|FILE file configure log is written to [config.err] --target=TARGET target platform tuple [generic-gnu] --cpu=CPU optimize for a specific cpu rather than a family + --extra-cflags=ECFLAGS add ECFLAGS to CFLAGS [$CFLAGS] ${toggle_extra_warnings} emit harmless warnings (always non-fatal) ${toggle_werror} treat warnings as errors, if possible (not available with all compilers) @@ -442,6 
+443,9 @@ ;; --cpu=*) tune_cpu="$optval" ;; + --extra-cflags=*) + extra_cflags="${optval}" + ;; --enable-?*|--disable-?*) eval `echo "$opt" | sed 's/--/action=/;s/-/ option=/;s/-/_/g'` echo "${CMDLINE_SELECT} ${ARCH_EXT_LIST}" | grep "^ *$option\$" >/dev/null || die_unknown $opt @@ -547,6 +551,10 @@ tgt_isa=universal tgt_os=darwin9 ;; + *darwin10*) + tgt_isa=x86_64 + tgt_os=darwin10 + ;; *mingw32*|*cygwin*) [ -z "$tgt_isa" ] && tgt_isa=x86 tgt_os=win32 @@ -606,10 +614,20 @@ add_ldflags "-isysroot /Developer/SDKs/MacOSX10.5.sdk" add_ldflags "-mmacosx-version-min=10.5" ;; + *-darwin10-*) + add_cflags "-isysroot /Developer/SDKs/MacOSX10.6.sdk" + add_cflags "-mmacosx-version-min=10.6" + add_ldflags "-isysroot /Developer/SDKs/MacOSX10.6.sdk" + add_ldflags "-mmacosx-version-min=10.6" + ;; esac # Handle Solaris variants. Solaris 10 needs -lposix4 case ${toolchain} in + sparc-solaris-*) + add_extralibs -lposix4 + add_cflags "-DMUST_BE_ALIGNED" + ;; *-solaris-*) add_extralibs -lposix4 ;; @@ -650,12 +668,12 @@ elif enabled armv7 then check_add_cflags -march=armv7-a -mcpu=cortex-a8 -mfpu=neon -mfloat-abi=softfp #-ftree-vectorize - check_add_asflags -mcpu=cortex-a8 -mfpu=neon -mfloat-abi=softfp #-march=armv7-a + check_add_asflags -mcpu=cortex-a8 -mfpu=neon -mfloat-abi=softfp #-march=armv7-a else check_add_cflags -march=${tgt_isa} check_add_asflags -march=${tgt_isa} fi - + enabled debug && add_asflags -g asm_conversion_cmd="${source_path}/build/make/ads2gas.pl" ;; rvct) @@ -680,16 +698,24 @@ arch_int=${tgt_isa##armv} arch_int=${arch_int%%te} check_add_asflags --pd "\"ARCHITECTURE SETA ${arch_int}\"" + enabled debug && add_asflags -g + add_cflags --gnu + add_cflags --enum_is_int + add_cflags --wchar32 ;; esac case ${tgt_os} in + none*) + disable multithread + disable os_support + ;; darwin*) SDK_PATH=/Developer/Platforms/iPhoneOS.platform/Developer TOOLCHAIN_PATH=${SDK_PATH}/usr/bin CC=${TOOLCHAIN_PATH}/gcc AR=${TOOLCHAIN_PATH}/ar - 
LD=${TOOLCHAIN_PATH}/arm-apple-darwin9-gcc-4.2.1 + LD=${TOOLCHAIN_PATH}/arm-apple-darwin10-gcc-4.2.1 AS=${TOOLCHAIN_PATH}/as STRIP=${TOOLCHAIN_PATH}/strip NM=${TOOLCHAIN_PATH}/nm @@ -703,14 +729,14 @@ add_cflags -arch ${tgt_isa} add_ldflags -arch_only ${tgt_isa} - add_cflags "-isysroot /Developer/Platforms/iPhoneOS.platform/Developer/SDKs/iPhoneOS3.1.sdk" + add_cflags "-isysroot /Developer/Platforms/iPhoneOS.platform/Developer/SDKs/iPhoneOS4.2.sdk" # This should be overridable - alt_libc=${SDK_PATH}/SDKs/iPhoneOS3.1.sdk + alt_libc=${SDK_PATH}/SDKs/iPhoneOS4.2.sdk # Add the paths for the alternate libc # for d in usr/include usr/include/gcc/darwin/4.0/; do - for d in usr/include usr/include/gcc/darwin/4.0/ usr/lib/gcc/arm-apple-darwin9/4.0.1/include/; do + for d in usr/include usr/include/gcc/darwin/4.0/ usr/lib/gcc/arm-apple-darwin10/4.2.1/include/; do try_dir="${alt_libc}/${d}" [ -d "${try_dir}" ] && add_cflags -I"${try_dir}" done @@ -732,13 +758,9 @@ || die "Must supply --libc when targetting *-linux-rvct" # Set up compiler - add_cflags --gnu - add_cflags --enum_is_int add_cflags --library_interface=aeabi_glibc add_cflags --no_hide_all - add_cflags --wchar32 add_cflags --dwarf2 - add_cflags --gnu # Set up linker add_ldflags --sysv --no_startup --no_ref_cpp_init @@ -824,6 +846,7 @@ soft_enable sse2 soft_enable sse3 soft_enable ssse3 + soft_enable sse4_1 case ${tgt_os} in win*) @@ -844,7 +867,7 @@ setup_gnu_toolchain add_cflags -use-msasm -use-asm add_ldflags -i-static - enabled x86_64 && add_cflags -ipo -no-prec-div -static -xSSE3 -axSSE3 + enabled x86_64 && add_cflags -ipo -no-prec-div -static -xSSE2 -axSSE2 enabled x86_64 && AR=xiar case ${tune_cpu} in atom*) @@ -879,7 +902,7 @@ case ${tgt_os} in win*) add_asflags -f win${bits} - enabled debug && add_asflags -g dwarf2 + enabled debug && add_asflags -g cv8 ;; linux*|solaris*) add_asflags -f elf${bits} @@ -961,6 +984,12 @@ add_cflags -D_LARGEFILE_SOURCE add_cflags -D_FILE_OFFSET_BITS=64 fi + + # append any user 
defined extra cflags + if [ -n "${extra_cflags}" ] ; then + check_add_cflags ${extra_cflags} || \ + die "Requested extra CFLAGS '${extra_cflags}' not supported by compiler" + fi } process_toolchain() { diff -Nru libvpx-0.9.5/build/make/gen_msvs_proj.sh libvpx-0.9.6/build/make/gen_msvs_proj.sh --- libvpx-0.9.5/build/make/gen_msvs_proj.sh 2010-10-28 13:14:14.000000000 +0000 +++ libvpx-0.9.6/build/make/gen_msvs_proj.sh 2011-03-04 20:40:37.000000000 +0000 @@ -32,7 +32,7 @@ --name=project_name Name of the project (required) --proj-guid=GUID GUID to use for the project --module-def=filename File containing export definitions (for DLLs) - --ver=version Version (7,8) of visual studio to generate for + --ver=version Version (7,8,9) of visual studio to generate for -Ipath/to/include Additional include directories -DFLAG[=value] Preprocessor macros to define -Lpath/to/lib Additional library search paths @@ -132,7 +132,7 @@ open_tag Filter \ Name=$name \ Filter=$pats \ - UniqueIdentifier=`generate_uuid` + UniqueIdentifier=`generate_uuid` \ file_list_sz=${#file_list[@]} for i in ${!file_list[@]}; do @@ -145,31 +145,21 @@ if [ "$pat" == "asm" ] && $asm_use_custom_step; then for plat in "${platforms[@]}"; do for cfg in Debug Release; do - open_tag FileConfiguration \ - Name="${cfg}|${plat}" + open_tag FileConfiguration \ + Name="${cfg}|${plat}" \ + tag Tool \ Name="VCCustomBuildTool" \ Description="Assembling \$(InputFileName)" \ - CommandLine="$(eval echo \$asm_${cfg}_cmdline)"\ - Outputs="\$(InputName).obj" + CommandLine="$(eval echo \$asm_${cfg}_cmdline)" \ + Outputs="\$(InputName).obj" \ + close_tag FileConfiguration done done fi - if [ "${f##*.}" == "cpp" ]; then - for plat in "${platforms[@]}"; do - for cfg in Debug Release; do - open_tag FileConfiguration \ - Name="${cfg}|${plat}" - tag Tool \ - Name="VCCLCompilerTool" \ - CompileAs="2" - close_tag FileConfiguration - done - done - fi - close_tag File + close_tag File break fi @@ -185,57 +175,61 @@ for opt in "$@"; do 
optval="${opt#*=}" case "$opt" in - --help|-h) show_help - ;; - --target=*) target="${optval}" - ;; - --out=*) outfile="$optval" - ;; - --name=*) name="${optval}" - ;; - --proj-guid=*) guid="${optval}" - ;; - --module-def=*) - link_opts="${link_opts} ModuleDefinitionFile=${optval}" - ;; - --exe) proj_kind="exe" - ;; - --lib) proj_kind="lib" - ;; - --static-crt) use_static_runtime=true - ;; - --ver=*) vs_ver="$optval" - case $optval in - [789]) - ;; - *) die Unrecognized Visual Studio Version in $opt - ;; - esac - ;; - -I*) opt="${opt%/}" - incs="${incs}${incs:+;}"${opt##-I}"" - yasmincs="${yasmincs} ${opt}" - ;; - -D*) defines="${defines}${defines:+;}${opt##-D}" - ;; - -L*) # fudge . to $(OutDir) - if [ "${opt##-L}" == "." ]; then - libdirs="${libdirs}${libdirs:+;}"\$(OutDir)"" - else - # Also try directories for this platform/configuration - libdirs="${libdirs}${libdirs:+;}"${opt##-L}"" - libdirs="${libdirs}${libdirs:+;}"${opt##-L}/\$(PlatformName)/\$(ConfigurationName)"" - libdirs="${libdirs}${libdirs:+;}"${opt##-L}/\$(PlatformName)"" - fi - ;; - -l*) libs="${libs}${libs:+ }${opt##-l}.lib" - ;; - -*) die_unknown $opt - ;; - *) file_list[${#file_list[@]}]="$opt" - case "$opt" in - *.asm) uses_asm=true;; - esac + --help|-h) show_help + ;; + --target=*) target="${optval}" + ;; + --out=*) outfile="$optval" + ;; + --name=*) name="${optval}" + ;; + --proj-guid=*) guid="${optval}" + ;; + --module-def=*) link_opts="${link_opts} ModuleDefinitionFile=${optval}" + ;; + --exe) proj_kind="exe" + ;; + --lib) proj_kind="lib" + ;; + --static-crt) use_static_runtime=true + ;; + --ver=*) + vs_ver="$optval" + case "$optval" in + [789]) + ;; + *) die Unrecognized Visual Studio Version in $opt + ;; + esac + ;; + -I*) + opt="${opt%/}" + incs="${incs}${incs:+;}"${opt##-I}"" + yasmincs="${yasmincs} ${opt}" + ;; + -D*) defines="${defines}${defines:+;}${opt##-D}" + ;; + -L*) # fudge . to $(OutDir) + if [ "${opt##-L}" == "." 
]; then + libdirs="${libdirs}${libdirs:+;}"\$(OutDir)"" + else + # Also try directories for this platform/configuration + libdirs="${libdirs}${libdirs:+;}"${opt##-L}"" + libdirs="${libdirs}${libdirs:+;}"${opt##-L}/\$(PlatformName)/\$(ConfigurationName)"" + libdirs="${libdirs}${libdirs:+;}"${opt##-L}/\$(PlatformName)"" + fi + ;; + -l*) libs="${libs}${libs:+ }${opt##-l}.lib" + ;; + -*) die_unknown $opt + ;; + *) + file_list[${#file_list[@]}]="$opt" + case "$opt" in + *.asm) uses_asm=true + ;; + esac + ;; esac done outfile=${outfile:-/dev/stdout} @@ -278,11 +272,7 @@ # List Keyword for this target case "$target" in - x86*) - keyword="ManagedCProj" - ;; - arm*|iwmmx*) - keyword="Win32Proj" + x86*) keyword="ManagedCProj" ;; *) die "Unsupported target $target!" esac @@ -298,402 +288,186 @@ asm_Debug_cmdline="yasm -Xvc -g cv8 -f \$(PlatformName) ${yasmincs} "\$(InputPath)"" asm_Release_cmdline="yasm -Xvc -f \$(PlatformName) ${yasmincs} "\$(InputPath)"" ;; - arm*|iwmmx*) - case "${name}" in - obj_int_extract) platforms[0]="Win32" - ;; - *) platforms[0]="Pocket PC 2003 (ARMV4)" - ;; - esac - ;; *) die "Unsupported target $target!" 
-esac - -# List Command-line Arguments for this target -case "$target" in - arm*|iwmmx*) - if [ "$name" == "example" ];then - ARGU="--codec vp6 --flipuv --progress _bnd.vp6" - fi - if [ "$name" == "xma" ];then - ARGU="--codec vp6 -h 240 -w 320 -v" - fi ;; esac generate_vcproj() { case "$proj_kind" in - exe) vs_ConfigurationType=1 - ;; - *) vs_ConfigurationType=4 - ;; + exe) vs_ConfigurationType=1 + ;; + *) vs_ConfigurationType=4 + ;; esac echo "" - open_tag VisualStudioProject \ - ProjectType="Visual C++" \ - Version="${vs_ver_id}" \ - Name="${name}" \ - ProjectGUID="{${guid}}" \ - RootNamespace="${name}" \ - Keyword="${keyword}" + open_tag VisualStudioProject \ + ProjectType="Visual C++" \ + Version="${vs_ver_id}" \ + Name="${name}" \ + ProjectGUID="{${guid}}" \ + RootNamespace="${name}" \ + Keyword="${keyword}" \ - open_tag Platforms + open_tag Platforms for plat in "${platforms[@]}"; do - tag Platform Name="$plat" + tag Platform Name="$plat" done close_tag Platforms - open_tag ToolFiles + open_tag ToolFiles case "$target" in x86*) $uses_asm && tag ToolFile RelativePath="$self_dirname/../x86-msvs/yasm.rules" ;; - arm*|iwmmx*) - if [ "$name" == "vpx" ];then - case "$target" in - armv5*) - tag ToolFile RelativePath="$self_dirname/../arm-wince-vs8/armasmv5.rules" - ;; - armv6*) - tag ToolFile RelativePath="$self_dirname/../arm-wince-vs8/armasmv6.rules" - ;; - iwmmxt*) - tag ToolFile RelativePath="$self_dirname/../arm-wince-vs8/armasmxscale.rules" - ;; - esac - fi - ;; esac close_tag ToolFiles - open_tag Configurations + open_tag Configurations for plat in "${platforms[@]}"; do plat_no_ws=`echo $plat | sed 's/[^A-Za-z0-9_]/_/g'` - open_tag Configuration \ - Name="Debug|$plat" \ - OutputDirectory="\$(SolutionDir)$plat_no_ws/\$(ConfigurationName)" \ - IntermediateDirectory="$plat_no_ws/\$(ConfigurationName)/${name}" \ - ConfigurationType="$vs_ConfigurationType" \ - CharacterSet="1" - - if [ "$target" == "armv6-wince-vs8" ] || [ "$target" == "armv5te-wince-vs8" ] || [ 
"$target" == "iwmmxt-wince-vs8" ] || [ "$target" == "iwmmxt2-wince-vs8" ];then - case "$name" in - vpx) tag Tool \ - Name="VCPreBuildEventTool" \ - CommandLine="call obj_int_extract.bat \$(ConfigurationName)" - tag Tool \ - Name="VCMIDLTool" \ - TargetEnvironment="1" - tag Tool \ - Name="VCCLCompilerTool" \ - ExecutionBucket="7" \ - Optimization="0" \ - AdditionalIncludeDirectories="$incs" \ - PreprocessorDefinitions="_DEBUG;_WIN32_WCE=\$(CEVER);UNDER_CE;\$(PLATFORMDEFINES);WINCE;DEBUG;_LIB;\$(ARCHFAM);\$(_ARCHFAM_);_UNICODE;UNICODE;" \ - MinimalRebuild="true" \ - RuntimeLibrary="1" \ - BufferSecurityCheck="false" \ - UsePrecompiledHeader="0" \ - WarningLevel="3" \ - DebugInformationFormat="1" \ - CompileAs="1" - tag Tool \ - Name="VCResourceCompilerTool" \ - PreprocessorDefinitions="_DEBUG;_WIN32_WCE=\$(CEVER);UNDER_CE;\$(PLATFORMDEFINES)" \ - Culture="1033" \ - AdditionalIncludeDirectories="\$(IntDir)" \ - ;; - example|xma) tag Tool \ - Name="VCCLCompilerTool" \ - ExecutionBucket="7" \ - Optimization="0" \ - AdditionalIncludeDirectories="$incs" \ - PreprocessorDefinitions="_DEBUG;_WIN32_WCE=\$(CEVER);UNDER_CE;\$(PLATFORMDEFINES);WINCE;DEBUG;_CONSOLE;\$(ARCHFAM);\$(_ARCHFAM_);_UNICODE;UNICODE;" \ - MinimalRebuild="true" \ - RuntimeLibrary="1" \ - BufferSecurityCheck="false" \ - UsePrecompiledHeader="0" \ - WarningLevel="3" \ - DebugInformationFormat="1" \ - CompileAs="1" - tag Tool \ - Name="VCResourceCompilerTool" \ - PreprocessorDefinitions="_DEBUG;_WIN32_WCE=\$(CEVER);UNDER_CE;\$(PLATFORMDEFINES)" \ - Culture="1033" \ - AdditionalIncludeDirectories="\$(IntDir)" \ - ;; - obj_int_extract) tag Tool \ - Name="VCCLCompilerTool" \ - Optimization="0" \ - AdditionalIncludeDirectories="$incs" \ - PreprocessorDefinitions="WIN32;DEBUG;_CONSOLE" \ - RuntimeLibrary="1" \ - WarningLevel="3" \ - DebugInformationFormat="1" \ - ;; - esac - fi + open_tag Configuration \ + Name="Debug|$plat" \ + OutputDirectory="\$(SolutionDir)$plat_no_ws/\$(ConfigurationName)" \ + 
IntermediateDirectory="$plat_no_ws/\$(ConfigurationName)/${name}" \ + ConfigurationType="$vs_ConfigurationType" \ + CharacterSet="1" \ case "$target" in - x86*) tag Tool \ - Name="VCCLCompilerTool" \ - Optimization="0" \ - AdditionalIncludeDirectories="$incs" \ - PreprocessorDefinitions="WIN32;_DEBUG;_CRT_SECURE_NO_WARNINGS;_CRT_SECURE_NO_DEPRECATE;$defines" \ - RuntimeLibrary="$debug_runtime" \ - UsePrecompiledHeader="0" \ - WarningLevel="3" \ - DebugInformationFormat="1" \ - Detect64BitPortabilityProblems="true" \ + x86*) + case "$name" in + *) + tag Tool \ + Name="VCCLCompilerTool" \ + Optimization="0" \ + AdditionalIncludeDirectories="$incs" \ + PreprocessorDefinitions="WIN32;_DEBUG;_CRT_SECURE_NO_WARNINGS;_CRT_SECURE_NO_DEPRECATE;$defines" \ + RuntimeLibrary="$debug_runtime" \ + UsePrecompiledHeader="0" \ + WarningLevel="3" \ + DebugInformationFormat="1" \ + Detect64BitPortabilityProblems="true" \ - $uses_asm && tag Tool Name="YASM" IncludePaths="$incs" Debug="1" + $uses_asm && tag Tool Name="YASM" IncludePaths="$incs" Debug="1" + ;; + esac ;; esac case "$proj_kind" in exe) case "$target" in - x86*) tag Tool \ - Name="VCLinkerTool" \ - AdditionalDependencies="$debug_libs \$(NoInherit)" \ - AdditionalLibraryDirectories="$libdirs" \ - GenerateDebugInformation="true" \ - ProgramDatabaseFile="\$(OutDir)/${name}.pdb" \ - - ;; - arm*|iwmmx*) + x86*) case "$name" in - obj_int_extract) tag Tool \ - Name="VCLinkerTool" \ - OutputFile="${name}.exe" \ - GenerateDebugInformation="true" - ;; - *) tag Tool \ - Name="VCLinkerTool" \ - AdditionalDependencies="$debug_libs" \ - OutputFile="\$(OutDir)/${name}.exe" \ - LinkIncremental="2" \ - AdditionalLibraryDirectories="${libdirs};"..\lib/$plat_no_ws"" \ - DelayLoadDLLs="\$(NOINHERIT)" \ - GenerateDebugInformation="true" \ - ProgramDatabaseFile="\$(OutDir)/${name}.pdb" \ - SubSystem="9" \ - StackReserveSize="65536" \ - StackCommitSize="4096" \ - EntryPointSymbol="mainWCRTStartup" \ - TargetMachine="3" + *) + tag Tool \ + 
Name="VCLinkerTool" \ + AdditionalDependencies="$debug_libs \$(NoInherit)" \ + AdditionalLibraryDirectories="$libdirs" \ + GenerateDebugInformation="true" \ + ProgramDatabaseFile="\$(OutDir)/${name}.pdb" \ ;; esac - ;; + ;; esac ;; lib) case "$target" in - arm*|iwmmx*) tag Tool \ - Name="VCLibrarianTool" \ - AdditionalOptions=" /subsystem:windowsce,4.20 /machine:ARM" \ - OutputFile="\$(OutDir)/${name}.lib" \ - ;; - *) tag Tool \ - Name="VCLibrarianTool" \ - OutputFile="\$(OutDir)/${name}${lib_sfx}d.lib" \ - ;; + x86*) + tag Tool \ + Name="VCLibrarianTool" \ + OutputFile="\$(OutDir)/${name}${lib_sfx}d.lib" \ + + ;; esac ;; - dll) tag Tool \ - Name="VCLinkerTool" \ - AdditionalDependencies="\$(NoInherit)" \ - LinkIncremental="2" \ - GenerateDebugInformation="true" \ - AssemblyDebug="1" \ - TargetMachine="1" \ - $link_opts + dll) + tag Tool \ + Name="VCLinkerTool" \ + AdditionalDependencies="\$(NoInherit)" \ + LinkIncremental="2" \ + GenerateDebugInformation="true" \ + AssemblyDebug="1" \ + TargetMachine="1" \ + $link_opts \ + + ;; esac - if [ "$target" == "armv6-wince-vs8" ] || [ "$target" == "armv5te-wince-vs8" ] || [ "$target" == "iwmmxt-wince-vs8" ] || [ "$target" == "iwmmxt2-wince-vs8" ];then - case "$name" in - vpx) tag DeploymentTool \ - ForceDirty="-1" \ - RegisterOutput="0" - ;; - example|xma) tag DeploymentTool \ - ForceDirty="-1" \ - RegisterOutput="0" - tag DebuggerTool \ - Arguments="${ARGU}" - ;; - esac - fi close_tag Configuration - open_tag Configuration \ - Name="Release|$plat" \ - OutputDirectory="\$(SolutionDir)$plat_no_ws/\$(ConfigurationName)" \ - IntermediateDirectory="$plat_no_ws/\$(ConfigurationName)/${name}" \ - ConfigurationType="$vs_ConfigurationType" \ - CharacterSet="1" \ - WholeProgramOptimization="0" - - if [ "$target" == "armv6-wince-vs8" ] || [ "$target" == "armv5te-wince-vs8" ] || [ "$target" == "iwmmxt-wince-vs8" ] || [ "$target" == "iwmmxt2-wince-vs8" ];then - case "$name" in - vpx) tag Tool \ - Name="VCPreBuildEventTool" \ - 
CommandLine="call obj_int_extract.bat \$(ConfigurationName)" - tag Tool \ - Name="VCMIDLTool" \ - TargetEnvironment="1" - tag Tool \ - Name="VCCLCompilerTool" \ - ExecutionBucket="7" \ - Optimization="2" \ - FavorSizeOrSpeed="1" \ - AdditionalIncludeDirectories="$incs" \ - PreprocessorDefinitions="NDEBUG;_WIN32_WCE=\$(CEVER);UNDER_CE;\$(PLATFORMDEFINES);WINCE;_LIB;\$(ARCHFAM);\$(_ARCHFAM_);_UNICODE;UNICODE;" \ - RuntimeLibrary="0" \ - BufferSecurityCheck="false" \ - UsePrecompiledHeader="0" \ - WarningLevel="3" \ - DebugInformationFormat="0" \ - CompileAs="1" - tag Tool \ - Name="VCResourceCompilerTool" \ - PreprocessorDefinitions="NDEBUG;_WIN32_WCE=\$(CEVER);UNDER_CE;\$(PLATFORMDEFINES)" \ - Culture="1033" \ - AdditionalIncludeDirectories="\$(IntDir)" \ - ;; - example|xma) tag Tool \ - Name="VCCLCompilerTool" \ - ExecutionBucket="7" \ - Optimization="2" \ - FavorSizeOrSpeed="1" \ - AdditionalIncludeDirectories="$incs" \ - PreprocessorDefinitions="NDEBUG;_WIN32_WCE=\$(CEVER);UNDER_CE;\$(PLATFORMDEFINES);WINCE;_CONSOLE;\$(ARCHFAM);\$(_ARCHFAM_);_UNICODE;UNICODE;" \ - RuntimeLibrary="0" \ - BufferSecurityCheck="false" \ - UsePrecompiledHeader="0" \ - WarningLevel="3" \ - DebugInformationFormat="0" \ - CompileAs="1" - tag Tool \ - Name="VCResourceCompilerTool" \ - PreprocessorDefinitions="NDEBUG;_WIN32_WCE=\$(CEVER);UNDER_CE;\$(PLATFORMDEFINES)" \ - Culture="1033" \ - AdditionalIncludeDirectories="\$(IntDir)" \ - ;; - obj_int_extract) tag Tool \ - Name="VCCLCompilerTool" \ - AdditionalIncludeDirectories="$incs" \ - PreprocessorDefinitions="WIN32;NDEBUG;_CONSOLE" \ - RuntimeLibrary="0" \ - UsePrecompiledHeader="0" \ - WarningLevel="3" \ - Detect64BitPortabilityProblems="true" \ - DebugInformationFormat="0" \ - ;; - esac - fi + open_tag Configuration \ + Name="Release|$plat" \ + OutputDirectory="\$(SolutionDir)$plat_no_ws/\$(ConfigurationName)" \ + IntermediateDirectory="$plat_no_ws/\$(ConfigurationName)/${name}" \ + ConfigurationType="$vs_ConfigurationType" \ + 
CharacterSet="1" \ + WholeProgramOptimization="0" \ - case "$target" in - x86*) tag Tool \ - Name="VCCLCompilerTool" \ - AdditionalIncludeDirectories="$incs" \ - PreprocessorDefinitions="WIN32;NDEBUG;_CRT_SECURE_NO_WARNINGS;_CRT_SECURE_NO_DEPRECATE;$defines" \ - RuntimeLibrary="$release_runtime" \ - UsePrecompiledHeader="0" \ - WarningLevel="3" \ - DebugInformationFormat="0" \ - Detect64BitPortabilityProblems="true" + case "$target" in + x86*) + case "$name" in + *) + tag Tool \ + Name="VCCLCompilerTool" \ + AdditionalIncludeDirectories="$incs" \ + PreprocessorDefinitions="WIN32;NDEBUG;_CRT_SECURE_NO_WARNINGS;_CRT_SECURE_NO_DEPRECATE;$defines" \ + RuntimeLibrary="$release_runtime" \ + UsePrecompiledHeader="0" \ + WarningLevel="3" \ + DebugInformationFormat="0" \ + Detect64BitPortabilityProblems="true" \ - $uses_asm && tag Tool Name="YASM" IncludePaths="$incs" - ;; + $uses_asm && tag Tool Name="YASM" IncludePaths="$incs" + ;; esac + ;; + esac case "$proj_kind" in exe) case "$target" in - x86*) tag Tool \ - Name="VCLinkerTool" \ - AdditionalDependencies="$libs \$(NoInherit)" \ - AdditionalLibraryDirectories="$libdirs" \ - ;; - arm*|iwmmx*) + x86*) case "$name" in - obj_int_extract) tag Tool \ - Name="VCLinkerTool" \ - OutputFile="${name}.exe" \ - LinkIncremental="1" \ - GenerateDebugInformation="false" \ - SubSystem="0" \ - OptimizeReferences="0" \ - EnableCOMDATFolding="0" \ - TargetMachine="0" - ;; - *) tag Tool \ - Name="VCLinkerTool" \ - AdditionalDependencies="$libs" \ - OutputFile="\$(OutDir)/${name}.exe" \ - LinkIncremental="1" \ - AdditionalLibraryDirectories="${libdirs};"..\lib/$plat_no_ws"" \ - DelayLoadDLLs="\$(NOINHERIT)" \ - GenerateDebugInformation="true" \ - ProgramDatabaseFile="\$(OutDir)/${name}.pdb" \ - SubSystem="9" \ - StackReserveSize="65536" \ - StackCommitSize="4096" \ - OptimizeReferences="2" \ - EnableCOMDATFolding="2" \ - EntryPointSymbol="mainWCRTStartup" \ - TargetMachine="3" + *) + tag Tool \ + Name="VCLinkerTool" \ + 
AdditionalDependencies="$libs \$(NoInherit)" \ + AdditionalLibraryDirectories="$libdirs" \ + ;; esac - ;; + ;; esac ;; - lib) + lib) case "$target" in - arm*|iwmmx*) tag Tool \ - Name="VCLibrarianTool" \ - AdditionalOptions=" /subsystem:windowsce,4.20 /machine:ARM" \ - OutputFile="\$(OutDir)/${name}.lib" \ - ;; - *) tag Tool \ - Name="VCLibrarianTool" \ - OutputFile="\$(OutDir)/${name}${lib_sfx}.lib" \ - ;; + x86*) + tag Tool \ + Name="VCLibrarianTool" \ + OutputFile="\$(OutDir)/${name}${lib_sfx}.lib" \ + + ;; esac - ;; - dll) # note differences to debug version: LinkIncremental, AssemblyDebug - tag Tool \ - Name="VCLinkerTool" \ - AdditionalDependencies="\$(NoInherit)" \ - LinkIncremental="1" \ - GenerateDebugInformation="true" \ - TargetMachine="1" \ - $link_opts - esac + ;; + dll) # note differences to debug version: LinkIncremental, AssemblyDebug + tag Tool \ + Name="VCLinkerTool" \ + AdditionalDependencies="\$(NoInherit)" \ + LinkIncremental="1" \ + GenerateDebugInformation="true" \ + TargetMachine="1" \ + $link_opts \ - if [ "$target" == "armv6-wince-vs8" ] || [ "$target" == "armv5te-wince-vs8" ] || [ "$target" == "iwmmxt-wince-vs8" ] || [ "$target" == "iwmmxt2-wince-vs8" ];then - case "$name" in - vpx) tag DeploymentTool \ - ForceDirty="-1" \ - RegisterOutput="0" - ;; - example|xma) tag DeploymentTool \ - ForceDirty="-1" \ - RegisterOutput="0" - tag DebuggerTool \ - Arguments="${ARGU}" - ;; - esac - fi + ;; + esac close_tag Configuration done close_tag Configurations - open_tag Files - generate_filter srcs "Source Files" "cpp;c;cc;cxx;def;odl;idl;hpj;bat;asm;asmx" - generate_filter hdrs "Header Files" "h;hpp;hxx;hm;inl;inc;xsd" + open_tag Files + generate_filter srcs "Source Files" "c;def;odl;idl;hpj;bat;asm;asmx" + generate_filter hdrs "Header Files" "h;hm;inl;inc;xsd" generate_filter resrcs "Resource Files" "rc;ico;cur;bmp;dlg;rc2;rct;bin;rgs;gif;jpg;jpeg;jpe;resx;tiff;tif;png;wav" generate_filter resrcs "Build Files" "mk" close_tag Files diff -Nru 
libvpx-0.9.5/build/make/gen_msvs_sln.sh libvpx-0.9.6/build/make/gen_msvs_sln.sh --- libvpx-0.9.5/build/make/gen_msvs_sln.sh 2010-10-28 13:14:14.000000000 +0000 +++ libvpx-0.9.6/build/make/gen_msvs_sln.sh 2011-03-04 20:40:37.000000000 +0000 @@ -139,9 +139,6 @@ echo "${indent}${proj_guid}.${config}.ActiveCfg = ${config}" echo "${indent}${proj_guid}.${config}.Build.0 = ${config}" - if [ "$target" == "armv6-wince-vs8" ] || [ "$target" == "armv5te-wince-vs8" ] || [ "$target" == "iwmmxt-wince-vs8" ] || [ "$target" == "iwmmxt2-wince-vs8" ];then - echo "${indent}${proj_guid}.${config}.Deploy.0 = ${config}" - fi done IFS=${IFS_bak} done diff -Nru libvpx-0.9.5/build/make/obj_int_extract.c libvpx-0.9.6/build/make/obj_int_extract.c --- libvpx-0.9.5/build/make/obj_int_extract.c 2010-10-28 13:14:14.000000000 +0000 +++ libvpx-0.9.6/build/make/obj_int_extract.c 2011-03-04 20:40:37.000000000 +0000 @@ -590,7 +590,7 @@ //log_msg("COFF: Symbol table at offset %u\n", symtab_ptr); //log_msg("COFF: raw data pointer ofset for section .data is %u\n", sectionrawdata_ptr); - fp = fopen("vpx_asm_offsets.asm", "w"); + fp = fopen("assembly_offsets.asm", "w"); if (fp == NULL) { diff -Nru libvpx-0.9.5/CHANGELOG libvpx-0.9.6/CHANGELOG --- libvpx-0.9.5/CHANGELOG 2010-10-28 13:14:14.000000000 +0000 +++ libvpx-0.9.6/CHANGELOG 2011-03-04 20:40:37.000000000 +0000 @@ -1,3 +1,80 @@ +2011-03-07 v0.9.6 "Bali" + Our second named release, focused on a faster, higher quality, encoder. + + - Upgrading: + This release is backwards compatible with Aylesbury (v0.9.5). Users + of older releases should refer to the Upgrading notes in this + document for that release. 
+ + - Enhancements: + vpxenc --psnr shows a summary when encode completes + --tune=ssim option to enable activity masking + improved postproc visualizations for development + updated support for Apple iOS to SDK 4.2 + query decoder to determine which reference frames were updated + implemented error tracking in the decoder + fix pipe support on windows + + - Speed: + Primary focus was on good quality mode, speed 0. Average improvement + on x86 about 40%, up to 100% on user-generated content at that speed. + Best quality mode speed improved 35%, and realtime speed 10-20%. This + release also saw significant improvement in realtime encoding speed + on ARM platforms. + + Improved encoder threading + Dont pick encoder filter level when loopfilter is disabled. + Avoid double copying of key frames into alt and golden buffer + FDCT optimizations. + x86 sse2 temporal filter + SSSE3 version of fast quantizer + vp8_rd_pick_best_mbsegmentation code restructure + Adjusted breakout RD for SPLITMV + Changed segmentation check order + Improved rd_pick_intra4x4block + Adds armv6 optimized variance calculation + ARMv6 optimized sad16x16 + ARMv6 optimized half pixel variance calculations + Full search SAD function optimization in SSE4.1 + Improve MV prediction accuracy to achieve performance gain + Improve MV prediction in vp8_pick_inter_mode() for speed>3 + + - Quality: + Best quality mode improved PSNR 6.3%, and SSIM 6.1%. This release + also includes support for "activity masking," which greatly improves + SSIM at the expense of PSNR. For now, this feature is available with + the --tune=ssim option. Further experimentation in this area + is ongoing. This release also introduces a new rate control mode + called "CQ," which changes the allocation of bits within a clip to + the sections where they will have the most visual impact. + + Tuning for the more exact quantizer. + Relax rate control for last few frames + CQ Mode + Limit key frame quantizer for forced key frames. 
+ KF/GF Pulsing + Add simple version of activity masking. + make rdmult adaptive for intra in quantizer RDO + cap the best quantizer for 2nd order DC + change the threshold of DC check for encode breakout + + - Bug Fixes: + Fix crash on Sparc Solaris. + Fix counter of fixed keyframe distance + ARNR filter pointer update bug fix + Fixed use of motion percentage in KF/GF group calc + Changed condition for using RD in Intra Mode + Fix encoder real-time only configuration. + Fix ARM encoder crash with multiple token partitions + Fixed bug first cluster timecode of webm file is wrong. + Fixed various encoder bugs with odd-sized images + vp8e_get_preview fixed when spatial resampling enabled + quantizer: fix assertion in fast quantizer path + Allocate source buffers to be multiples of 16 + Fix for manual Golden frame frequency + Fix drastic undershoot in long form content + + 2010-10-28 v0.9.5 "Aylesbury" Our first named release, focused on a faster decoder, and a better encoder. diff -Nru libvpx-0.9.5/configure libvpx-0.9.6/configure --- libvpx-0.9.5/configure 2010-10-28 13:14:14.000000000 +0000 +++ libvpx-0.9.6/configure 2011-03-04 20:40:37.000000000 +0000 @@ -40,7 +40,7 @@ ${toggle_runtime_cpu_detect} runtime cpu detection ${toggle_shared} shared library support ${toggle_small} favor smaller size over speed - ${toggle_arm_asm_detok} assembly version of the detokenizer (ARM platforms only) + ${toggle_postproc_visualizer} macro block / block level visualizers Codecs: Codecs can be selectively enabled or disabled individually, or by family: @@ -78,22 +78,21 @@ # alphabetically by architecture, generic-gnu last. 
all_platforms="${all_platforms} armv5te-linux-rvct" all_platforms="${all_platforms} armv5te-linux-gcc" +all_platforms="${all_platforms} armv5te-none-rvct" all_platforms="${all_platforms} armv5te-symbian-gcc" -all_platforms="${all_platforms} armv5te-wince-vs8" all_platforms="${all_platforms} armv6-darwin-gcc" all_platforms="${all_platforms} armv6-linux-rvct" all_platforms="${all_platforms} armv6-linux-gcc" +all_platforms="${all_platforms} armv6-none-rvct" all_platforms="${all_platforms} armv6-symbian-gcc" -all_platforms="${all_platforms} armv6-wince-vs8" all_platforms="${all_platforms} iwmmxt-linux-rvct" all_platforms="${all_platforms} iwmmxt-linux-gcc" -all_platforms="${all_platforms} iwmmxt-wince-vs8" all_platforms="${all_platforms} iwmmxt2-linux-rvct" all_platforms="${all_platforms} iwmmxt2-linux-gcc" -all_platforms="${all_platforms} iwmmxt2-wince-vs8" all_platforms="${all_platforms} armv7-darwin-gcc" #neon Cortex-A8 all_platforms="${all_platforms} armv7-linux-rvct" #neon Cortex-A8 all_platforms="${all_platforms} armv7-linux-gcc" #neon Cortex-A8 +all_platforms="${all_platforms} armv7-none-rvct" #neon Cortex-A8 all_platforms="${all_platforms} mips32-linux-gcc" all_platforms="${all_platforms} ppc32-darwin8-gcc" all_platforms="${all_platforms} ppc32-darwin9-gcc" @@ -114,6 +113,7 @@ all_platforms="${all_platforms} x86-win32-vs8" all_platforms="${all_platforms} x86-win32-vs9" all_platforms="${all_platforms} x86_64-darwin9-gcc" +all_platforms="${all_platforms} x86_64-darwin10-gcc" all_platforms="${all_platforms} x86_64-linux-gcc" all_platforms="${all_platforms} x86_64-linux-icc" all_platforms="${all_platforms} x86_64-solaris-gcc" @@ -157,6 +157,7 @@ enable md5 enable spatial_resampling enable multithread +enable os_support [ -d ${source_path}/../include ] && enable alt_tree_layout for d in vp8; do @@ -199,6 +200,7 @@ sse2 sse3 ssse3 + sse4_1 altivec " @@ -248,7 +250,8 @@ realtime_only shared small - arm_asm_detok + postproc_visualizer + os_support " CMDLINE_SELECT=" 
extra_warnings @@ -287,7 +290,7 @@ realtime_only shared small - arm_asm_detok + postproc_visualizer " process_cmdline() { @@ -295,7 +298,7 @@ optval="${opt#*=}" case "$opt" in --disable-codecs) for c in ${CODECS}; do disable $c; done ;; - *) process_common_cmdline $opt + *) process_common_cmdline "$opt" ;; esac done @@ -324,8 +327,6 @@ for c in ${CODECS}; do enabled ${c} && enable ${c##*_}s done - - } @@ -535,6 +536,10 @@ # Other toolchain specific defaults case $toolchain in x86*|ppc*|universal*) soft_enable postproc;; esac + + if enabled postproc_visualizer; then + enabled postproc || die "postproc_visualizer requires postproc to be enabled" + fi } diff -Nru libvpx-0.9.5/debian/changelog libvpx-0.9.6/debian/changelog --- libvpx-0.9.5/debian/changelog 2011-02-10 11:11:05.000000000 +0000 +++ libvpx-0.9.6/debian/changelog 2011-03-08 23:12:05.000000000 +0000 @@ -1,8 +1,22 @@ -libvpx (0.9.5-2~ucd1~maverick) maverick; urgency=low +libvpx (0.9.6-1~ucd1~maverick) maverick; urgency=low * Upload to the Chromium PPA - -- Fabien Tassin Thu, 10 Feb 2011 12:11:03 +0100 + -- Fabien Tassin Wed, 09 Mar 2011 00:11:38 +0100 + +libvpx (0.9.6-1~lucid1) lucid; urgency=low + + * Upload to GStreamer developers PPA. + + -- Sebastian Dröge Tue, 08 Mar 2011 18:12:05 +0100 + +libvpx (0.9.6-1) unstable; urgency=low + + * New upstream release, "Bali": + + debian/patches/02_cve-2010-4489.patch: + - Dropped, merged upstream. 
+ + -- Sebastian Dröge Tue, 08 Mar 2011 17:58:26 +0100 libvpx (0.9.5-2) unstable; urgency=low diff -Nru libvpx-0.9.5/debian/patches/02_cve-2010-4489.patch libvpx-0.9.6/debian/patches/02_cve-2010-4489.patch --- libvpx-0.9.5/debian/patches/02_cve-2010-4489.patch 2011-02-08 11:00:56.000000000 +0000 +++ libvpx-0.9.6/debian/patches/02_cve-2010-4489.patch 1970-01-01 00:00:00.000000000 +0000 @@ -1,70 +0,0 @@ -From: John Koleszar -Date: Thu, 4 Nov 2010 20:59:26 +0000 (-0400) -Subject: fix integer promotion bug in partition size check -X-Git-Url: https://review.webmproject.org/gitweb?p=libvpx.git;a=commitdiff_plain;h=9fb80f7170ec48e23c3c7b477149eeb37081c699 - -fix integer promotion bug in partition size check - -The check '(user_data_end - partition < partition_size)' must be -evaluated as a signed comparison, but because partition_size was -unsigned, the LHS was promoted to unsigned, causing an incorrect -result on 32-bit. Instead, check the upper and lower bounds of -the segment separately. - -Change-Id: I6266aba7fd7de084268712a3d2a81424ead7aa06 ---- - -diff --git a/vp8/decoder/decodframe.c b/vp8/decoder/decodframe.c -index 2d81d61..f5e49a1 100644 ---- a/vp8/decoder/decodframe.c -+++ b/vp8/decoder/decodframe.c -@@ -462,7 +462,8 @@ static void setup_token_decoder(VP8D_COMP *pbi, - partition_size = user_data_end - partition; - } - -- if (user_data_end - partition < partition_size) -+ if (partition + partition_size > user_data_end -+ || partition + partition_size < partition) - vpx_internal_error(&pc->error, VPX_CODEC_CORRUPT_FRAME, - "Truncated packet or corrupt partition " - "%d length", i + 1); -@@ -580,7 +581,8 @@ int vp8_decode_frame(VP8D_COMP *pbi) - (data[0] | (data[1] << 8) | (data[2] << 16)) >> 5; - data += 3; - -- if (data_end - data < first_partition_length_in_bytes) -+ if (data + first_partition_length_in_bytes > data_end -+ || data + first_partition_length_in_bytes < data) - vpx_internal_error(&pc->error, VPX_CODEC_CORRUPT_FRAME, - "Truncated packet or corrupt 
partition 0 length"); - vp8_setup_version(pc); -diff --git a/vp8/vp8_dx_iface.c b/vp8/vp8_dx_iface.c -index e7e5356..f0adf5b 100644 ---- a/vp8/vp8_dx_iface.c -+++ b/vp8/vp8_dx_iface.c -@@ -253,8 +253,11 @@ static vpx_codec_err_t vp8_peek_si(const uint8_t *data, - unsigned int data_sz, - vpx_codec_stream_info_t *si) - { -- - vpx_codec_err_t res = VPX_CODEC_OK; -+ -+ if(data + data_sz <= data) -+ res = VPX_CODEC_INVALID_PARAM; -+ else - { - /* Parse uncompresssed part of key frame header. - * 3 bytes:- including version, frame type and an offset -@@ -331,7 +334,10 @@ static vpx_codec_err_t vp8_decode(vpx_codec_alg_priv_t *ctx, - - ctx->img_avail = 0; - -- /* Determine the stream parameters */ -+ /* Determine the stream parameters. Note that we rely on peek_si to -+ * validate that we have a buffer that does not wrap around the top -+ * of the heap. -+ */ - if (!ctx->si.h) - res = ctx->base.iface->dec.peek_si(data, data_sz, &ctx->si); - - diff -Nru libvpx-0.9.5/debian/patches/series libvpx-0.9.6/debian/patches/series --- libvpx-0.9.5/debian/patches/series 2011-02-08 11:00:56.000000000 +0000 +++ libvpx-0.9.6/debian/patches/series 2011-03-08 17:04:54.000000000 +0000 @@ -1,2 +1 @@ 01_enable-shared.patch -02_cve-2010-4489.patch diff -Nru libvpx-0.9.5/docs.mk libvpx-0.9.6/docs.mk --- libvpx-0.9.5/docs.mk 2010-10-28 13:14:14.000000000 +0000 +++ libvpx-0.9.6/docs.mk 2011-03-04 20:40:37.000000000 +0000 @@ -34,7 +34,8 @@ EXAMPLE_PATH += $(SRC_PATH_BARE) #for CHANGELOG, README, etc -doxyfile: libs.doxy_template libs.doxy examples.doxy +doxyfile: $(if $(findstring examples, $(ALL_TARGETS)),examples.doxy) +doxyfile: libs.doxy_template libs.doxy @echo " [CREATE] $@" @cat $^ > $@ @echo "STRIP_FROM_PATH += $(SRC_PATH_BARE) $(BUILD_ROOT)" >> $@ diff -Nru libvpx-0.9.5/examples/decoder_tmpl.c libvpx-0.9.6/examples/decoder_tmpl.c --- libvpx-0.9.5/examples/decoder_tmpl.c 2010-10-28 13:14:14.000000000 +0000 +++ libvpx-0.9.6/examples/decoder_tmpl.c 2011-03-04 20:40:37.000000000 +0000 @@ 
-19,7 +19,7 @@ #define VPX_CODEC_DISABLE_COMPAT 1 #include "vpx/vpx_decoder.h" #include "vpx/vp8dx.h" -#define interface (&vpx_codec_vp8_dx_algo) +#define interface (vpx_codec_vp8_dx()) @EXTRA_INCLUDES diff -Nru libvpx-0.9.5/examples/decoder_tmpl.txt libvpx-0.9.6/examples/decoder_tmpl.txt --- libvpx-0.9.5/examples/decoder_tmpl.txt 2010-10-28 13:14:14.000000000 +0000 +++ libvpx-0.9.6/examples/decoder_tmpl.txt 2011-03-04 20:40:37.000000000 +0000 @@ -2,7 +2,7 @@ #define VPX_CODEC_DISABLE_COMPAT 1 #include "vpx/vpx_decoder.h" #include "vpx/vp8dx.h" -#define interface (&vpx_codec_vp8_dx_algo) +#define interface (vpx_codec_vp8_dx()) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ DEC_INCLUDES diff -Nru libvpx-0.9.5/examples/encoder_tmpl.c libvpx-0.9.6/examples/encoder_tmpl.c --- libvpx-0.9.5/examples/encoder_tmpl.c 2010-10-28 13:14:14.000000000 +0000 +++ libvpx-0.9.6/examples/encoder_tmpl.c 2011-03-04 20:40:37.000000000 +0000 @@ -19,7 +19,7 @@ #define VPX_CODEC_DISABLE_COMPAT 1 #include "vpx/vpx_encoder.h" #include "vpx/vp8cx.h" -#define interface (&vpx_codec_vp8_cx_algo) +#define interface (vpx_codec_vp8_cx()) #define fourcc 0x30385056 @EXTRA_INCLUDES diff -Nru libvpx-0.9.5/examples/encoder_tmpl.txt libvpx-0.9.6/examples/encoder_tmpl.txt --- libvpx-0.9.5/examples/encoder_tmpl.txt 2010-10-28 13:14:14.000000000 +0000 +++ libvpx-0.9.6/examples/encoder_tmpl.txt 2011-03-04 20:40:37.000000000 +0000 @@ -2,7 +2,7 @@ #define VPX_CODEC_DISABLE_COMPAT 1 #include "vpx/vpx_encoder.h" #include "vpx/vp8cx.h" -#define interface (&vpx_codec_vp8_cx_algo) +#define interface (vpx_codec_vp8_cx()) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ENC_INCLUDES diff -Nru libvpx-0.9.5/examples/simple_decoder.txt libvpx-0.9.6/examples/simple_decoder.txt --- libvpx-0.9.5/examples/simple_decoder.txt 2010-10-28 13:14:14.000000000 +0000 +++ libvpx-0.9.6/examples/simple_decoder.txt 2011-03-04 20:40:39.000000000 +0000 @@ -33,7 +33,7 @@ ---------------------- The decoder is 
initialized by the following code. This is an example for the VP8 decoder, but the code is analogous for all algorithms. Replace -`&vpx_codec_vp8_dx_algo` with a pointer to the interface exposed by the +`vpx_codec_vp8_dx()` with a pointer to the interface exposed by the algorithm you want to use. The `cfg` argument is left as NULL in this example, because we want the algorithm to determine the stream configuration (width/height) and allocate memory automatically. This diff -Nru libvpx-0.9.5/examples/vp8_set_maps.txt libvpx-0.9.6/examples/vp8_set_maps.txt --- libvpx-0.9.5/examples/vp8_set_maps.txt 2010-10-28 13:14:14.000000000 +0000 +++ libvpx-0.9.6/examples/vp8_set_maps.txt 2011-03-04 20:40:39.000000000 +0000 @@ -78,8 +78,8 @@ } else if(frame_cnt + 1 == 44) { vpx_active_map_t active; - active.rows = 240/16; - active.cols = 320/16; + active.rows = cfg.g_h/16; + active.cols = cfg.g_w/16; /* pass in null map to disable active_map*/ active.active_map = NULL; diff -Nru libvpx-0.9.5/examples.mk libvpx-0.9.6/examples.mk --- libvpx-0.9.5/examples.mk 2010-10-28 13:14:14.000000000 +0000 +++ libvpx-0.9.6/examples.mk 2011-03-04 20:40:37.000000000 +0000 @@ -17,6 +17,7 @@ vpxdec.SRCS += vpx_ports/vpx_timer.h vpxdec.SRCS += vpx/vpx_integer.h vpxdec.SRCS += args.c args.h vpx_ports/config.h +vpxdec.SRCS += tools_common.c tools_common.h vpxdec.SRCS += nestegg/halloc/halloc.h vpxdec.SRCS += nestegg/halloc/src/align.h vpxdec.SRCS += nestegg/halloc/src/halloc.c @@ -28,6 +29,7 @@ vpxdec.DESCRIPTION = Full featured decoder UTILS-$(CONFIG_ENCODERS) += vpxenc.c vpxenc.SRCS += args.c args.h y4minput.c y4minput.h +vpxenc.SRCS += tools_common.c tools_common.h vpxenc.SRCS += vpx_ports/config.h vpx_ports/mem_ops.h vpxenc.SRCS += vpx_ports/mem_ops_aligned.h vpxenc.SRCS += libmkv/EbmlIDs.h @@ -91,8 +93,16 @@ # Handle extra library flags depending on codec configuration -CODEC_EXTRA_LIBS-$(CONFIG_VP8) += m +# We should not link to math library (libm) on RVCT +# when building for bare-metal targets 
+ifeq ($(CONFIG_OS_SUPPORT), yes) +CODEC_EXTRA_LIBS-$(CONFIG_VP8) += m +else + ifeq ($(CONFIG_GCC), yes) + CODEC_EXTRA_LIBS-$(CONFIG_VP8) += m + endif +endif # # End of specified files. The rest of the build rules should happen # automagically from here. diff -Nru libvpx-0.9.5/libs.mk libvpx-0.9.6/libs.mk --- libvpx-0.9.5/libs.mk 2010-10-28 13:14:14.000000000 +0000 +++ libvpx-0.9.6/libs.mk 2011-03-04 20:40:39.000000000 +0000 @@ -126,29 +126,6 @@ ifeq ($(CONFIG_EXTERNAL_BUILD),yes) ifeq ($(CONFIG_MSVS),yes) -ifeq ($(ARCH_ARM),yes) -ifeq ($(HAVE_ARMV5TE),yes) -ARM_ARCH=v5 -endif -ifeq ($(HAVE_ARMV6),yes) -ARM_ARCH=v6 -endif -obj_int_extract.vcproj: $(SRC_PATH_BARE)/build/make/obj_int_extract.c - @cp $(SRC_PATH_BARE)/build/arm-wince-vs8/obj_int_extract.bat . - @echo " [CREATE] $@" - $(SRC_PATH_BARE)/build/make/gen_msvs_proj.sh\ - --exe\ - --target=$(TOOLCHAIN)\ - $(if $(CONFIG_STATIC_MSVCRT),--static-crt) \ - --name=obj_int_extract\ - --proj-guid=E1360C65-D375-4335-8057-7ED99CC3F9B2\ - --out=$@ $^\ - -I".";"$(SRC_PATH_BARE)" - -PROJECTS-$(BUILD_LIBVPX) += obj_int_extract.vcproj -PROJECTS-$(BUILD_LIBVPX) += obj_int_extract.bat -endif - vpx.def: $(call enabled,CODEC_EXPORTS) @echo " [CREATE] $@" $(SRC_PATH_BARE)/build/make/gen_msvs_def.sh\ @@ -230,10 +207,39 @@ # # Add assembler dependencies for configuration and offsets # -#$(filter %$(ASM).o,$(OBJS-yes)): $(BUILD_PFX)vpx_config.asm $(BUILD_PFX)vpx_asm_offsets.asm $(filter %.s.o,$(OBJS-yes)): $(BUILD_PFX)vpx_config.asm $(filter %.asm.o,$(OBJS-yes)): $(BUILD_PFX)vpx_config.asm +# +# Calculate platform- and compiler-specific offsets for hand coded assembly +# +ifeq ($(ARCH_ARM), yes) + asm_com_offsets.asm: obj_int_extract + asm_com_offsets.asm: $(VP8_PREFIX)common/asm_com_offsets.c.o + ./obj_int_extract rvds $< $(ADS2GAS) > $@ + OBJS-yes += $(VP8_PREFIX)common/asm_com_offsets.c.o + CLEAN-OBJS += asm_com_offsets.asm + $(filter %$(ASM).o,$(OBJS-yes)): $(BUILD_PFX)asm_com_offsets.asm + + ifeq ($(CONFIG_VP8_ENCODER), yes) + 
asm_enc_offsets.asm: obj_int_extract + asm_enc_offsets.asm: $(VP8_PREFIX)encoder/asm_enc_offsets.c.o + ./obj_int_extract rvds $< $(ADS2GAS) > $@ + OBJS-yes += $(VP8_PREFIX)encoder/asm_enc_offsets.c.o + CLEAN-OBJS += asm_enc_offsets.asm + $(filter %$(ASM).o,$(OBJS-yes)): $(BUILD_PFX)asm_enc_offsets.asm + endif + + ifeq ($(CONFIG_VP8_DECODER), yes) + asm_dec_offsets.asm: obj_int_extract + asm_dec_offsets.asm: $(VP8_PREFIX)decoder/asm_dec_offsets.c.o + ./obj_int_extract rvds $< $(ADS2GAS) > $@ + OBJS-yes += $(VP8_PREFIX)decoder/asm_dec_offsets.c.o + CLEAN-OBJS += asm_dec_offsets.asm + $(filter %$(ASM).o,$(OBJS-yes)): $(BUILD_PFX)asm_dec_offsets.asm + endif +endif + $(shell $(SRC_PATH_BARE)/build/make/version.sh "$(SRC_PATH_BARE)" $(BUILD_PFX)vpx_version.h) CLEAN-OBJS += $(BUILD_PFX)vpx_version.h diff -Nru libvpx-0.9.5/.mailmap libvpx-0.9.6/.mailmap --- libvpx-0.9.5/.mailmap 2010-10-28 13:14:14.000000000 +0000 +++ libvpx-0.9.6/.mailmap 2011-03-04 20:40:37.000000000 +0000 @@ -1,2 +1,4 @@ Adrian Grange Johann Koenig +Tero Rintaluoma +Tom Finegan diff -Nru libvpx-0.9.5/mainpage.dox libvpx-0.9.6/mainpage.dox --- libvpx-0.9.5/mainpage.dox 2010-10-28 13:14:14.000000000 +0000 +++ libvpx-0.9.6/mainpage.dox 2011-03-04 20:40:39.000000000 +0000 @@ -31,7 +31,7 @@ The WebM project is an open source project supported by its community. For questions about this SDK, please mail the apps-devel@webmproject.org list. To contribute, see http://www.webmproject.org/code/contribute and mail - vpx-devel@webmproject.org. + codec-devel@webmproject.org. */ /*!\page changelog CHANGELOG diff -Nru libvpx-0.9.5/md5_utils.c libvpx-0.9.6/md5_utils.c --- libvpx-0.9.5/md5_utils.c 2010-10-28 13:14:14.000000000 +0000 +++ libvpx-0.9.6/md5_utils.c 2011-03-04 20:40:39.000000000 +0000 @@ -20,8 +20,6 @@ * Still in the public domain. 
*/ -#include <sys/types.h> /* for stupid systems */ - #include <string.h> /* for memcpy() */ #include "md5_utils.h" diff -Nru libvpx-0.9.5/README libvpx-0.9.6/README --- libvpx-0.9.5/README 2010-10-28 13:14:14.000000000 +0000 +++ libvpx-0.9.6/README 2011-03-04 20:40:37.000000000 +0000 @@ -45,18 +45,14 @@ armv5te-linux-rvct armv5te-linux-gcc armv5te-symbian-gcc - armv5te-wince-vs8 armv6-darwin-gcc armv6-linux-rvct armv6-linux-gcc armv6-symbian-gcc - armv6-wince-vs8 iwmmxt-linux-rvct iwmmxt-linux-gcc - iwmmxt-wince-vs8 iwmmxt2-linux-rvct iwmmxt2-linux-gcc - iwmmxt2-wince-vs8 armv7-linux-rvct armv7-linux-gcc mips32-linux-gcc diff -Nru libvpx-0.9.5/solution.mk libvpx-0.9.6/solution.mk --- libvpx-0.9.5/solution.mk 2010-10-28 13:14:14.000000000 +0000 +++ libvpx-0.9.6/solution.mk 2011-03-04 20:40:39.000000000 +0000 @@ -9,32 +9,6 @@ ## -ifeq ($(ARCH_ARM),yes) -ARM_DEVELOP=no -ARM_DEVELOP:=$(if $(filter %vpx.vcproj,$(wildcard *.vcproj)),yes) - -ifeq ($(ARM_DEVELOP),yes) -vpx.sln: - @echo " [COPY] $@" - @cp $(SRC_PATH_BARE)/build/arm-wince-vs8/vpx.sln . -PROJECTS-yes += vpx.sln -else -vpx.sln: $(wildcard *.vcproj) - @echo " [CREATE] $@" - $(SRC_PATH_BARE)/build/make/gen_msvs_sln.sh \ - $(if $(filter %vpx.vcproj,$^),--dep=vpxdec:vpx) \ - $(if $(filter %vpx.vcproj,$^),--dep=xma:vpx) \ - --ver=$(CONFIG_VS_VERSION)\ - --target=$(TOOLCHAIN)\ - --out=$@ $^ -vpx.sln.mk: vpx.sln - @true - -PROJECTS-yes += vpx.sln vpx.sln.mk --include vpx.sln.mk -endif - -else vpx.sln: $(wildcard *.vcproj) @echo " [CREATE] $@" $(SRC_PATH_BARE)/build/make/gen_msvs_sln.sh \ @@ -48,7 +22,6 @@ PROJECTS-yes += vpx.sln vpx.sln.mk -include vpx.sln.mk -endif # Always install this file, as it is an unconditional post-build rule.
INSTALL_MAPS += src/% $(SRC_PATH_BARE)/% diff -Nru libvpx-0.9.5/tools_common.c libvpx-0.9.6/tools_common.c --- libvpx-0.9.5/tools_common.c 1970-01-01 00:00:00.000000000 +0000 +++ libvpx-0.9.6/tools_common.c 2011-03-04 20:40:39.000000000 +0000 @@ -0,0 +1,24 @@ +/* + * Copyright (c) 2010 The WebM project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ +#include <stdio.h> +#include "tools_common.h" +#ifdef _WIN32 +#include <io.h> +#include <fcntl.h> +#endif + +FILE* set_binary_mode(FILE *stream) +{ + (void)stream; +#ifdef _WIN32 + _setmode(_fileno(stream), _O_BINARY); +#endif + return stream; +} diff -Nru libvpx-0.9.5/tools_common.h libvpx-0.9.6/tools_common.h --- libvpx-0.9.5/tools_common.h 1970-01-01 00:00:00.000000000 +0000 +++ libvpx-0.9.6/tools_common.h 2011-03-04 20:40:39.000000000 +0000 @@ -0,0 +1,16 @@ +/* + * Copyright (c) 2010 The WebM project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ +#ifndef TOOLS_COMMON_H +#define TOOLS_COMMON_H + +/* Sets a stdio stream into binary mode */ +FILE* set_binary_mode(FILE *stream); + +#endif diff -Nru libvpx-0.9.5/usage.dox libvpx-0.9.6/usage.dox --- libvpx-0.9.5/usage.dox 2010-10-28 13:14:14.000000000 +0000 +++ libvpx-0.9.6/usage.dox 2011-03-04 20:40:39.000000000 +0000 @@ -25,7 +25,7 @@ codec may write into to store details about a single instance of that codec.
Most of the context is implementation specific, and thus opaque to the application. The context structure as seen by the application is of fixed - size, and thus can be allocated eith with automatic storage or dynamically + size, and thus can be allocated with automatic storage or dynamically on the heap. Most operations require an initialized codec context. Codec context @@ -74,7 +74,7 @@ the ABI is versioned. The ABI version number must be passed at initialization time to ensure the application is using a header file that matches the library. The current ABI version number is stored in the - prepropcessor macros #VPX_CODEC_ABI_VERSION, #VPX_ENCODER_ABI_VERSION, and + preprocessor macros #VPX_CODEC_ABI_VERSION, #VPX_ENCODER_ABI_VERSION, and #VPX_DECODER_ABI_VERSION. For convenience, each initialization function has a wrapper macro that inserts the correct version number. These macros are named like the initialization methods, but without the _ver suffix. @@ -125,7 +125,7 @@ The special value 0 is reserved to represent an infinite deadline. In this case, the codec will perform as much processing as - possible to yeild the highest quality frame. + possible to yield the highest quality frame. By convention, the value 1 is used to mean "return as fast as possible." @@ -135,7 +135,7 @@ /*! \page usage_xma External Memory Allocation Applications that wish to have fine grained control over how and where - decoders allocate memory \ref MAY make use of the e_xternal Memory Allocation + decoders allocate memory \ref MAY make use of the eXternal Memory Allocation (XMA) interface. Not all codecs support the XMA \ref usage_features. To use a decoder in XMA mode, the decoder \ref MUST be initialized with the @@ -143,7 +143,7 @@ allocate is heavily dependent on the size of the encoded video frames. The size of the video must be known before requesting the decoder's memory map. 
This stream information can be obtained with the vpx_codec_peek_stream_info() - function, which does not require a contructed decoder context. If the exact + function, which does not require a constructed decoder context. If the exact stream is not known, a stream info structure can be created that reflects the maximum size that the decoder instance is required to support. @@ -175,7 +175,7 @@ \section usage_xma_seg_szalign Segment Size and Alignment The sz (size) and align (alignment) parameters describe the required size and alignment of the requested segment. Alignment will always be a power of - two. Applications \ref MUST honor the aligment requested. Failure to do so + two. Applications \ref MUST honor the alignment requested. Failure to do so could result in program crashes or may incur a speed penalty. \section usage_xma_seg_flags Segment Flags diff -Nru libvpx-0.9.5/vp8/common/alloccommon.c libvpx-0.9.6/vp8/common/alloccommon.c --- libvpx-0.9.5/vp8/common/alloccommon.c 2010-10-28 13:14:14.000000000 +0000 +++ libvpx-0.9.6/vp8/common/alloccommon.c 2011-03-04 20:40:39.000000000 +0000 @@ -16,7 +16,6 @@ #include "findnearmv.h" #include "entropymode.h" #include "systemdependent.h" -#include "vpxerrors.h" extern void vp8_init_scan_order_mask(); @@ -71,7 +70,7 @@ if (vp8_yv12_alloc_frame_buffer(&oci->yv12_fb[i], width, height, VP8BORDERINPIXELS) < 0) { vp8_de_alloc_frame_buffers(oci); - return ALLOC_FAILURE; + return 1; } } @@ -88,13 +87,13 @@ if (vp8_yv12_alloc_frame_buffer(&oci->temp_scale_frame, width, 16, VP8BORDERINPIXELS) < 0) { vp8_de_alloc_frame_buffers(oci); - return ALLOC_FAILURE; + return 1; } if (vp8_yv12_alloc_frame_buffer(&oci->post_proc_buffer, width, height, VP8BORDERINPIXELS) < 0) { vp8_de_alloc_frame_buffers(oci); - return ALLOC_FAILURE; + return 1; } oci->mb_rows = height >> 4; @@ -106,7 +105,7 @@ if (!oci->mip) { vp8_de_alloc_frame_buffers(oci); - return ALLOC_FAILURE; + return 1; } oci->mi = oci->mip + oci->mode_info_stride + 1; @@ -117,7 
+116,7 @@ if (!oci->above_context) { vp8_de_alloc_frame_buffers(oci); - return ALLOC_FAILURE; + return 1; } vp8_update_mode_info_border(oci->mi, oci->mb_rows, oci->mb_cols); diff -Nru libvpx-0.9.5/vp8/common/arm/arm_systemdependent.c libvpx-0.9.6/vp8/common/arm/arm_systemdependent.c --- libvpx-0.9.5/vp8/common/arm/arm_systemdependent.c 2010-10-28 13:14:14.000000000 +0000 +++ libvpx-0.9.6/vp8/common/arm/arm_systemdependent.c 2011-03-04 20:40:39.000000000 +0000 @@ -11,13 +11,13 @@ #include "vpx_ports/config.h" #include "vpx_ports/arm.h" -#include "g_common.h" -#include "pragmas.h" -#include "subpixel.h" -#include "loopfilter.h" -#include "recon.h" -#include "idct.h" -#include "onyxc_int.h" +#include "vp8/common/g_common.h" +#include "vp8/common/pragmas.h" +#include "vp8/common/subpixel.h" +#include "vp8/common/loopfilter.h" +#include "vp8/common/recon.h" +#include "vp8/common/idct.h" +#include "vp8/common/onyxc_int.h" extern void (*vp8_build_intra_predictors_mby_ptr)(MACROBLOCKD *x); extern void vp8_build_intra_predictors_mby(MACROBLOCKD *x); diff -Nru libvpx-0.9.5/vp8/common/arm/armv6/bilinearfilter_v6.asm libvpx-0.9.6/vp8/common/arm/armv6/bilinearfilter_v6.asm --- libvpx-0.9.5/vp8/common/arm/armv6/bilinearfilter_v6.asm 2010-10-28 13:14:14.000000000 +0000 +++ libvpx-0.9.6/vp8/common/arm/armv6/bilinearfilter_v6.asm 2011-03-04 20:40:39.000000000 +0000 @@ -15,19 +15,19 @@ AREA |.text|, CODE, READONLY ; name this block of code ;------------------------------------- -; r0 unsigned char *src_ptr, -; r1 unsigned short *output_ptr, -; r2 unsigned int src_pixels_per_line, -; r3 unsigned int output_height, -; stack unsigned int output_width, -; stack const short *vp8_filter +; r0 unsigned char *src_ptr, +; r1 unsigned short *dst_ptr, +; r2 unsigned int src_pitch, +; r3 unsigned int height, +; stack unsigned int width, +; stack const short *vp8_filter ;------------------------------------- ; The output is transposed stroed in output array to make it easy for second pass 
filtering. |vp8_filter_block2d_bil_first_pass_armv6| PROC stmdb sp!, {r4 - r11, lr} ldr r11, [sp, #40] ; vp8_filter address - ldr r4, [sp, #36] ; output width + ldr r4, [sp, #36] ; width mov r12, r3 ; outer-loop counter sub r2, r2, r4 ; src increment for height loop @@ -38,10 +38,10 @@ ldr r5, [r11] ; load up filter coefficients - mov r3, r3, lsl #1 ; output_height*2 + mov r3, r3, lsl #1 ; height*2 add r3, r3, #2 ; plus 2 to make output buffer 4-bit aligned since height is actually (height+1) - mov r11, r1 ; save output_ptr for each row + mov r11, r1 ; save dst_ptr for each row cmp r5, #128 ; if filter coef = 128, then skip the filter beq bil_null_1st_filter @@ -140,17 +140,17 @@ ;--------------------------------- ; r0 unsigned short *src_ptr, -; r1 unsigned char *output_ptr, -; r2 int output_pitch, -; r3 unsigned int output_height, -; stack unsigned int output_width, -; stack const short *vp8_filter +; r1 unsigned char *dst_ptr, +; r2 int dst_pitch, +; r3 unsigned int height, +; stack unsigned int width, +; stack const short *vp8_filter ;--------------------------------- |vp8_filter_block2d_bil_second_pass_armv6| PROC stmdb sp!, {r4 - r11, lr} ldr r11, [sp, #40] ; vp8_filter address - ldr r4, [sp, #36] ; output width + ldr r4, [sp, #36] ; width ldr r5, [r11] ; load up filter coefficients mov r12, r4 ; outer-loop counter = width, since we work on transposed data matrix diff -Nru libvpx-0.9.5/vp8/common/arm/armv6/sixtappredict8x4_v6.asm libvpx-0.9.6/vp8/common/arm/armv6/sixtappredict8x4_v6.asm --- libvpx-0.9.5/vp8/common/arm/armv6/sixtappredict8x4_v6.asm 2010-10-28 13:14:14.000000000 +0000 +++ libvpx-0.9.6/vp8/common/arm/armv6/sixtappredict8x4_v6.asm 2011-03-04 20:40:39.000000000 +0000 @@ -243,8 +243,6 @@ ENDP ;----------------- - AREA subpelfilters8_dat, DATA, READWRITE ;read/write by default -;Data section with name data_area is specified. DCD reserves space in memory for 48 data. ;One word each is reserved. Label filter_coeff can be used to access the data. 
;Data address: filter_coeff, filter_coeff+4, filter_coeff+8 ... _filter8_coeff_ diff -Nru libvpx-0.9.5/vp8/common/arm/bilinearfilter_arm.c libvpx-0.9.6/vp8/common/arm/bilinearfilter_arm.c --- libvpx-0.9.5/vp8/common/arm/bilinearfilter_arm.c 2010-10-28 13:14:14.000000000 +0000 +++ libvpx-0.9.6/vp8/common/arm/bilinearfilter_arm.c 2011-03-04 20:40:39.000000000 +0000 @@ -10,128 +10,29 @@ #include -#include "subpixel.h" - -#define BLOCK_HEIGHT_WIDTH 4 -#define VP8_FILTER_WEIGHT 128 -#define VP8_FILTER_SHIFT 7 - -static const short bilinear_filters[8][2] = -{ - { 128, 0 }, - { 112, 16 }, - { 96, 32 }, - { 80, 48 }, - { 64, 64 }, - { 48, 80 }, - { 32, 96 }, - { 16, 112 } -}; - - -extern void vp8_filter_block2d_bil_first_pass_armv6 -( - unsigned char *src_ptr, - unsigned short *output_ptr, - unsigned int src_pixels_per_line, - unsigned int output_height, - unsigned int output_width, - const short *vp8_filter -); - -extern void vp8_filter_block2d_bil_second_pass_armv6 -( - unsigned short *src_ptr, - unsigned char *output_ptr, - int output_pitch, - unsigned int output_height, - unsigned int output_width, - const short *vp8_filter -); - -#if 0 -void vp8_filter_block2d_bil_first_pass_6 -( - unsigned char *src_ptr, - unsigned short *output_ptr, - unsigned int src_pixels_per_line, - unsigned int output_height, - unsigned int output_width, - const short *vp8_filter -) -{ - unsigned int i, j; - - for ( i=0; i> VP8_FILTER_SHIFT; - src_ptr++; - } - - /* Next row... */ - src_ptr += src_pixels_per_line - output_width; - output_ptr += output_width; - } -} - -void vp8_filter_block2d_bil_second_pass_6 -( - unsigned short *src_ptr, - unsigned char *output_ptr, - int output_pitch, - unsigned int output_height, - unsigned int output_width, - const short *vp8_filter -) -{ - unsigned int i,j; - int Temp; - - for ( i=0; i> VP8_FILTER_SHIFT); - src_ptr++; - } - - /* Next row... 
*/ - /*src_ptr += src_pixels_per_line - output_width;*/ - output_ptr += output_pitch; - } -} -#endif +#include "vp8/common/filter.h" +#include "vp8/common/subpixel.h" +#include "bilinearfilter_arm.h" void vp8_filter_block2d_bil_armv6 ( unsigned char *src_ptr, - unsigned char *output_ptr, - unsigned int src_pixels_per_line, + unsigned char *dst_ptr, + unsigned int src_pitch, unsigned int dst_pitch, - const short *HFilter, - const short *VFilter, + const short *HFilter, + const short *VFilter, int Width, int Height ) { - - unsigned short FData[36*16]; /* Temp data bufffer used in filtering */ + unsigned short FData[36*16]; /* Temp data buffer used in filtering */ /* First filter 1-D horizontally... */ - /* pixel_step = 1; */ - vp8_filter_block2d_bil_first_pass_armv6(src_ptr, FData, src_pixels_per_line, Height + 1, Width, HFilter); + vp8_filter_block2d_bil_first_pass_armv6(src_ptr, FData, src_pitch, Height + 1, Width, HFilter); /* then 1-D vertically... */ - vp8_filter_block2d_bil_second_pass_armv6(FData, output_ptr, dst_pitch, Height, Width, VFilter); + vp8_filter_block2d_bil_second_pass_armv6(FData, dst_ptr, dst_pitch, Height, Width, VFilter); } @@ -148,8 +49,8 @@ const short *HFilter; const short *VFilter; - HFilter = bilinear_filters[xoffset]; - VFilter = bilinear_filters[yoffset]; + HFilter = vp8_bilinear_filters[xoffset]; + VFilter = vp8_bilinear_filters[yoffset]; vp8_filter_block2d_bil_armv6(src_ptr, dst_ptr, src_pixels_per_line, dst_pitch, HFilter, VFilter, 4, 4); } @@ -167,8 +68,8 @@ const short *HFilter; const short *VFilter; - HFilter = bilinear_filters[xoffset]; - VFilter = bilinear_filters[yoffset]; + HFilter = vp8_bilinear_filters[xoffset]; + VFilter = vp8_bilinear_filters[yoffset]; vp8_filter_block2d_bil_armv6(src_ptr, dst_ptr, src_pixels_per_line, dst_pitch, HFilter, VFilter, 8, 8); } @@ -186,8 +87,8 @@ const short *HFilter; const short *VFilter; - HFilter = bilinear_filters[xoffset]; - VFilter = bilinear_filters[yoffset]; + HFilter = 
vp8_bilinear_filters[xoffset]; + VFilter = vp8_bilinear_filters[yoffset]; vp8_filter_block2d_bil_armv6(src_ptr, dst_ptr, src_pixels_per_line, dst_pitch, HFilter, VFilter, 8, 4); } @@ -205,8 +106,8 @@ const short *HFilter; const short *VFilter; - HFilter = bilinear_filters[xoffset]; - VFilter = bilinear_filters[yoffset]; + HFilter = vp8_bilinear_filters[xoffset]; + VFilter = vp8_bilinear_filters[yoffset]; vp8_filter_block2d_bil_armv6(src_ptr, dst_ptr, src_pixels_per_line, dst_pitch, HFilter, VFilter, 16, 16); } diff -Nru libvpx-0.9.5/vp8/common/arm/bilinearfilter_arm.h libvpx-0.9.6/vp8/common/arm/bilinearfilter_arm.h --- libvpx-0.9.5/vp8/common/arm/bilinearfilter_arm.h 1970-01-01 00:00:00.000000000 +0000 +++ libvpx-0.9.6/vp8/common/arm/bilinearfilter_arm.h 2011-03-04 20:40:39.000000000 +0000 @@ -0,0 +1,35 @@ +/* + * Copyright (c) 2011 The WebM project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
+ */ + + +#ifndef BILINEARFILTER_ARM_H +#define BILINEARFILTER_ARM_H + +extern void vp8_filter_block2d_bil_first_pass_armv6 +( + const unsigned char *src_ptr, + unsigned short *dst_ptr, + unsigned int src_pitch, + unsigned int height, + unsigned int width, + const short *vp8_filter +); + +extern void vp8_filter_block2d_bil_second_pass_armv6 +( + const unsigned short *src_ptr, + unsigned char *dst_ptr, + int dst_pitch, + unsigned int height, + unsigned int width, + const short *vp8_filter +); + +#endif /* BILINEARFILTER_ARM_H */ diff -Nru libvpx-0.9.5/vp8/common/arm/filter_arm.c libvpx-0.9.6/vp8/common/arm/filter_arm.c --- libvpx-0.9.5/vp8/common/arm/filter_arm.c 2010-10-28 13:14:14.000000000 +0000 +++ libvpx-0.9.6/vp8/common/arm/filter_arm.c 2011-03-04 20:40:39.000000000 +0000 @@ -11,26 +11,10 @@ #include "vpx_ports/config.h" #include -#include "subpixel.h" +#include "vp8/common/filter.h" +#include "vp8/common/subpixel.h" #include "vpx_ports/mem.h" -#define BLOCK_HEIGHT_WIDTH 4 -#define VP8_FILTER_WEIGHT 128 -#define VP8_FILTER_SHIFT 7 - -DECLARE_ALIGNED(16, static const short, sub_pel_filters[8][6]) = -{ - { 0, 0, 128, 0, 0, 0 }, /* note that 1/8 pel positions are just as per alpha -0.5 bicubic */ - { 0, -6, 123, 12, -1, 0 }, - { 2, -11, 108, 36, -8, 1 }, /* New 1/4 pel 6 tap filter */ - { 0, -9, 93, 50, -6, 0 }, - { 3, -16, 77, 77, -16, 3 }, /* New 1/2 pel 6 tap filter */ - { 0, -6, 50, 93, -9, 0 }, - { 1, -8, 36, 108, -11, 2 }, /* New 1/4 pel 6 tap filter */ - { 0, -1, 12, 123, -6, 0 }, -}; - - extern void vp8_filter_block2d_first_pass_armv6 ( unsigned char *src_ptr, @@ -93,11 +77,11 @@ { const short *HFilter; const short *VFilter; - DECLARE_ALIGNED_ARRAY(4, short, FData, 12*4); /* Temp data bufffer used in filtering */ + DECLARE_ALIGNED_ARRAY(4, short, FData, 12*4); /* Temp data buffer used in filtering */ - HFilter = sub_pel_filters[xoffset]; /* 6 tap */ - VFilter = sub_pel_filters[yoffset]; /* 6 tap */ + HFilter = vp8_sub_pel_filters[xoffset]; /* 6 tap */ + 
VFilter = vp8_sub_pel_filters[yoffset]; /* 6 tap */ /* Vfilter is null. First pass only */ if (xoffset && !yoffset) @@ -129,47 +113,6 @@ } } -#if 0 -void vp8_sixtap_predict8x4_armv6 -( - unsigned char *src_ptr, - int src_pixels_per_line, - int xoffset, - int yoffset, - unsigned char *dst_ptr, - int dst_pitch -) -{ - const short *HFilter; - const short *VFilter; - DECLARE_ALIGNED_ARRAY(4, short, FData, 16*8); /* Temp data bufffer used in filtering */ - - HFilter = sub_pel_filters[xoffset]; /* 6 tap */ - VFilter = sub_pel_filters[yoffset]; /* 6 tap */ - - - /*if (xoffset && !yoffset) - { - vp8_filter_block2d_first_pass_only_armv6 ( src_ptr, dst_ptr, src_pixels_per_line, 8, dst_pitch, HFilter ); - }*/ - /* Hfilter is null. Second pass only */ - /*else if (!xoffset && yoffset) - { - vp8_filter_block2d_second_pass_only_armv6 ( src_ptr, dst_ptr, src_pixels_per_line, 8, dst_pitch, VFilter ); - } - else - { - if (yoffset & 0x1) - vp8_filter_block2d_first_pass_armv6 ( src_ptr-src_pixels_per_line, FData+1, src_pixels_per_line, 8, 7, HFilter ); - else*/ - - vp8_filter_block2d_first_pass_armv6 ( src_ptr-(2*src_pixels_per_line), FData, src_pixels_per_line, 8, 9, HFilter ); - - vp8_filter_block2d_second_pass_armv6 ( FData+2, dst_ptr, dst_pitch, 4, 8, VFilter ); - /*}*/ -} -#endif - void vp8_sixtap_predict8x8_armv6 ( unsigned char *src_ptr, @@ -182,10 +125,10 @@ { const short *HFilter; const short *VFilter; - DECLARE_ALIGNED_ARRAY(4, short, FData, 16*8); /* Temp data bufffer used in filtering */ + DECLARE_ALIGNED_ARRAY(4, short, FData, 16*8); /* Temp data buffer used in filtering */ - HFilter = sub_pel_filters[xoffset]; /* 6 tap */ - VFilter = sub_pel_filters[yoffset]; /* 6 tap */ + HFilter = vp8_sub_pel_filters[xoffset]; /* 6 tap */ + VFilter = vp8_sub_pel_filters[yoffset]; /* 6 tap */ if (xoffset && !yoffset) { @@ -224,10 +167,10 @@ { const short *HFilter; const short *VFilter; - DECLARE_ALIGNED_ARRAY(4, short, FData, 24*16); /* Temp data bufffer used in filtering */ + 
DECLARE_ALIGNED_ARRAY(4, short, FData, 24*16); /* Temp data buffer used in filtering */ - HFilter = sub_pel_filters[xoffset]; /* 6 tap */ - VFilter = sub_pel_filters[yoffset]; /* 6 tap */ + HFilter = vp8_sub_pel_filters[xoffset]; /* 6 tap */ + VFilter = vp8_sub_pel_filters[yoffset]; /* 6 tap */ if (xoffset && !yoffset) { diff -Nru libvpx-0.9.5/vp8/common/arm/loopfilter_arm.c libvpx-0.9.6/vp8/common/arm/loopfilter_arm.c --- libvpx-0.9.5/vp8/common/arm/loopfilter_arm.c 2010-10-28 13:14:14.000000000 +0000 +++ libvpx-0.9.6/vp8/common/arm/loopfilter_arm.c 2011-03-04 20:40:39.000000000 +0000 @@ -11,8 +11,8 @@ #include "vpx_ports/config.h" #include -#include "loopfilter.h" -#include "onyxc_int.h" +#include "vp8/common/loopfilter.h" +#include "vp8/common/onyxc_int.h" extern prototype_loopfilter(vp8_loop_filter_horizontal_edge_armv6); extern prototype_loopfilter(vp8_loop_filter_vertical_edge_armv6); @@ -41,13 +41,13 @@ int y_stride, int uv_stride, loop_filter_info *lfi, int simpler_lpf) { (void) simpler_lpf; - vp8_mbloop_filter_horizontal_edge_armv6(y_ptr, y_stride, lfi->mbflim, lfi->lim, lfi->mbthr, 2); + vp8_mbloop_filter_horizontal_edge_armv6(y_ptr, y_stride, lfi->mbflim, lfi->lim, lfi->thr, 2); if (u_ptr) - vp8_mbloop_filter_horizontal_edge_armv6(u_ptr, uv_stride, lfi->uvmbflim, lfi->uvlim, lfi->uvmbthr, 1); + vp8_mbloop_filter_horizontal_edge_armv6(u_ptr, uv_stride, lfi->mbflim, lfi->lim, lfi->thr, 1); if (v_ptr) - vp8_mbloop_filter_horizontal_edge_armv6(v_ptr, uv_stride, lfi->uvmbflim, lfi->uvlim, lfi->uvmbthr, 1); + vp8_mbloop_filter_horizontal_edge_armv6(v_ptr, uv_stride, lfi->mbflim, lfi->lim, lfi->thr, 1); } void vp8_loop_filter_mbhs_armv6(unsigned char *y_ptr, unsigned char *u_ptr, unsigned char *v_ptr, @@ -57,7 +57,7 @@ (void) v_ptr; (void) uv_stride; (void) simpler_lpf; - vp8_loop_filter_simple_horizontal_edge_armv6(y_ptr, y_stride, lfi->mbflim, lfi->lim, lfi->mbthr, 2); + vp8_loop_filter_simple_horizontal_edge_armv6(y_ptr, y_stride, lfi->mbflim, lfi->lim, 
lfi->thr, 2); } /* Vertical MB Filtering */ @@ -65,13 +65,13 @@ int y_stride, int uv_stride, loop_filter_info *lfi, int simpler_lpf) { (void) simpler_lpf; - vp8_mbloop_filter_vertical_edge_armv6(y_ptr, y_stride, lfi->mbflim, lfi->lim, lfi->mbthr, 2); + vp8_mbloop_filter_vertical_edge_armv6(y_ptr, y_stride, lfi->mbflim, lfi->lim, lfi->thr, 2); if (u_ptr) - vp8_mbloop_filter_vertical_edge_armv6(u_ptr, uv_stride, lfi->uvmbflim, lfi->uvlim, lfi->uvmbthr, 1); + vp8_mbloop_filter_vertical_edge_armv6(u_ptr, uv_stride, lfi->mbflim, lfi->lim, lfi->thr, 1); if (v_ptr) - vp8_mbloop_filter_vertical_edge_armv6(v_ptr, uv_stride, lfi->uvmbflim, lfi->uvlim, lfi->uvmbthr, 1); + vp8_mbloop_filter_vertical_edge_armv6(v_ptr, uv_stride, lfi->mbflim, lfi->lim, lfi->thr, 1); } void vp8_loop_filter_mbvs_armv6(unsigned char *y_ptr, unsigned char *u_ptr, unsigned char *v_ptr, @@ -81,7 +81,7 @@ (void) v_ptr; (void) uv_stride; (void) simpler_lpf; - vp8_loop_filter_simple_vertical_edge_armv6(y_ptr, y_stride, lfi->mbflim, lfi->lim, lfi->mbthr, 2); + vp8_loop_filter_simple_vertical_edge_armv6(y_ptr, y_stride, lfi->mbflim, lfi->lim, lfi->thr, 2); } /* Horizontal B Filtering */ @@ -94,10 +94,10 @@ vp8_loop_filter_horizontal_edge_armv6(y_ptr + 12 * y_stride, y_stride, lfi->flim, lfi->lim, lfi->thr, 2); if (u_ptr) - vp8_loop_filter_horizontal_edge_armv6(u_ptr + 4 * uv_stride, uv_stride, lfi->uvflim, lfi->uvlim, lfi->uvthr, 1); + vp8_loop_filter_horizontal_edge_armv6(u_ptr + 4 * uv_stride, uv_stride, lfi->flim, lfi->lim, lfi->thr, 1); if (v_ptr) - vp8_loop_filter_horizontal_edge_armv6(v_ptr + 4 * uv_stride, uv_stride, lfi->uvflim, lfi->uvlim, lfi->uvthr, 1); + vp8_loop_filter_horizontal_edge_armv6(v_ptr + 4 * uv_stride, uv_stride, lfi->flim, lfi->lim, lfi->thr, 1); } void vp8_loop_filter_bhs_armv6(unsigned char *y_ptr, unsigned char *u_ptr, unsigned char *v_ptr, @@ -122,10 +122,10 @@ vp8_loop_filter_vertical_edge_armv6(y_ptr + 12, y_stride, lfi->flim, lfi->lim, lfi->thr, 2); if (u_ptr) - 
vp8_loop_filter_vertical_edge_armv6(u_ptr + 4, uv_stride, lfi->uvflim, lfi->uvlim, lfi->uvthr, 1); + vp8_loop_filter_vertical_edge_armv6(u_ptr + 4, uv_stride, lfi->flim, lfi->lim, lfi->thr, 1); if (v_ptr) - vp8_loop_filter_vertical_edge_armv6(v_ptr + 4, uv_stride, lfi->uvflim, lfi->uvlim, lfi->uvthr, 1); + vp8_loop_filter_vertical_edge_armv6(v_ptr + 4, uv_stride, lfi->flim, lfi->lim, lfi->thr, 1); } void vp8_loop_filter_bvs_armv6(unsigned char *y_ptr, unsigned char *u_ptr, unsigned char *v_ptr, @@ -148,10 +148,10 @@ int y_stride, int uv_stride, loop_filter_info *lfi, int simpler_lpf) { (void) simpler_lpf; - vp8_mbloop_filter_horizontal_edge_y_neon(y_ptr, y_stride, lfi->mbflim, lfi->lim, lfi->mbthr, 2); + vp8_mbloop_filter_horizontal_edge_y_neon(y_ptr, y_stride, lfi->mbflim, lfi->lim, lfi->thr, 2); if (u_ptr) - vp8_mbloop_filter_horizontal_edge_uv_neon(u_ptr, uv_stride, lfi->uvmbflim, lfi->uvlim, lfi->uvmbthr, v_ptr); + vp8_mbloop_filter_horizontal_edge_uv_neon(u_ptr, uv_stride, lfi->mbflim, lfi->lim, lfi->thr, v_ptr); } void vp8_loop_filter_mbhs_neon(unsigned char *y_ptr, unsigned char *u_ptr, unsigned char *v_ptr, @@ -161,7 +161,7 @@ (void) v_ptr; (void) uv_stride; (void) simpler_lpf; - vp8_loop_filter_simple_horizontal_edge_neon(y_ptr, y_stride, lfi->mbflim, lfi->lim, lfi->mbthr, 2); + vp8_loop_filter_simple_horizontal_edge_neon(y_ptr, y_stride, lfi->mbflim, lfi->lim, lfi->thr, 2); } /* Vertical MB Filtering */ @@ -169,10 +169,10 @@ int y_stride, int uv_stride, loop_filter_info *lfi, int simpler_lpf) { (void) simpler_lpf; - vp8_mbloop_filter_vertical_edge_y_neon(y_ptr, y_stride, lfi->mbflim, lfi->lim, lfi->mbthr, 2); + vp8_mbloop_filter_vertical_edge_y_neon(y_ptr, y_stride, lfi->mbflim, lfi->lim, lfi->thr, 2); if (u_ptr) - vp8_mbloop_filter_vertical_edge_uv_neon(u_ptr, uv_stride, lfi->uvmbflim, lfi->uvlim, lfi->uvmbthr, v_ptr); + vp8_mbloop_filter_vertical_edge_uv_neon(u_ptr, uv_stride, lfi->mbflim, lfi->lim, lfi->thr, v_ptr); } void 
vp8_loop_filter_mbvs_neon(unsigned char *y_ptr, unsigned char *u_ptr, unsigned char *v_ptr, @@ -182,7 +182,7 @@ (void) v_ptr; (void) uv_stride; (void) simpler_lpf; - vp8_loop_filter_simple_vertical_edge_neon(y_ptr, y_stride, lfi->mbflim, lfi->lim, lfi->mbthr, 2); + vp8_loop_filter_simple_vertical_edge_neon(y_ptr, y_stride, lfi->mbflim, lfi->lim, lfi->thr, 2); } /* Horizontal B Filtering */ @@ -195,7 +195,7 @@ vp8_loop_filter_horizontal_edge_y_neon(y_ptr + 12 * y_stride, y_stride, lfi->flim, lfi->lim, lfi->thr, 2); if (u_ptr) - vp8_loop_filter_horizontal_edge_uv_neon(u_ptr + 4 * uv_stride, uv_stride, lfi->uvflim, lfi->uvlim, lfi->uvthr, v_ptr + 4 * uv_stride); + vp8_loop_filter_horizontal_edge_uv_neon(u_ptr + 4 * uv_stride, uv_stride, lfi->flim, lfi->lim, lfi->thr, v_ptr + 4 * uv_stride); } void vp8_loop_filter_bhs_neon(unsigned char *y_ptr, unsigned char *u_ptr, unsigned char *v_ptr, @@ -220,7 +220,7 @@ vp8_loop_filter_vertical_edge_y_neon(y_ptr + 12, y_stride, lfi->flim, lfi->lim, lfi->thr, 2); if (u_ptr) - vp8_loop_filter_vertical_edge_uv_neon(u_ptr + 4, uv_stride, lfi->uvflim, lfi->uvlim, lfi->uvthr, v_ptr + 4); + vp8_loop_filter_vertical_edge_uv_neon(u_ptr + 4, uv_stride, lfi->flim, lfi->lim, lfi->thr, v_ptr + 4); } void vp8_loop_filter_bvs_neon(unsigned char *y_ptr, unsigned char *u_ptr, unsigned char *v_ptr, diff -Nru libvpx-0.9.5/vp8/common/arm/neon/bilinearpredict16x16_neon.asm libvpx-0.9.6/vp8/common/arm/neon/bilinearpredict16x16_neon.asm --- libvpx-0.9.5/vp8/common/arm/neon/bilinearpredict16x16_neon.asm 2010-10-28 13:14:14.000000000 +0000 +++ libvpx-0.9.6/vp8/common/arm/neon/bilinearpredict16x16_neon.asm 2011-03-04 20:40:39.000000000 +0000 @@ -350,10 +350,7 @@ ENDP ;----------------- - AREA bifilters16_dat, DATA, READWRITE ;read/write by default -;Data section with name data_area is specified. DCD reserves space in memory for 48 data. -;One word each is reserved. Label filter_coeff can be used to access the data. 
-;Data address: filter_coeff, filter_coeff+4, filter_coeff+8 ... + _bifilter16_coeff_ DCD bifilter16_coeff bifilter16_coeff diff -Nru libvpx-0.9.5/vp8/common/arm/neon/bilinearpredict4x4_neon.asm libvpx-0.9.6/vp8/common/arm/neon/bilinearpredict4x4_neon.asm --- libvpx-0.9.5/vp8/common/arm/neon/bilinearpredict4x4_neon.asm 2010-10-28 13:14:14.000000000 +0000 +++ libvpx-0.9.6/vp8/common/arm/neon/bilinearpredict4x4_neon.asm 2011-03-04 20:40:39.000000000 +0000 @@ -123,10 +123,7 @@ ENDP ;----------------- - AREA bilinearfilters4_dat, DATA, READWRITE ;read/write by default -;Data section with name data_area is specified. DCD reserves space in memory for 48 data. -;One word each is reserved. Label filter_coeff can be used to access the data. -;Data address: filter_coeff, filter_coeff+4, filter_coeff+8 ... + _bifilter4_coeff_ DCD bifilter4_coeff bifilter4_coeff diff -Nru libvpx-0.9.5/vp8/common/arm/neon/bilinearpredict8x4_neon.asm libvpx-0.9.6/vp8/common/arm/neon/bilinearpredict8x4_neon.asm --- libvpx-0.9.5/vp8/common/arm/neon/bilinearpredict8x4_neon.asm 2010-10-28 13:14:14.000000000 +0000 +++ libvpx-0.9.6/vp8/common/arm/neon/bilinearpredict8x4_neon.asm 2011-03-04 20:40:39.000000000 +0000 @@ -128,10 +128,7 @@ ENDP ;----------------- - AREA bifilters8x4_dat, DATA, READWRITE ;read/write by default -;Data section with name data_area is specified. DCD reserves space in memory for 48 data. -;One word each is reserved. Label filter_coeff can be used to access the data. -;Data address: filter_coeff, filter_coeff+4, filter_coeff+8 ... 
+ _bifilter8x4_coeff_ DCD bifilter8x4_coeff bifilter8x4_coeff diff -Nru libvpx-0.9.5/vp8/common/arm/neon/bilinearpredict8x8_neon.asm libvpx-0.9.6/vp8/common/arm/neon/bilinearpredict8x8_neon.asm --- libvpx-0.9.5/vp8/common/arm/neon/bilinearpredict8x8_neon.asm 2010-10-28 13:14:14.000000000 +0000 +++ libvpx-0.9.6/vp8/common/arm/neon/bilinearpredict8x8_neon.asm 2011-03-04 20:40:39.000000000 +0000 @@ -176,10 +176,7 @@ ENDP ;----------------- - AREA bifilters8_dat, DATA, READWRITE ;read/write by default -;Data section with name data_area is specified. DCD reserves space in memory for 48 data. -;One word each is reserved. Label filter_coeff can be used to access the data. -;Data address: filter_coeff, filter_coeff+4, filter_coeff+8 ... + _bifilter8_coeff_ DCD bifilter8_coeff bifilter8_coeff diff -Nru libvpx-0.9.5/vp8/common/arm/neon/loopfilter_neon.asm libvpx-0.9.6/vp8/common/arm/neon/loopfilter_neon.asm --- libvpx-0.9.5/vp8/common/arm/neon/loopfilter_neon.asm 2010-10-28 13:14:14.000000000 +0000 +++ libvpx-0.9.6/vp8/common/arm/neon/loopfilter_neon.asm 2011-03-04 20:40:39.000000000 +0000 @@ -397,7 +397,8 @@ bx lr ENDP ; |vp8_loop_filter_horizontal_edge_y_neon| - AREA loopfilter_dat, DATA, READONLY +;----------------- + _lf_coeff_ DCD lf_coeff lf_coeff diff -Nru libvpx-0.9.5/vp8/common/arm/neon/loopfiltersimplehorizontaledge_neon.asm libvpx-0.9.6/vp8/common/arm/neon/loopfiltersimplehorizontaledge_neon.asm --- libvpx-0.9.5/vp8/common/arm/neon/loopfiltersimplehorizontaledge_neon.asm 2010-10-28 13:14:14.000000000 +0000 +++ libvpx-0.9.6/vp8/common/arm/neon/loopfiltersimplehorizontaledge_neon.asm 2011-03-04 20:40:39.000000000 +0000 @@ -104,10 +104,7 @@ ENDP ; |vp8_loop_filter_simple_horizontal_edge_neon| ;----------------- - AREA hloopfiltery_dat, DATA, READWRITE ;read/write by default -;Data section with name data_area is specified. DCD reserves space in memory for 16 data. -;One word each is reserved. Label filter_coeff can be used to access the data. 
-;Data address: filter_coeff, filter_coeff+4, filter_coeff+8 ... + _lfhy_coeff_ DCD lfhy_coeff lfhy_coeff diff -Nru libvpx-0.9.5/vp8/common/arm/neon/loopfiltersimpleverticaledge_neon.asm libvpx-0.9.6/vp8/common/arm/neon/loopfiltersimpleverticaledge_neon.asm --- libvpx-0.9.5/vp8/common/arm/neon/loopfiltersimpleverticaledge_neon.asm 2010-10-28 13:14:14.000000000 +0000 +++ libvpx-0.9.6/vp8/common/arm/neon/loopfiltersimpleverticaledge_neon.asm 2011-03-04 20:40:39.000000000 +0000 @@ -145,10 +145,7 @@ ENDP ; |vp8_loop_filter_simple_vertical_edge_neon| ;----------------- - AREA vloopfiltery_dat, DATA, READWRITE ;read/write by default -;Data section with name data_area is specified. DCD reserves space in memory for 16 data. -;One word each is reserved. Label filter_coeff can be used to access the data. -;Data address: filter_coeff, filter_coeff+4, filter_coeff+8 ... + _vlfy_coeff_ DCD vlfy_coeff vlfy_coeff diff -Nru libvpx-0.9.5/vp8/common/arm/neon/mbloopfilter_neon.asm libvpx-0.9.6/vp8/common/arm/neon/mbloopfilter_neon.asm --- libvpx-0.9.5/vp8/common/arm/neon/mbloopfilter_neon.asm 2010-10-28 13:14:14.000000000 +0000 +++ libvpx-0.9.6/vp8/common/arm/neon/mbloopfilter_neon.asm 2011-03-04 20:40:39.000000000 +0000 @@ -505,7 +505,8 @@ bx lr ENDP ; |vp8_mbloop_filter_neon| - AREA mbloopfilter_dat, DATA, READONLY +;----------------- + _mblf_coeff_ DCD mblf_coeff mblf_coeff diff -Nru libvpx-0.9.5/vp8/common/arm/neon/recon_neon.c libvpx-0.9.6/vp8/common/arm/neon/recon_neon.c --- libvpx-0.9.5/vp8/common/arm/neon/recon_neon.c 2010-10-28 13:14:14.000000000 +0000 +++ libvpx-0.9.6/vp8/common/arm/neon/recon_neon.c 2011-03-04 20:40:39.000000000 +0000 @@ -10,8 +10,8 @@ #include "vpx_ports/config.h" -#include "recon.h" -#include "blockd.h" +#include "vp8/common/recon.h" +#include "vp8/common/blockd.h" extern void vp8_recon16x16mb_neon(unsigned char *pred_ptr, short *diff_ptr, unsigned char *dst_ptr, int ystride, unsigned char *udst_ptr, unsigned char *vdst_ptr); diff -Nru 
libvpx-0.9.5/vp8/common/arm/neon/shortidct4x4llm_neon.asm libvpx-0.9.6/vp8/common/arm/neon/shortidct4x4llm_neon.asm --- libvpx-0.9.5/vp8/common/arm/neon/shortidct4x4llm_neon.asm 2010-10-28 13:14:14.000000000 +0000 +++ libvpx-0.9.6/vp8/common/arm/neon/shortidct4x4llm_neon.asm 2011-03-04 20:40:39.000000000 +0000 @@ -113,10 +113,7 @@ ENDP ;----------------- - AREA idct4x4_dat, DATA, READWRITE ;read/write by default -;Data section with name data_area is specified. DCD reserves space in memory for 48 data. -;One word each is reserved. Label filter_coeff can be used to access the data. -;Data address: filter_coeff, filter_coeff+4, filter_coeff+8 ... + _idct_coeff_ DCD idct_coeff idct_coeff diff -Nru libvpx-0.9.5/vp8/common/arm/neon/sixtappredict16x16_neon.asm libvpx-0.9.6/vp8/common/arm/neon/sixtappredict16x16_neon.asm --- libvpx-0.9.5/vp8/common/arm/neon/sixtappredict16x16_neon.asm 2010-10-28 13:14:14.000000000 +0000 +++ libvpx-0.9.6/vp8/common/arm/neon/sixtappredict16x16_neon.asm 2011-03-04 20:40:39.000000000 +0000 @@ -476,10 +476,7 @@ ENDP ;----------------- - AREA subpelfilters16_dat, DATA, READWRITE ;read/write by default -;Data section with name data_area is specified. DCD reserves space in memory for 48 data. -;One word each is reserved. Label filter_coeff can be used to access the data. -;Data address: filter_coeff, filter_coeff+4, filter_coeff+8 ... + _filter16_coeff_ DCD filter16_coeff filter16_coeff diff -Nru libvpx-0.9.5/vp8/common/arm/neon/sixtappredict4x4_neon.asm libvpx-0.9.6/vp8/common/arm/neon/sixtappredict4x4_neon.asm --- libvpx-0.9.5/vp8/common/arm/neon/sixtappredict4x4_neon.asm 2010-10-28 13:14:14.000000000 +0000 +++ libvpx-0.9.6/vp8/common/arm/neon/sixtappredict4x4_neon.asm 2011-03-04 20:40:39.000000000 +0000 @@ -407,10 +407,7 @@ ENDP ;----------------- - AREA subpelfilters4_dat, DATA, READWRITE ;read/write by default -;Data section with name data_area is specified. DCD reserves space in memory for 48 data. -;One word each is reserved. 
Label filter_coeff can be used to access the data. -;Data address: filter_coeff, filter_coeff+4, filter_coeff+8 ... + _filter4_coeff_ DCD filter4_coeff filter4_coeff diff -Nru libvpx-0.9.5/vp8/common/arm/neon/sixtappredict8x4_neon.asm libvpx-0.9.6/vp8/common/arm/neon/sixtappredict8x4_neon.asm --- libvpx-0.9.5/vp8/common/arm/neon/sixtappredict8x4_neon.asm 2010-10-28 13:14:14.000000000 +0000 +++ libvpx-0.9.6/vp8/common/arm/neon/sixtappredict8x4_neon.asm 2011-03-04 20:40:39.000000000 +0000 @@ -458,10 +458,7 @@ ENDP ;----------------- - AREA subpelfilters8_dat, DATA, READWRITE ;read/write by default -;Data section with name data_area is specified. DCD reserves space in memory for 48 data. -;One word each is reserved. Label filter_coeff can be used to access the data. -;Data address: filter_coeff, filter_coeff+4, filter_coeff+8 ... + _filter8_coeff_ DCD filter8_coeff filter8_coeff diff -Nru libvpx-0.9.5/vp8/common/arm/neon/sixtappredict8x8_neon.asm libvpx-0.9.6/vp8/common/arm/neon/sixtappredict8x8_neon.asm --- libvpx-0.9.5/vp8/common/arm/neon/sixtappredict8x8_neon.asm 2010-10-28 13:14:14.000000000 +0000 +++ libvpx-0.9.6/vp8/common/arm/neon/sixtappredict8x8_neon.asm 2011-03-04 20:40:39.000000000 +0000 @@ -509,10 +509,7 @@ ENDP ;----------------- - AREA subpelfilters8_dat, DATA, READWRITE ;read/write by default -;Data section with name data_area is specified. DCD reserves space in memory for 48 data. -;One word each is reserved. Label filter_coeff can be used to access the data. -;Data address: filter_coeff, filter_coeff+4, filter_coeff+8 ... 
+ _filter8_coeff_ DCD filter8_coeff filter8_coeff diff -Nru libvpx-0.9.5/vp8/common/arm/reconintra_arm.c libvpx-0.9.6/vp8/common/arm/reconintra_arm.c --- libvpx-0.9.5/vp8/common/arm/reconintra_arm.c 2010-10-28 13:14:14.000000000 +0000 +++ libvpx-0.9.6/vp8/common/arm/reconintra_arm.c 2011-03-04 20:40:39.000000000 +0000 @@ -10,10 +10,10 @@ #include "vpx_ports/config.h" -#include "blockd.h" -#include "reconintra.h" +#include "vp8/common/blockd.h" +#include "vp8/common/reconintra.h" #include "vpx_mem/vpx_mem.h" -#include "recon.h" +#include "vp8/common/recon.h" #if HAVE_ARMV7 extern void vp8_build_intra_predictors_mby_neon_func( diff -Nru libvpx-0.9.5/vp8/common/arm/vpx_asm_offsets.c libvpx-0.9.6/vp8/common/arm/vpx_asm_offsets.c --- libvpx-0.9.5/vp8/common/arm/vpx_asm_offsets.c 2010-10-28 13:14:14.000000000 +0000 +++ libvpx-0.9.6/vp8/common/arm/vpx_asm_offsets.c 1970-01-01 00:00:00.000000000 +0000 @@ -1,87 +0,0 @@ -/* - * Copyright (c) 2010 The WebM project authors. All Rights Reserved. - * - * Use of this source code is governed by a BSD-style license - * that can be found in the LICENSE file in the root of the source - * tree. An additional intellectual property rights grant can be found - * in the file PATENTS. All contributing project authors may - * be found in the AUTHORS file in the root of the source tree. 
- */ - - -#include "vpx_ports/config.h" -#include - -#if CONFIG_VP8_ENCODER -#include "vpx_scale/yv12config.h" -#endif - -#if CONFIG_VP8_DECODER -#include "onyxd_int.h" -#endif - -#define DEFINE(sym, val) int sym = val; - -/* -#define BLANK() asm volatile("\n->" : : ) -*/ - -/* - * int main(void) - * { - */ - -#if CONFIG_VP8_DECODER || CONFIG_VP8_ENCODER -DEFINE(yv12_buffer_config_y_width, offsetof(YV12_BUFFER_CONFIG, y_width)); -DEFINE(yv12_buffer_config_y_height, offsetof(YV12_BUFFER_CONFIG, y_height)); -DEFINE(yv12_buffer_config_y_stride, offsetof(YV12_BUFFER_CONFIG, y_stride)); -DEFINE(yv12_buffer_config_uv_width, offsetof(YV12_BUFFER_CONFIG, uv_width)); -DEFINE(yv12_buffer_config_uv_height, offsetof(YV12_BUFFER_CONFIG, uv_height)); -DEFINE(yv12_buffer_config_uv_stride, offsetof(YV12_BUFFER_CONFIG, uv_stride)); -DEFINE(yv12_buffer_config_y_buffer, offsetof(YV12_BUFFER_CONFIG, y_buffer)); -DEFINE(yv12_buffer_config_u_buffer, offsetof(YV12_BUFFER_CONFIG, u_buffer)); -DEFINE(yv12_buffer_config_v_buffer, offsetof(YV12_BUFFER_CONFIG, v_buffer)); -DEFINE(yv12_buffer_config_border, offsetof(YV12_BUFFER_CONFIG, border)); -#endif - -#if CONFIG_VP8_DECODER -DEFINE(mb_diff, offsetof(MACROBLOCKD, diff)); -DEFINE(mb_predictor, offsetof(MACROBLOCKD, predictor)); -DEFINE(mb_dst_y_stride, offsetof(MACROBLOCKD, dst.y_stride)); -DEFINE(mb_dst_y_buffer, offsetof(MACROBLOCKD, dst.y_buffer)); -DEFINE(mb_dst_u_buffer, offsetof(MACROBLOCKD, dst.u_buffer)); -DEFINE(mb_dst_v_buffer, offsetof(MACROBLOCKD, dst.v_buffer)); -DEFINE(mb_up_available, offsetof(MACROBLOCKD, up_available)); -DEFINE(mb_left_available, offsetof(MACROBLOCKD, left_available)); - -DEFINE(detok_scan, offsetof(DETOK, scan)); -DEFINE(detok_ptr_block2leftabove, offsetof(DETOK, ptr_block2leftabove)); -DEFINE(detok_coef_tree_ptr, offsetof(DETOK, vp8_coef_tree_ptr)); -DEFINE(detok_teb_base_ptr, offsetof(DETOK, teb_base_ptr)); -DEFINE(detok_norm_ptr, offsetof(DETOK, norm_ptr)); -DEFINE(detok_ptr_coef_bands_x, 
offsetof(DETOK, ptr_coef_bands_x)); - -DEFINE(detok_A, offsetof(DETOK, A)); -DEFINE(detok_L, offsetof(DETOK, L)); - -DEFINE(detok_qcoeff_start_ptr, offsetof(DETOK, qcoeff_start_ptr)); -DEFINE(detok_current_bc, offsetof(DETOK, current_bc)); -DEFINE(detok_coef_probs, offsetof(DETOK, coef_probs)); -DEFINE(detok_eob, offsetof(DETOK, eob)); - -DEFINE(bool_decoder_user_buffer_end, offsetof(BOOL_DECODER, user_buffer_end)); -DEFINE(bool_decoder_user_buffer, offsetof(BOOL_DECODER, user_buffer)); -DEFINE(bool_decoder_value, offsetof(BOOL_DECODER, value)); -DEFINE(bool_decoder_count, offsetof(BOOL_DECODER, count)); -DEFINE(bool_decoder_range, offsetof(BOOL_DECODER, range)); - -DEFINE(tokenextrabits_min_val, offsetof(TOKENEXTRABITS, min_val)); -DEFINE(tokenextrabits_length, offsetof(TOKENEXTRABITS, Length)); -#endif - -//add asserts for any offset that is not supported by assembly code -//add asserts for any size that is not supported by assembly code -/* - * return 0; - * } - */ diff -Nru libvpx-0.9.5/vp8/common/asm_com_offsets.c libvpx-0.9.6/vp8/common/asm_com_offsets.c --- libvpx-0.9.5/vp8/common/asm_com_offsets.c 1970-01-01 00:00:00.000000000 +0000 +++ libvpx-0.9.6/vp8/common/asm_com_offsets.c 2011-03-04 20:40:39.000000000 +0000 @@ -0,0 +1,49 @@ +/* + * Copyright (c) 2011 The WebM project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
+ */ + + +#include "vpx_ports/config.h" +#include + +#include "vpx_scale/yv12config.h" + +#define ct_assert(name,cond) \ + static void assert_##name(void) UNUSED;\ + static void assert_##name(void) {switch(0){case 0:case !!(cond):;}} + +#define DEFINE(sym, val) int sym = val; + +/* +#define BLANK() asm volatile("\n->" : : ) +*/ + +/* + * int main(void) + * { + */ + +//vpx_scale +DEFINE(yv12_buffer_config_y_width, offsetof(YV12_BUFFER_CONFIG, y_width)); +DEFINE(yv12_buffer_config_y_height, offsetof(YV12_BUFFER_CONFIG, y_height)); +DEFINE(yv12_buffer_config_y_stride, offsetof(YV12_BUFFER_CONFIG, y_stride)); +DEFINE(yv12_buffer_config_uv_width, offsetof(YV12_BUFFER_CONFIG, uv_width)); +DEFINE(yv12_buffer_config_uv_height, offsetof(YV12_BUFFER_CONFIG, uv_height)); +DEFINE(yv12_buffer_config_uv_stride, offsetof(YV12_BUFFER_CONFIG, uv_stride)); +DEFINE(yv12_buffer_config_y_buffer, offsetof(YV12_BUFFER_CONFIG, y_buffer)); +DEFINE(yv12_buffer_config_u_buffer, offsetof(YV12_BUFFER_CONFIG, u_buffer)); +DEFINE(yv12_buffer_config_v_buffer, offsetof(YV12_BUFFER_CONFIG, v_buffer)); +DEFINE(yv12_buffer_config_border, offsetof(YV12_BUFFER_CONFIG, border)); + +//add asserts for any offset that is not supported by assembly code +//add asserts for any size that is not supported by assembly code +/* + * return 0; + * } + */ diff -Nru libvpx-0.9.5/vp8/common/blockd.c libvpx-0.9.6/vp8/common/blockd.c --- libvpx-0.9.5/vp8/common/blockd.c 2010-10-28 13:14:14.000000000 +0000 +++ libvpx-0.9.6/vp8/common/blockd.c 2011-03-04 20:40:39.000000000 +0000 @@ -12,8 +12,6 @@ #include "blockd.h" #include "vpx_mem/vpx_mem.h" -const int vp8_block2type[25] = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 2, 2, 2, 2, 2, 2, 2, 1}; - const unsigned char vp8_block2left[25] = { 0, 0, 0, 0, 1, 1, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7, 8 diff -Nru libvpx-0.9.5/vp8/common/blockd.h libvpx-0.9.6/vp8/common/blockd.h --- libvpx-0.9.5/vp8/common/blockd.h 2010-10-28 13:14:14.000000000 +0000 +++ 
libvpx-0.9.6/vp8/common/blockd.h 2011-03-04 20:40:39.000000000 +0000 @@ -28,11 +28,6 @@ #define DCPREDSIMTHRESH 0 #define DCPREDCNTTHRESH 3 -#define Y1CONTEXT 0 -#define UCONTEXT 1 -#define VCONTEXT 2 -#define Y2CONTEXT 3 - #define MB_FEATURE_TREE_PROBS 3 #define MAX_MB_SEGMENTS 4 @@ -48,6 +43,11 @@ int r, c; } POS; +#define PLANE_TYPE_Y_NO_DC 0 +#define PLANE_TYPE_Y2 1 +#define PLANE_TYPE_UV 2 +#define PLANE_TYPE_Y_WITH_DC 3 + typedef char ENTROPY_CONTEXT; typedef struct @@ -58,8 +58,6 @@ ENTROPY_CONTEXT y2; } ENTROPY_CONTEXT_PLANES; -extern const int vp8_block2type[25]; - extern const unsigned char vp8_block2left[25]; extern const unsigned char vp8_block2above[25]; @@ -282,6 +280,8 @@ void *current_bc; + int corrupted; + #if CONFIG_RUNTIME_CPU_DETECT struct VP8_COMMON_RTCD *rtcd; #endif diff -Nru libvpx-0.9.5/vp8/common/boolcoder.h libvpx-0.9.6/vp8/common/boolcoder.h --- libvpx-0.9.5/vp8/common/boolcoder.h 2010-10-28 13:14:14.000000000 +0000 +++ libvpx-0.9.6/vp8/common/boolcoder.h 1970-01-01 00:00:00.000000000 +0000 @@ -1,570 +0,0 @@ -/* - * Copyright (c) 2010 The WebM project authors. All Rights Reserved. - * - * Use of this source code is governed by a BSD-style license - * that can be found in the LICENSE file in the root of the source - * tree. An additional intellectual property rights grant can be found - * in the file PATENTS. All contributing project authors may - * be found in the AUTHORS file in the root of the source tree. - */ - - -#ifndef bool_coder_h -#define bool_coder_h 1 - -/* Arithmetic bool coder with largish probability range. - Timothy S Murphy 6 August 2004 */ - -/* So as not to force users to drag in too much of my idiosyncratic C++ world, - I avoid fancy storage management. */ - -#include - -#include -#include - -typedef unsigned char vp8bc_index_t; // probability index - -/* There are a couple of slight variants in the details of finite-precision - arithmetic coding. May be safely ignored by most users. 
*/ - -enum vp8bc_rounding -{ - vp8bc_down = 0, // just like VP8 - vp8bc_down_full = 1, // handles minimum probability correctly - vp8bc_up = 2 -}; - -#if _MSC_VER - -/* Note that msvc by default does not inline _anything_ (regardless of the - setting of inline_depth) and that a command-line option (-Ob1 or -Ob2) - is required to inline even the smallest functions. */ - -# pragma inline_depth( 255) // I mean it when I inline something -# pragma warning( disable : 4099) // No class vs. struct harassment -# pragma warning( disable : 4250) // dominance complaints -# pragma warning( disable : 4284) // operator-> in templates -# pragma warning( disable : 4800) // bool conversion - -// don't let prefix ++,-- stand in for postfix, disaster would ensue - -# pragma warning( error : 4620 4621) - -#endif // _MSC_VER - - -#if __cplusplus - -// Sometimes one wishes to be definite about integer lengths. - -struct int_types -{ - typedef const bool cbool; - typedef const signed char cchar; - typedef const short cshort; - typedef const int cint; - typedef const int clong; - - typedef const double cdouble; - typedef const size_t csize_t; - - typedef unsigned char uchar; // 8 bits - typedef const uchar cuchar; - - typedef short int16; - typedef unsigned short uint16; - typedef const int16 cint16; - typedef const uint16 cuint16; - - typedef int int32; - typedef unsigned int uint32; - typedef const int32 cint32; - typedef const uint32 cuint32; - - typedef unsigned int uint; - typedef unsigned int ulong; - typedef const uint cuint; - typedef const ulong culong; - - - // All structs consume space, may as well have a vptr. 
- - virtual ~int_types(); -}; - - -struct bool_coder_spec; -struct bool_coder; -struct bool_writer; -struct bool_reader; - - -struct bool_coder_namespace : int_types -{ - typedef vp8bc_index_t Index; - typedef bool_coder_spec Spec; - typedef const Spec c_spec; - - enum Rounding - { - Down = vp8bc_down, - down_full = vp8bc_down_full, - Up = vp8bc_up - }; -}; - - -// Archivable specification of a bool coder includes rounding spec -// and probability mapping table. The latter replaces a uchar j -// (0 <= j < 256) with an arbitrary uint16 tbl[j] = p. -// p/65536 is then the probability of a zero. - -struct bool_coder_spec : bool_coder_namespace -{ - friend struct bool_coder; - friend struct bool_writer; - friend struct bool_reader; - friend struct bool_coder_spec_float; - friend struct bool_coder_spec_explicit_table; - friend struct bool_coder_spec_exponential_table; - friend struct BPsrc; -private: - uint w; // precision - Rounding r; - - uint ebits, mbits, ebias; - uint32 mmask; - - Index max_index, half_index; - - uint32 mantissa(Index i) const - { - assert(i < half_index); - return (1 << mbits) + (i & mmask); - } - uint exponent(Index i) const - { - assert(i < half_index); - return ebias - (i >> mbits); - } - - uint16 Ptbl[256]; // kinda clunky, but so is storage management. - - /* Cost in bits of encoding a zero at every probability, scaled by 2^20. - Assumes that index is at most 8 bits wide. */ - - uint32 Ctbl[256]; - - uint32 split(Index i, uint32 R) const // 1 <= split <= max( 1, R-1) - { - if (!ebias) - return 1 + (((R - 1) * Ptbl[i]) >> 16); - - if (i >= half_index) - return R - split(max_index - i, R); - - return 1 + (((R - 1) * mantissa(i)) >> exponent(i)); - } - - uint32 max_range() const - { - return (1 << w) - (r == down_full ? 0 : 1); - } - uint32 min_range() const - { - return (1 << (w - 1)) + (r == down_full ? 1 : 0); - } - uint32 Rinc() const - { - return r == Up ? 
1 : 0; - } - - void check_prec() const; - - bool float_init(uint Ebits, uint Mbits); - - void cost_init(); - - bool_coder_spec( - uint prec, Rounding rr, uint Ebits = 0, uint Mbits = 0 - ) - : w(prec), r(rr) - { - float_init(Ebits, Mbits); - } -public: - // Read complete spec from file. - bool_coder_spec(FILE *); - - // Write spec to file. - void dump(FILE *) const; - - // return probability index best approximating prob. - Index operator()(double prob) const; - - // probability corresponding to index - double operator()(Index i) const; - - Index complement(Index i) const - { - return max_index - i; - } - - Index max_index() const - { - return max_index; - } - Index half_index() const - { - return half_index; - } - - uint32 cost_zero(Index i) const - { - return Ctbl[i]; - } - uint32 cost_one(Index i) const - { - return Ctbl[ max_index - i]; - } - uint32 cost_bit(Index i, bool b) const - { - return Ctbl[b? max_index-i:i]; - } -}; - - -/* Pseudo floating-point probability specification. - - At least one of Ebits and Mbits must be nonzero. - - Since all arithmetic is done at 32 bits, Ebits is at most 5. - - Total significant bits in index is Ebits + Mbits + 1. - - Below the halfway point (i.e. when the top significant bit is 0), - the index is (e << Mbits) + m. - - The exponent e is between 0 and (2**Ebits) - 1, - the mantissa m is between 0 and (2**Mbits) - 1. - - Prepending an implicit 1 to the mantissa, the probability is then - - (2**Mbits + m) >> (e - 2**Ebits - 1 - Mbits), - - which has (1/2)**(2**Ebits + 1) as a minimum - and (1/2) * [1 - 2**(Mbits + 1)] as a maximum. - - When the index is above the halfway point, the probability is the - complement of the probability associated to the complement of the index. - - Note that the probability increases with the index and that, because of - the symmetry, we cannot encode probability exactly 1/2; though we - can get as close to 1/2 as we like, provided we have enough Mbits. 
- - The latter is of course not a problem in practice, one never has - exact probabilities and entropy errors are second order, that is, the - "overcoding" of a zero will be largely compensated for by the - "undercoding" of a one (or vice-versa). - - Compared to arithmetic probability specs (a la VP8), this will do better - at very high and low probabilities and worse at probabilities near 1/2, - as well as facilitating the usage of wider or narrower probability indices. -*/ - -struct bool_coder_spec_float : bool_coder_spec -{ - bool_coder_spec_float( - uint Ebits = 3, uint Mbits = 4, Rounding rr = down_full, uint prec = 12 - ) - : bool_coder_spec(prec, rr, Ebits, Mbits) - { - cost_init(); - } -}; - - -struct bool_coder_spec_explicit_table : bool_coder_spec -{ - bool_coder_spec_explicit_table( - cuint16 probability_table[256] = 0, // default is tbl[i] = i << 8. - Rounding = down_full, - uint precision = 16 - ); -}; - -// Contruct table via multiplicative interpolation between -// p[128] = 1/2 and p[0] = (1/2)^x. -// Since we are working with 16-bit precision, x is at most 16. -// For probabilities to increase with i, we must have x > 1. -// For 0 <= i <= 128, p[i] = (1/2)^{ 1 + [1 - (i/128)]*[x-1] }. -// Finally, p[128+i] = 1 - p[128 - i]. - -struct bool_coder_spec_exponential_table : bool_coder_spec -{ - bool_coder_spec_exponential_table(uint x, Rounding = down_full, uint prec = 16); -}; - - -// Commonalities between writer and reader. 
- -struct bool_coder : bool_coder_namespace -{ - friend struct bool_writer; - friend struct bool_reader; - friend struct BPsrc; -private: - uint32 Low, Range; - cuint32 min_range; - cuint32 rinc; - c_spec spec; - - void _reset() - { - Low = 0; - Range = spec.max_range(); - } - - bool_coder(c_spec &s) - : min_range(s.min_range()), - rinc(s.Rinc()), - spec(s) - { - _reset(); - } - - uint32 half() const - { - return 1 + ((Range - 1) >> 1); - } -public: - c_spec &Spec() const - { - return spec; - } -}; - - -struct bool_writer : bool_coder -{ - friend struct BPsrc; -private: - uchar *Bstart, *Bend, *B; - int bit_lag; - bool is_toast; - void carry(); - void reset() - { - _reset(); - bit_lag = 32 - spec.w; - is_toast = 0; - } - void raw(bool value, uint32 split); -public: - bool_writer(c_spec &, uchar *Dest, size_t Len); - virtual ~bool_writer(); - - void operator()(Index p, bool v) - { - raw(v, spec.split(p, Range)); - } - - uchar *buf() const - { - return Bstart; - } - size_t bytes_written() const - { - return B - Bstart; - } - - // Call when done with input, flushes internal state. - // DO NOT write any more data after calling this. - - bool_writer &flush(); - - void write_bits(int n, uint val) - { - if (n) - { - uint m = 1 << (n - 1); - - do - { - raw((bool)(val & m), half()); - } - while (m >>= 1); - } - } - -# if 0 - // We are agnostic about storage management. - // By default, overflows throw an assert but user can - // override to provide an expanding buffer using ... - - virtual void overflow(uint Len) const; - - // ... this function copies already-written data into new buffer - // and retains new buffer location. - - void new_buffer(uchar *dest, uint Len); - - // Note that storage management is the user's responsibility. -# endif -}; - - -// This could be adjusted to use a little less lookahead. 
- -struct bool_reader : bool_coder -{ - friend struct BPsrc; -private: - cuchar *const Bstart; // for debugging - cuchar *B; - cuchar *const Bend; - cuint shf; - uint bct; - bool raw(uint32 split); -public: - bool_reader(c_spec &s, cuchar *src, size_t Len); - - bool operator()(Index p) - { - return raw(spec.split(p, Range)); - } - - uint read_bits(int num_bits) - { - uint v = 0; - - while (--num_bits >= 0) - v += v + (raw(half()) ? 1 : 0); - - return v; - } -}; - -extern "C" { - -#endif /* __cplusplus */ - - - /* C interface */ - - typedef struct bool_coder_spec bool_coder_spec; - typedef struct bool_writer bool_writer; - typedef struct bool_reader bool_reader; - - typedef const bool_coder_spec c_bool_coder_spec; - typedef const bool_writer c_bool_writer; - typedef const bool_reader c_bool_reader; - - - /* Optionally override default precision when constructing coder_specs. - Just pass a zero pointer if you don't care. - Precision is at most 16 bits for table specs, at most 23 otherwise. */ - - struct vp8bc_prec - { - enum vp8bc_rounding r; /* see top header file for def */ - unsigned int prec; /* range precision in bits */ - }; - - typedef const struct vp8bc_prec vp8bc_c_prec; - - /* bool_coder_spec contains mapping of uchars to actual probabilities - (16 bit uints) as well as (usually immaterial) selection of - exact finite-precision algorithm used (for now, the latter can only - be overridden using the C++ interface). - See comments above the corresponding C++ constructors for discussion, - especially of exponential probability table generation. 
*/ - - bool_coder_spec *vp8bc_vp8spec(); // just like vp8 - - bool_coder_spec *vp8bc_literal_spec( - const unsigned short prob_map[256], // 0 is like vp8 w/more precision - vp8bc_c_prec* - ); - - bool_coder_spec *vp8bc_float_spec( - unsigned int exponent_bits, unsigned int mantissa_bits, vp8bc_c_prec* - ); - - bool_coder_spec *vp8bc_exponential_spec(unsigned int min_exp, vp8bc_c_prec *); - - bool_coder_spec *vp8bc_spec_from_file(FILE *); - - - void vp8bc_destroy_spec(c_bool_coder_spec *); - - void vp8bc_spec_to_file(c_bool_coder_spec *, FILE *); - - - /* Nearest index to supplied probability of zero, 0 <= prob <= 1. */ - - vp8bc_index_t vp8bc_index(c_bool_coder_spec *, double prob); - - vp8bc_index_t vp8bc_index_from_counts( - c_bool_coder_spec *p, unsigned int zero_ct, unsigned int one_ct - ); - - /* In case you want to look */ - - double vp8bc_probability(c_bool_coder_spec *, vp8bc_index_t); - - /* Opposite index */ - - vp8bc_index_t vp8bc_complement(c_bool_coder_spec *, vp8bc_index_t); - - /* Cost in bits of encoding a zero at given probability, scaled by 2^20. - (assumes that an int holds at least 32 bits). */ - - unsigned int vp8bc_cost_zero(c_bool_coder_spec *, vp8bc_index_t); - - unsigned int vp8bc_cost_one(c_bool_coder_spec *, vp8bc_index_t); - unsigned int vp8bc_cost_bit(c_bool_coder_spec *, vp8bc_index_t, int); - - - /* bool_writer interface */ - - /* Length = 0 disables checking for writes beyond buffer end. */ - - bool_writer *vp8bc_create_writer( - c_bool_coder_spec *, unsigned char *Destination, size_t Length - ); - - /* Flushes out any buffered data and returns total # of bytes written. 
*/ - - size_t vp8bc_destroy_writer(bool_writer *); - - void vp8bc_write_bool(bool_writer *, int boolean_val, vp8bc_index_t false_prob); - - void vp8bc_write_bits( - bool_writer *, unsigned int integer_value, int number_of_bits - ); - - c_bool_coder_spec *vp8bc_writer_spec(c_bool_writer *); - - - /* bool_reader interface */ - - /* Length = 0 disables checking for reads beyond buffer end. */ - - bool_reader *vp8bc_create_reader( - c_bool_coder_spec *, const unsigned char *Source, size_t Length - ); - void vp8bc_destroy_reader(bool_reader *); - - int vp8bc_read_bool(bool_reader *, vp8bc_index_t false_prob); - - unsigned int vp8bc_read_bits(bool_reader *, int number_of_bits); - - c_bool_coder_spec *vp8bc_reader_spec(c_bool_reader *); - -#if __cplusplus -} -#endif - -#endif /* bool_coder_h */ diff -Nru libvpx-0.9.5/vp8/common/codec_common_interface.h libvpx-0.9.6/vp8/common/codec_common_interface.h --- libvpx-0.9.5/vp8/common/codec_common_interface.h 2010-10-28 13:14:14.000000000 +0000 +++ libvpx-0.9.6/vp8/common/codec_common_interface.h 1970-01-01 00:00:00.000000000 +0000 @@ -1,93 +0,0 @@ -/* - * Copyright (c) 2010 The WebM project authors. All Rights Reserved. - * - * Use of this source code is governed by a BSD-style license - * that can be found in the LICENSE file in the root of the source - * tree. An additional intellectual property rights grant can be found - * in the file PATENTS. All contributing project authors may - * be found in the AUTHORS file in the root of the source tree. - */ - -#ifndef CODEC_COMMON_INTERFACE_H -#define CODEC_COMMON_INTERFACE_H - -#define __export -#define _export -#define dll_export __declspec( dllexport ) -#define dll_import __declspec( dllimport ) - -// Playback ERROR Codes. 
-#define NO_DECODER_ERROR 0 -#define REMOTE_DECODER_ERROR -1 - -#define DFR_BAD_DCT_COEFF -100 -#define DFR_ZERO_LENGTH_FRAME -101 -#define DFR_FRAME_SIZE_INVALID -102 -#define DFR_OUTPUT_BUFFER_OVERFLOW -103 -#define DFR_INVALID_FRAME_HEADER -104 -#define FR_INVALID_MODE_TOKEN -110 -#define ETR_ALLOCATION_ERROR -200 -#define ETR_INVALID_ROOT_PTR -201 -#define SYNCH_ERROR -400 -#define BUFFER_UNDERFLOW_ERROR -500 -#define PB_IB_OVERFLOW_ERROR -501 - -// External error triggers -#define PB_HEADER_CHECKSUM_ERROR -601 -#define PB_DATA_CHECKSUM_ERROR -602 - -// DCT Error Codes -#define DDCT_EXPANSION_ERROR -700 -#define DDCT_INVALID_TOKEN_ERROR -701 - -// exception_errors -#define GEN_EXCEPTIONS -800 -#define EX_UNQUAL_ERROR -801 - -// Unrecoverable error codes -#define FATAL_PLAYBACK_ERROR -1000 -#define GEN_ERROR_CREATING_CDC -1001 -#define GEN_THREAD_CREATION_ERROR -1002 -#define DFR_CREATE_BMP_FAILED -1003 - -// YUV buffer configuration structure -typedef struct -{ - int y_width; - int y_height; - int y_stride; - - int uv_width; - int uv_height; - int uv_stride; - - unsigned char *y_buffer; - unsigned char *u_buffer; - unsigned char *v_buffer; - -} YUV_BUFFER_CONFIG; -typedef enum -{ - C_SET_KEY_FRAME, - C_SET_FIXED_Q, - C_SET_FIRSTPASS_FILE, - C_SET_EXPERIMENTAL_MIN, - C_SET_EXPERIMENTAL_MAX = C_SET_EXPERIMENTAL_MIN + 255, - C_SET_CHECKPROTECT, - C_SET_TESTMODE, - C_SET_INTERNAL_SIZE, - C_SET_RECOVERY_FRAME, - C_SET_REFERENCEFRAME, - C_SET_GOLDENFRAME - -#ifndef VP50_COMP_INTERFACE - // Specialist test facilities. 
-// C_VCAP_PARAMS, // DO NOT USE FOR NOW WITH VFW CODEC -#endif - -} C_SETTING; - -typedef unsigned long C_SET_VALUE; - - -#endif diff -Nru libvpx-0.9.5/vp8/common/entropy.c libvpx-0.9.6/vp8/common/entropy.c --- libvpx-0.9.5/vp8/common/entropy.c 2010-10-28 13:14:14.000000000 +0000 +++ libvpx-0.9.6/vp8/common/entropy.c 2011-03-04 20:40:39.000000000 +0000 @@ -36,6 +36,14 @@ 7, 11, 14, 15, }; +DECLARE_ALIGNED(16, const short, vp8_default_inv_zig_zag[16]) = +{ + 1, 2, 6, 7, + 3, 5, 8, 13, + 4, 9, 12, 14, + 10, 11, 15, 16 +}; + DECLARE_ALIGNED(16, short, vp8_default_zig_zag_mask[16]); const int vp8_mb_feature_data_bits[MB_LVL_MAX] = {7, 6}; @@ -106,23 +114,20 @@ init_bit_tree(cat6, 11); } - -static vp8bc_index_t bcc1[1], bcc2[2], bcc3[3], bcc4[4], bcc5[5], bcc6[11]; - vp8_extra_bit_struct vp8_extra_bits[12] = { - { 0, 0, 0, 0, 0}, - { 0, 0, 0, 0, 1}, - { 0, 0, 0, 0, 2}, - { 0, 0, 0, 0, 3}, - { 0, 0, 0, 0, 4}, - { cat1, Pcat1, bcc1, 1, 5}, - { cat2, Pcat2, bcc2, 2, 7}, - { cat3, Pcat3, bcc3, 3, 11}, - { cat4, Pcat4, bcc4, 4, 19}, - { cat5, Pcat5, bcc5, 5, 35}, - { cat6, Pcat6, bcc6, 11, 67}, - { 0, 0, 0, 0, 0} + { 0, 0, 0, 0}, + { 0, 0, 0, 1}, + { 0, 0, 0, 2}, + { 0, 0, 0, 3}, + { 0, 0, 0, 4}, + { cat1, Pcat1, 1, 5}, + { cat2, Pcat2, 2, 7}, + { cat3, Pcat3, 3, 11}, + { cat4, Pcat4, 4, 19}, + { cat5, Pcat5, 5, 35}, + { cat6, Pcat6, 11, 67}, + { 0, 0, 0, 0} }; #include "defaultcoefcounts.h" diff -Nru libvpx-0.9.5/vp8/common/entropy.h libvpx-0.9.6/vp8/common/entropy.h --- libvpx-0.9.5/vp8/common/entropy.h 2010-10-28 13:14:14.000000000 +0000 +++ libvpx-0.9.6/vp8/common/entropy.h 2011-03-04 20:40:39.000000000 +0000 @@ -24,10 +24,10 @@ #define FOUR_TOKEN 4 /* 4 Extra Bits 0+1 */ #define DCT_VAL_CATEGORY1 5 /* 5-6 Extra Bits 1+1 */ #define DCT_VAL_CATEGORY2 6 /* 7-10 Extra Bits 2+1 */ -#define DCT_VAL_CATEGORY3 7 /* 11-26 Extra Bits 4+1 */ -#define DCT_VAL_CATEGORY4 8 /* 11-26 Extra Bits 5+1 */ -#define DCT_VAL_CATEGORY5 9 /* 27-58 Extra Bits 5+1 */ -#define DCT_VAL_CATEGORY6 
10 /* 59+ Extra Bits 11+1 */ +#define DCT_VAL_CATEGORY3 7 /* 11-18 Extra Bits 3+1 */ +#define DCT_VAL_CATEGORY4 8 /* 19-34 Extra Bits 4+1 */ +#define DCT_VAL_CATEGORY5 9 /* 35-66 Extra Bits 5+1 */ +#define DCT_VAL_CATEGORY6 10 /* 67+ Extra Bits 11+1 */ #define DCT_EOB_TOKEN 11 /* EOB Extra Bits 0+0 */ #define vp8_coef_tokens 12 @@ -42,7 +42,6 @@ { vp8_tree_p tree; const vp8_prob *prob; - vp8bc_index_t *prob_bc; int Len; int base_val; } vp8_extra_bit_struct; @@ -95,6 +94,7 @@ void vp8_default_coef_probs(struct VP8Common *); extern DECLARE_ALIGNED(16, const int, vp8_default_zig_zag1d[16]); +extern DECLARE_ALIGNED(16, const short, vp8_default_inv_zig_zag[16]); extern short vp8_default_zig_zag_mask[16]; extern const int vp8_mb_feature_data_bits[MB_LVL_MAX]; diff -Nru libvpx-0.9.5/vp8/common/filter.c libvpx-0.9.6/vp8/common/filter.c --- libvpx-0.9.5/vp8/common/filter.c 1970-01-01 00:00:00.000000000 +0000 +++ libvpx-0.9.6/vp8/common/filter.c 2011-03-04 20:40:39.000000000 +0000 @@ -0,0 +1,520 @@ +/* + * Copyright (c) 2010 The WebM project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
+ */ + + +#include +#include "filter.h" +#include "vpx_ports/mem.h" + +DECLARE_ALIGNED(16, const short, vp8_bilinear_filters[8][2]) = +{ + { 128, 0 }, + { 112, 16 }, + { 96, 32 }, + { 80, 48 }, + { 64, 64 }, + { 48, 80 }, + { 32, 96 }, + { 16, 112 } +}; + +DECLARE_ALIGNED(16, const short, vp8_sub_pel_filters[8][6]) = +{ + + { 0, 0, 128, 0, 0, 0 }, /* note that 1/8 pel positions are just as per alpha -0.5 bicubic */ + { 0, -6, 123, 12, -1, 0 }, + { 2, -11, 108, 36, -8, 1 }, /* New 1/4 pel 6 tap filter */ + { 0, -9, 93, 50, -6, 0 }, + { 3, -16, 77, 77, -16, 3 }, /* New 1/2 pel 6 tap filter */ + { 0, -6, 50, 93, -9, 0 }, + { 1, -8, 36, 108, -11, 2 }, /* New 1/4 pel 6 tap filter */ + { 0, -1, 12, 123, -6, 0 }, +}; + +void vp8_filter_block2d_first_pass +( + unsigned char *src_ptr, + int *output_ptr, + unsigned int src_pixels_per_line, + unsigned int pixel_step, + unsigned int output_height, + unsigned int output_width, + const short *vp8_filter +) +{ + unsigned int i, j; + int Temp; + + for (i = 0; i < output_height; i++) + { + for (j = 0; j < output_width; j++) + { + Temp = ((int)src_ptr[-2 * (int)pixel_step] * vp8_filter[0]) + + ((int)src_ptr[-1 * (int)pixel_step] * vp8_filter[1]) + + ((int)src_ptr[0] * vp8_filter[2]) + + ((int)src_ptr[pixel_step] * vp8_filter[3]) + + ((int)src_ptr[2*pixel_step] * vp8_filter[4]) + + ((int)src_ptr[3*pixel_step] * vp8_filter[5]) + + (VP8_FILTER_WEIGHT >> 1); /* Rounding */ + + /* Normalize back to 0-255 */ + Temp = Temp >> VP8_FILTER_SHIFT; + + if (Temp < 0) + Temp = 0; + else if (Temp > 255) + Temp = 255; + + output_ptr[j] = Temp; + src_ptr++; + } + + /* Next row... 
*/ + src_ptr += src_pixels_per_line - output_width; + output_ptr += output_width; + } +} + +void vp8_filter_block2d_second_pass +( + int *src_ptr, + unsigned char *output_ptr, + int output_pitch, + unsigned int src_pixels_per_line, + unsigned int pixel_step, + unsigned int output_height, + unsigned int output_width, + const short *vp8_filter +) +{ + unsigned int i, j; + int Temp; + + for (i = 0; i < output_height; i++) + { + for (j = 0; j < output_width; j++) + { + /* Apply filter */ + Temp = ((int)src_ptr[-2 * (int)pixel_step] * vp8_filter[0]) + + ((int)src_ptr[-1 * (int)pixel_step] * vp8_filter[1]) + + ((int)src_ptr[0] * vp8_filter[2]) + + ((int)src_ptr[pixel_step] * vp8_filter[3]) + + ((int)src_ptr[2*pixel_step] * vp8_filter[4]) + + ((int)src_ptr[3*pixel_step] * vp8_filter[5]) + + (VP8_FILTER_WEIGHT >> 1); /* Rounding */ + + /* Normalize back to 0-255 */ + Temp = Temp >> VP8_FILTER_SHIFT; + + if (Temp < 0) + Temp = 0; + else if (Temp > 255) + Temp = 255; + + output_ptr[j] = (unsigned char)Temp; + src_ptr++; + } + + /* Start next row */ + src_ptr += src_pixels_per_line - output_width; + output_ptr += output_pitch; + } +} + + +void vp8_filter_block2d +( + unsigned char *src_ptr, + unsigned char *output_ptr, + unsigned int src_pixels_per_line, + int output_pitch, + const short *HFilter, + const short *VFilter +) +{ + int FData[9*4]; /* Temp data buffer used in filtering */ + + /* First filter 1-D horizontally... */ + vp8_filter_block2d_first_pass(src_ptr - (2 * src_pixels_per_line), FData, src_pixels_per_line, 1, 9, 4, HFilter); + + /* then filter verticaly... 
*/ + vp8_filter_block2d_second_pass(FData + 8, output_ptr, output_pitch, 4, 4, 4, 4, VFilter); +} + + +void vp8_block_variation_c +( + unsigned char *src_ptr, + int src_pixels_per_line, + int *HVar, + int *VVar +) +{ + int i, j; + unsigned char *Ptr = src_ptr; + + for (i = 0; i < 4; i++) + { + for (j = 0; j < 4; j++) + { + *HVar += abs((int)Ptr[j] - (int)Ptr[j+1]); + *VVar += abs((int)Ptr[j] - (int)Ptr[j+src_pixels_per_line]); + } + + Ptr += src_pixels_per_line; + } +} + + + + +void vp8_sixtap_predict_c +( + unsigned char *src_ptr, + int src_pixels_per_line, + int xoffset, + int yoffset, + unsigned char *dst_ptr, + int dst_pitch +) +{ + const short *HFilter; + const short *VFilter; + + HFilter = vp8_sub_pel_filters[xoffset]; /* 6 tap */ + VFilter = vp8_sub_pel_filters[yoffset]; /* 6 tap */ + + vp8_filter_block2d(src_ptr, dst_ptr, src_pixels_per_line, dst_pitch, HFilter, VFilter); +} +void vp8_sixtap_predict8x8_c +( + unsigned char *src_ptr, + int src_pixels_per_line, + int xoffset, + int yoffset, + unsigned char *dst_ptr, + int dst_pitch +) +{ + const short *HFilter; + const short *VFilter; + int FData[13*16]; /* Temp data buffer used in filtering */ + + HFilter = vp8_sub_pel_filters[xoffset]; /* 6 tap */ + VFilter = vp8_sub_pel_filters[yoffset]; /* 6 tap */ + + /* First filter 1-D horizontally... */ + vp8_filter_block2d_first_pass(src_ptr - (2 * src_pixels_per_line), FData, src_pixels_per_line, 1, 13, 8, HFilter); + + + /* then filter verticaly... */ + vp8_filter_block2d_second_pass(FData + 16, dst_ptr, dst_pitch, 8, 8, 8, 8, VFilter); + +} + +void vp8_sixtap_predict8x4_c +( + unsigned char *src_ptr, + int src_pixels_per_line, + int xoffset, + int yoffset, + unsigned char *dst_ptr, + int dst_pitch +) +{ + const short *HFilter; + const short *VFilter; + int FData[13*16]; /* Temp data buffer used in filtering */ + + HFilter = vp8_sub_pel_filters[xoffset]; /* 6 tap */ + VFilter = vp8_sub_pel_filters[yoffset]; /* 6 tap */ + + /* First filter 1-D horizontally... 
*/ + vp8_filter_block2d_first_pass(src_ptr - (2 * src_pixels_per_line), FData, src_pixels_per_line, 1, 9, 8, HFilter); + + + /* then filter verticaly... */ + vp8_filter_block2d_second_pass(FData + 16, dst_ptr, dst_pitch, 8, 8, 4, 8, VFilter); + +} + +void vp8_sixtap_predict16x16_c +( + unsigned char *src_ptr, + int src_pixels_per_line, + int xoffset, + int yoffset, + unsigned char *dst_ptr, + int dst_pitch +) +{ + const short *HFilter; + const short *VFilter; + int FData[21*24]; /* Temp data buffer used in filtering */ + + + HFilter = vp8_sub_pel_filters[xoffset]; /* 6 tap */ + VFilter = vp8_sub_pel_filters[yoffset]; /* 6 tap */ + + /* First filter 1-D horizontally... */ + vp8_filter_block2d_first_pass(src_ptr - (2 * src_pixels_per_line), FData, src_pixels_per_line, 1, 21, 16, HFilter); + + /* then filter verticaly... */ + vp8_filter_block2d_second_pass(FData + 32, dst_ptr, dst_pitch, 16, 16, 16, 16, VFilter); + +} + + +/**************************************************************************** + * + * ROUTINE : filter_block2d_bil_first_pass + * + * INPUTS : UINT8 *src_ptr : Pointer to source block. + * UINT32 src_stride : Stride of source block. + * UINT32 height : Block height. + * UINT32 width : Block width. + * INT32 *vp8_filter : Array of 2 bi-linear filter taps. + * + * OUTPUTS : INT32 *dst_ptr : Pointer to filtered block. + * + * RETURNS : void + * + * FUNCTION : Applies a 1-D 2-tap bi-linear filter to the source block + * in the horizontal direction to produce the filtered output + * block. Used to implement first-pass of 2-D separable filter. + * + * SPECIAL NOTES : Produces INT32 output to retain precision for next pass. + * Two filter taps should sum to VP8_FILTER_WEIGHT. 
+ * + ****************************************************************************/ +void vp8_filter_block2d_bil_first_pass +( + unsigned char *src_ptr, + unsigned short *dst_ptr, + unsigned int src_stride, + unsigned int height, + unsigned int width, + const short *vp8_filter +) +{ + unsigned int i, j; + + for (i = 0; i < height; i++) + { + for (j = 0; j < width; j++) + { + /* Apply bilinear filter */ + dst_ptr[j] = (((int)src_ptr[0] * vp8_filter[0]) + + ((int)src_ptr[1] * vp8_filter[1]) + + (VP8_FILTER_WEIGHT / 2)) >> VP8_FILTER_SHIFT; + src_ptr++; + } + + /* Next row... */ + src_ptr += src_stride - width; + dst_ptr += width; + } +} + +/**************************************************************************** + * + * ROUTINE : filter_block2d_bil_second_pass + * + * INPUTS : INT32 *src_ptr : Pointer to source block. + * UINT32 dst_pitch : Destination block pitch. + * UINT32 height : Block height. + * UINT32 width : Block width. + * INT32 *vp8_filter : Array of 2 bi-linear filter taps. + * + * OUTPUTS : UINT16 *dst_ptr : Pointer to filtered block. + * + * RETURNS : void + * + * FUNCTION : Applies a 1-D 2-tap bi-linear filter to the source block + * in the vertical direction to produce the filtered output + * block. Used to implement second-pass of 2-D separable filter. + * + * SPECIAL NOTES : Requires 32-bit input as produced by filter_block2d_bil_first_pass. + * Two filter taps should sum to VP8_FILTER_WEIGHT. 
+ * + ****************************************************************************/ +void vp8_filter_block2d_bil_second_pass +( + unsigned short *src_ptr, + unsigned char *dst_ptr, + int dst_pitch, + unsigned int height, + unsigned int width, + const short *vp8_filter +) +{ + unsigned int i, j; + int Temp; + + for (i = 0; i < height; i++) + { + for (j = 0; j < width; j++) + { + /* Apply filter */ + Temp = ((int)src_ptr[0] * vp8_filter[0]) + + ((int)src_ptr[width] * vp8_filter[1]) + + (VP8_FILTER_WEIGHT / 2); + dst_ptr[j] = (unsigned int)(Temp >> VP8_FILTER_SHIFT); + src_ptr++; + } + + /* Next row... */ + dst_ptr += dst_pitch; + } +} + + +/**************************************************************************** + * + * ROUTINE : filter_block2d_bil + * + * INPUTS : UINT8 *src_ptr : Pointer to source block. + * UINT32 src_pitch : Stride of source block. + * UINT32 dst_pitch : Stride of destination block. + * INT32 *HFilter : Array of 2 horizontal filter taps. + * INT32 *VFilter : Array of 2 vertical filter taps. + * INT32 Width : Block width + * INT32 Height : Block height + * + * OUTPUTS : UINT16 *dst_ptr : Pointer to filtered block. + * + * RETURNS : void + * + * FUNCTION : 2-D filters an input block by applying a 2-tap + * bi-linear filter horizontally followed by a 2-tap + * bi-linear filter vertically on the result. + * + * SPECIAL NOTES : The largest block size can be handled here is 16x16 + * + ****************************************************************************/ +void vp8_filter_block2d_bil +( + unsigned char *src_ptr, + unsigned char *dst_ptr, + unsigned int src_pitch, + unsigned int dst_pitch, + const short *HFilter, + const short *VFilter, + int Width, + int Height +) +{ + + unsigned short FData[17*16]; /* Temp data buffer used in filtering */ + + /* First filter 1-D horizontally... */ + vp8_filter_block2d_bil_first_pass(src_ptr, FData, src_pitch, Height + 1, Width, HFilter); + + /* then 1-D vertically... 
*/ + vp8_filter_block2d_bil_second_pass(FData, dst_ptr, dst_pitch, Height, Width, VFilter); +} + + +void vp8_bilinear_predict4x4_c +( + unsigned char *src_ptr, + int src_pixels_per_line, + int xoffset, + int yoffset, + unsigned char *dst_ptr, + int dst_pitch +) +{ + const short *HFilter; + const short *VFilter; + + HFilter = vp8_bilinear_filters[xoffset]; + VFilter = vp8_bilinear_filters[yoffset]; +#if 0 + { + int i; + unsigned char temp1[16]; + unsigned char temp2[16]; + + bilinear_predict4x4_mmx(src_ptr, src_pixels_per_line, xoffset, yoffset, temp1, 4); + vp8_filter_block2d_bil(src_ptr, temp2, src_pixels_per_line, 4, HFilter, VFilter, 4, 4); + + for (i = 0; i < 16; i++) + { + if (temp1[i] != temp2[i]) + { + bilinear_predict4x4_mmx(src_ptr, src_pixels_per_line, xoffset, yoffset, temp1, 4); + vp8_filter_block2d_bil(src_ptr, temp2, src_pixels_per_line, 4, HFilter, VFilter, 4, 4); + } + } + } +#endif + vp8_filter_block2d_bil(src_ptr, dst_ptr, src_pixels_per_line, dst_pitch, HFilter, VFilter, 4, 4); + +} + +void vp8_bilinear_predict8x8_c +( + unsigned char *src_ptr, + int src_pixels_per_line, + int xoffset, + int yoffset, + unsigned char *dst_ptr, + int dst_pitch +) +{ + const short *HFilter; + const short *VFilter; + + HFilter = vp8_bilinear_filters[xoffset]; + VFilter = vp8_bilinear_filters[yoffset]; + + vp8_filter_block2d_bil(src_ptr, dst_ptr, src_pixels_per_line, dst_pitch, HFilter, VFilter, 8, 8); + +} + +void vp8_bilinear_predict8x4_c +( + unsigned char *src_ptr, + int src_pixels_per_line, + int xoffset, + int yoffset, + unsigned char *dst_ptr, + int dst_pitch +) +{ + const short *HFilter; + const short *VFilter; + + HFilter = vp8_bilinear_filters[xoffset]; + VFilter = vp8_bilinear_filters[yoffset]; + + vp8_filter_block2d_bil(src_ptr, dst_ptr, src_pixels_per_line, dst_pitch, HFilter, VFilter, 8, 4); + +} + +void vp8_bilinear_predict16x16_c +( + unsigned char *src_ptr, + int src_pixels_per_line, + int xoffset, + int yoffset, + unsigned char *dst_ptr, + int 
dst_pitch +) +{ + const short *HFilter; + const short *VFilter; + + HFilter = vp8_bilinear_filters[xoffset]; + VFilter = vp8_bilinear_filters[yoffset]; + + vp8_filter_block2d_bil(src_ptr, dst_ptr, src_pixels_per_line, dst_pitch, HFilter, VFilter, 16, 16); +} diff -Nru libvpx-0.9.5/vp8/common/filter_c.c libvpx-0.9.6/vp8/common/filter_c.c --- libvpx-0.9.5/vp8/common/filter_c.c 2010-10-28 13:14:14.000000000 +0000 +++ libvpx-0.9.6/vp8/common/filter_c.c 1970-01-01 00:00:00.000000000 +0000 @@ -1,540 +0,0 @@ -/* - * Copyright (c) 2010 The WebM project authors. All Rights Reserved. - * - * Use of this source code is governed by a BSD-style license - * that can be found in the LICENSE file in the root of the source - * tree. An additional intellectual property rights grant can be found - * in the file PATENTS. All contributing project authors may - * be found in the AUTHORS file in the root of the source tree. - */ - - -#include - -#define BLOCK_HEIGHT_WIDTH 4 -#define VP8_FILTER_WEIGHT 128 -#define VP8_FILTER_SHIFT 7 - - -static const int bilinear_filters[8][2] = -{ - { 128, 0 }, - { 112, 16 }, - { 96, 32 }, - { 80, 48 }, - { 64, 64 }, - { 48, 80 }, - { 32, 96 }, - { 16, 112 } -}; - - -static const short sub_pel_filters[8][6] = -{ - - { 0, 0, 128, 0, 0, 0 }, /* note that 1/8 pel positions are just as per alpha -0.5 bicubic */ - { 0, -6, 123, 12, -1, 0 }, - { 2, -11, 108, 36, -8, 1 }, /* New 1/4 pel 6 tap filter */ - { 0, -9, 93, 50, -6, 0 }, - { 3, -16, 77, 77, -16, 3 }, /* New 1/2 pel 6 tap filter */ - { 0, -6, 50, 93, -9, 0 }, - { 1, -8, 36, 108, -11, 2 }, /* New 1/4 pel 6 tap filter */ - { 0, -1, 12, 123, -6, 0 }, - - - -}; - -void vp8_filter_block2d_first_pass -( - unsigned char *src_ptr, - int *output_ptr, - unsigned int src_pixels_per_line, - unsigned int pixel_step, - unsigned int output_height, - unsigned int output_width, - const short *vp8_filter -) -{ - unsigned int i, j; - int Temp; - - for (i = 0; i < output_height; i++) - { - for (j = 0; j < output_width; 
j++) - { - Temp = ((int)src_ptr[-2 * (int)pixel_step] * vp8_filter[0]) + - ((int)src_ptr[-1 * (int)pixel_step] * vp8_filter[1]) + - ((int)src_ptr[0] * vp8_filter[2]) + - ((int)src_ptr[pixel_step] * vp8_filter[3]) + - ((int)src_ptr[2*pixel_step] * vp8_filter[4]) + - ((int)src_ptr[3*pixel_step] * vp8_filter[5]) + - (VP8_FILTER_WEIGHT >> 1); /* Rounding */ - - /* Normalize back to 0-255 */ - Temp = Temp >> VP8_FILTER_SHIFT; - - if (Temp < 0) - Temp = 0; - else if (Temp > 255) - Temp = 255; - - output_ptr[j] = Temp; - src_ptr++; - } - - /* Next row... */ - src_ptr += src_pixels_per_line - output_width; - output_ptr += output_width; - } -} - -void vp8_filter_block2d_second_pass -( - int *src_ptr, - unsigned char *output_ptr, - int output_pitch, - unsigned int src_pixels_per_line, - unsigned int pixel_step, - unsigned int output_height, - unsigned int output_width, - const short *vp8_filter -) -{ - unsigned int i, j; - int Temp; - - for (i = 0; i < output_height; i++) - { - for (j = 0; j < output_width; j++) - { - /* Apply filter */ - Temp = ((int)src_ptr[-2 * (int)pixel_step] * vp8_filter[0]) + - ((int)src_ptr[-1 * (int)pixel_step] * vp8_filter[1]) + - ((int)src_ptr[0] * vp8_filter[2]) + - ((int)src_ptr[pixel_step] * vp8_filter[3]) + - ((int)src_ptr[2*pixel_step] * vp8_filter[4]) + - ((int)src_ptr[3*pixel_step] * vp8_filter[5]) + - (VP8_FILTER_WEIGHT >> 1); /* Rounding */ - - /* Normalize back to 0-255 */ - Temp = Temp >> VP8_FILTER_SHIFT; - - if (Temp < 0) - Temp = 0; - else if (Temp > 255) - Temp = 255; - - output_ptr[j] = (unsigned char)Temp; - src_ptr++; - } - - /* Start next row */ - src_ptr += src_pixels_per_line - output_width; - output_ptr += output_pitch; - } -} - - -void vp8_filter_block2d -( - unsigned char *src_ptr, - unsigned char *output_ptr, - unsigned int src_pixels_per_line, - int output_pitch, - const short *HFilter, - const short *VFilter -) -{ - int FData[9*4]; /* Temp data bufffer used in filtering */ - - /* First filter 1-D horizontally... 
*/ - vp8_filter_block2d_first_pass(src_ptr - (2 * src_pixels_per_line), FData, src_pixels_per_line, 1, 9, 4, HFilter); - - /* then filter verticaly... */ - vp8_filter_block2d_second_pass(FData + 8, output_ptr, output_pitch, 4, 4, 4, 4, VFilter); -} - - -void vp8_block_variation_c -( - unsigned char *src_ptr, - int src_pixels_per_line, - int *HVar, - int *VVar -) -{ - int i, j; - unsigned char *Ptr = src_ptr; - - for (i = 0; i < 4; i++) - { - for (j = 0; j < 4; j++) - { - *HVar += abs((int)Ptr[j] - (int)Ptr[j+1]); - *VVar += abs((int)Ptr[j] - (int)Ptr[j+src_pixels_per_line]); - } - - Ptr += src_pixels_per_line; - } -} - - - - -void vp8_sixtap_predict_c -( - unsigned char *src_ptr, - int src_pixels_per_line, - int xoffset, - int yoffset, - unsigned char *dst_ptr, - int dst_pitch -) -{ - const short *HFilter; - const short *VFilter; - - HFilter = sub_pel_filters[xoffset]; /* 6 tap */ - VFilter = sub_pel_filters[yoffset]; /* 6 tap */ - - vp8_filter_block2d(src_ptr, dst_ptr, src_pixels_per_line, dst_pitch, HFilter, VFilter); -} -void vp8_sixtap_predict8x8_c -( - unsigned char *src_ptr, - int src_pixels_per_line, - int xoffset, - int yoffset, - unsigned char *dst_ptr, - int dst_pitch -) -{ - const short *HFilter; - const short *VFilter; - int FData[13*16]; /* Temp data bufffer used in filtering */ - - HFilter = sub_pel_filters[xoffset]; /* 6 tap */ - VFilter = sub_pel_filters[yoffset]; /* 6 tap */ - - /* First filter 1-D horizontally... */ - vp8_filter_block2d_first_pass(src_ptr - (2 * src_pixels_per_line), FData, src_pixels_per_line, 1, 13, 8, HFilter); - - - /* then filter verticaly... 
*/ - vp8_filter_block2d_second_pass(FData + 16, dst_ptr, dst_pitch, 8, 8, 8, 8, VFilter); - -} - -void vp8_sixtap_predict8x4_c -( - unsigned char *src_ptr, - int src_pixels_per_line, - int xoffset, - int yoffset, - unsigned char *dst_ptr, - int dst_pitch -) -{ - const short *HFilter; - const short *VFilter; - int FData[13*16]; /* Temp data bufffer used in filtering */ - - HFilter = sub_pel_filters[xoffset]; /* 6 tap */ - VFilter = sub_pel_filters[yoffset]; /* 6 tap */ - - /* First filter 1-D horizontally... */ - vp8_filter_block2d_first_pass(src_ptr - (2 * src_pixels_per_line), FData, src_pixels_per_line, 1, 9, 8, HFilter); - - - /* then filter verticaly... */ - vp8_filter_block2d_second_pass(FData + 16, dst_ptr, dst_pitch, 8, 8, 4, 8, VFilter); - -} - -void vp8_sixtap_predict16x16_c -( - unsigned char *src_ptr, - int src_pixels_per_line, - int xoffset, - int yoffset, - unsigned char *dst_ptr, - int dst_pitch -) -{ - const short *HFilter; - const short *VFilter; - int FData[21*24]; /* Temp data bufffer used in filtering */ - - - HFilter = sub_pel_filters[xoffset]; /* 6 tap */ - VFilter = sub_pel_filters[yoffset]; /* 6 tap */ - - /* First filter 1-D horizontally... */ - vp8_filter_block2d_first_pass(src_ptr - (2 * src_pixels_per_line), FData, src_pixels_per_line, 1, 21, 16, HFilter); - - /* then filter verticaly... */ - vp8_filter_block2d_second_pass(FData + 32, dst_ptr, dst_pitch, 16, 16, 16, 16, VFilter); - -} - - -/**************************************************************************** - * - * ROUTINE : filter_block2d_bil_first_pass - * - * INPUTS : UINT8 *src_ptr : Pointer to source block. - * UINT32 src_pixels_per_line : Stride of input block. - * UINT32 pixel_step : Offset between filter input samples (see notes). - * UINT32 output_height : Input block height. - * UINT32 output_width : Input block width. - * INT32 *vp8_filter : Array of 2 bi-linear filter taps. - * - * OUTPUTS : INT32 *output_ptr : Pointer to filtered block. 
- * - * RETURNS : void - * - * FUNCTION : Applies a 1-D 2-tap bi-linear filter to the source block in - * either horizontal or vertical direction to produce the - * filtered output block. Used to implement first-pass - * of 2-D separable filter. - * - * SPECIAL NOTES : Produces INT32 output to retain precision for next pass. - * Two filter taps should sum to VP8_FILTER_WEIGHT. - * pixel_step defines whether the filter is applied - * horizontally (pixel_step=1) or vertically (pixel_step=stride). - * It defines the offset required to move from one input - * to the next. - * - ****************************************************************************/ -void vp8_filter_block2d_bil_first_pass -( - unsigned char *src_ptr, - unsigned short *output_ptr, - unsigned int src_pixels_per_line, - int pixel_step, - unsigned int output_height, - unsigned int output_width, - const int *vp8_filter -) -{ - unsigned int i, j; - - for (i = 0; i < output_height; i++) - { - for (j = 0; j < output_width; j++) - { - /* Apply bilinear filter */ - output_ptr[j] = (((int)src_ptr[0] * vp8_filter[0]) + - ((int)src_ptr[pixel_step] * vp8_filter[1]) + - (VP8_FILTER_WEIGHT / 2)) >> VP8_FILTER_SHIFT; - src_ptr++; - } - - /* Next row... */ - src_ptr += src_pixels_per_line - output_width; - output_ptr += output_width; - } -} - -/**************************************************************************** - * - * ROUTINE : filter_block2d_bil_second_pass - * - * INPUTS : INT32 *src_ptr : Pointer to source block. - * UINT32 src_pixels_per_line : Stride of input block. - * UINT32 pixel_step : Offset between filter input samples (see notes). - * UINT32 output_height : Input block height. - * UINT32 output_width : Input block width. - * INT32 *vp8_filter : Array of 2 bi-linear filter taps. - * - * OUTPUTS : UINT16 *output_ptr : Pointer to filtered block. 
- * - * RETURNS : void - * - * FUNCTION : Applies a 1-D 2-tap bi-linear filter to the source block in - * either horizontal or vertical direction to produce the - * filtered output block. Used to implement second-pass - * of 2-D separable filter. - * - * SPECIAL NOTES : Requires 32-bit input as produced by filter_block2d_bil_first_pass. - * Two filter taps should sum to VP8_FILTER_WEIGHT. - * pixel_step defines whether the filter is applied - * horizontally (pixel_step=1) or vertically (pixel_step=stride). - * It defines the offset required to move from one input - * to the next. - * - ****************************************************************************/ -void vp8_filter_block2d_bil_second_pass -( - unsigned short *src_ptr, - unsigned char *output_ptr, - int output_pitch, - unsigned int src_pixels_per_line, - unsigned int pixel_step, - unsigned int output_height, - unsigned int output_width, - const int *vp8_filter -) -{ - unsigned int i, j; - int Temp; - - for (i = 0; i < output_height; i++) - { - for (j = 0; j < output_width; j++) - { - /* Apply filter */ - Temp = ((int)src_ptr[0] * vp8_filter[0]) + - ((int)src_ptr[pixel_step] * vp8_filter[1]) + - (VP8_FILTER_WEIGHT / 2); - output_ptr[j] = (unsigned int)(Temp >> VP8_FILTER_SHIFT); - src_ptr++; - } - - /* Next row... */ - src_ptr += src_pixels_per_line - output_width; - output_ptr += output_pitch; - } -} - - -/**************************************************************************** - * - * ROUTINE : filter_block2d_bil - * - * INPUTS : UINT8 *src_ptr : Pointer to source block. - * UINT32 src_pixels_per_line : Stride of input block. - * INT32 *HFilter : Array of 2 horizontal filter taps. - * INT32 *VFilter : Array of 2 vertical filter taps. - * - * OUTPUTS : UINT16 *output_ptr : Pointer to filtered block. - * - * RETURNS : void - * - * FUNCTION : 2-D filters an input block by applying a 2-tap - * bi-linear filter horizontally followed by a 2-tap - * bi-linear filter vertically on the result. 
- * - * SPECIAL NOTES : The largest block size can be handled here is 16x16 - * - ****************************************************************************/ -void vp8_filter_block2d_bil -( - unsigned char *src_ptr, - unsigned char *output_ptr, - unsigned int src_pixels_per_line, - unsigned int dst_pitch, - const int *HFilter, - const int *VFilter, - int Width, - int Height -) -{ - - unsigned short FData[17*16]; /* Temp data bufffer used in filtering */ - - /* First filter 1-D horizontally... */ - vp8_filter_block2d_bil_first_pass(src_ptr, FData, src_pixels_per_line, 1, Height + 1, Width, HFilter); - - /* then 1-D vertically... */ - vp8_filter_block2d_bil_second_pass(FData, output_ptr, dst_pitch, Width, Width, Height, Width, VFilter); -} - - -void vp8_bilinear_predict4x4_c -( - unsigned char *src_ptr, - int src_pixels_per_line, - int xoffset, - int yoffset, - unsigned char *dst_ptr, - int dst_pitch -) -{ - const int *HFilter; - const int *VFilter; - - HFilter = bilinear_filters[xoffset]; - VFilter = bilinear_filters[yoffset]; -#if 0 - { - int i; - unsigned char temp1[16]; - unsigned char temp2[16]; - - bilinear_predict4x4_mmx(src_ptr, src_pixels_per_line, xoffset, yoffset, temp1, 4); - vp8_filter_block2d_bil(src_ptr, temp2, src_pixels_per_line, 4, HFilter, VFilter, 4, 4); - - for (i = 0; i < 16; i++) - { - if (temp1[i] != temp2[i]) - { - bilinear_predict4x4_mmx(src_ptr, src_pixels_per_line, xoffset, yoffset, temp1, 4); - vp8_filter_block2d_bil(src_ptr, temp2, src_pixels_per_line, 4, HFilter, VFilter, 4, 4); - } - } - } -#endif - vp8_filter_block2d_bil(src_ptr, dst_ptr, src_pixels_per_line, dst_pitch, HFilter, VFilter, 4, 4); - -} - -void vp8_bilinear_predict8x8_c -( - unsigned char *src_ptr, - int src_pixels_per_line, - int xoffset, - int yoffset, - unsigned char *dst_ptr, - int dst_pitch -) -{ - const int *HFilter; - const int *VFilter; - - HFilter = bilinear_filters[xoffset]; - VFilter = bilinear_filters[yoffset]; - - vp8_filter_block2d_bil(src_ptr, dst_ptr, 
src_pixels_per_line, dst_pitch, HFilter, VFilter, 8, 8); - -} - -void vp8_bilinear_predict8x4_c -( - unsigned char *src_ptr, - int src_pixels_per_line, - int xoffset, - int yoffset, - unsigned char *dst_ptr, - int dst_pitch -) -{ - const int *HFilter; - const int *VFilter; - - HFilter = bilinear_filters[xoffset]; - VFilter = bilinear_filters[yoffset]; - - vp8_filter_block2d_bil(src_ptr, dst_ptr, src_pixels_per_line, dst_pitch, HFilter, VFilter, 8, 4); - -} - -void vp8_bilinear_predict16x16_c -( - unsigned char *src_ptr, - int src_pixels_per_line, - int xoffset, - int yoffset, - unsigned char *dst_ptr, - int dst_pitch -) -{ - const int *HFilter; - const int *VFilter; - - HFilter = bilinear_filters[xoffset]; - VFilter = bilinear_filters[yoffset]; - - vp8_filter_block2d_bil(src_ptr, dst_ptr, src_pixels_per_line, dst_pitch, HFilter, VFilter, 16, 16); -} diff -Nru libvpx-0.9.5/vp8/common/filter.h libvpx-0.9.6/vp8/common/filter.h --- libvpx-0.9.5/vp8/common/filter.h 1970-01-01 00:00:00.000000000 +0000 +++ libvpx-0.9.6/vp8/common/filter.h 2011-03-04 20:40:39.000000000 +0000 @@ -0,0 +1,22 @@ +/* + * Copyright (c) 2011 The WebM project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
+ */ + + +#ifndef FILTER_H +#define FILTER_H + +#define BLOCK_HEIGHT_WIDTH 4 +#define VP8_FILTER_WEIGHT 128 +#define VP8_FILTER_SHIFT 7 + +extern const short vp8_bilinear_filters[8][2]; +extern const short vp8_sub_pel_filters[8][6]; + +#endif //FILTER_H diff -Nru libvpx-0.9.5/vp8/common/findnearmv.c libvpx-0.9.6/vp8/common/findnearmv.c --- libvpx-0.9.5/vp8/common/findnearmv.c 2010-10-28 13:14:14.000000000 +0000 +++ libvpx-0.9.6/vp8/common/findnearmv.c 2011-03-04 20:40:39.000000000 +0000 @@ -11,47 +11,9 @@ #include "findnearmv.h" -#define FINDNEAR_SEARCH_SITES 3 - /* Predict motion vectors using those from already-decoded nearby blocks. Note that we only consider one 4x4 subblock from each candidate 16x16 macroblock. */ - -typedef union -{ - unsigned int as_int; - MV as_mv; -} int_mv; /* facilitates rapid equality tests */ - -static void mv_bias(const MODE_INFO *x, int refframe, int_mv *mvp, const int *ref_frame_sign_bias) -{ - MV xmv; - xmv = x->mbmi.mv.as_mv; - - if (ref_frame_sign_bias[x->mbmi.ref_frame] != ref_frame_sign_bias[refframe]) - { - xmv.row *= -1; - xmv.col *= -1; - } - - mvp->as_mv = xmv; -} - - -void vp8_clamp_mv(MV *mv, const MACROBLOCKD *xd) -{ - if (mv->col < (xd->mb_to_left_edge - LEFT_TOP_MARGIN)) - mv->col = xd->mb_to_left_edge - LEFT_TOP_MARGIN; - else if (mv->col > xd->mb_to_right_edge + RIGHT_BOTTOM_MARGIN) - mv->col = xd->mb_to_right_edge + RIGHT_BOTTOM_MARGIN; - - if (mv->row < (xd->mb_to_top_edge - LEFT_TOP_MARGIN)) - mv->row = xd->mb_to_top_edge - LEFT_TOP_MARGIN; - else if (mv->row > xd->mb_to_bottom_edge + RIGHT_BOTTOM_MARGIN) - mv->row = xd->mb_to_bottom_edge + RIGHT_BOTTOM_MARGIN; -} - - void vp8_find_near_mvs ( MACROBLOCKD *xd, @@ -82,7 +44,7 @@ if (above->mbmi.mv.as_int) { (++mv)->as_int = above->mbmi.mv.as_int; - mv_bias(above, refframe, mv, ref_frame_sign_bias); + mv_bias(ref_frame_sign_bias[above->mbmi.ref_frame], refframe, mv, ref_frame_sign_bias); ++cntx; } @@ -97,7 +59,7 @@ int_mv this_mv; this_mv.as_int = 
left->mbmi.mv.as_int; - mv_bias(left, refframe, &this_mv, ref_frame_sign_bias); + mv_bias(ref_frame_sign_bias[left->mbmi.ref_frame], refframe, &this_mv, ref_frame_sign_bias); if (this_mv.as_int != mv->as_int) { @@ -119,7 +81,7 @@ int_mv this_mv; this_mv.as_int = aboveleft->mbmi.mv.as_int; - mv_bias(aboveleft, refframe, &this_mv, ref_frame_sign_bias); + mv_bias(ref_frame_sign_bias[aboveleft->mbmi.ref_frame], refframe, &this_mv, ref_frame_sign_bias); if (this_mv.as_int != mv->as_int) { diff -Nru libvpx-0.9.5/vp8/common/findnearmv.h libvpx-0.9.6/vp8/common/findnearmv.h --- libvpx-0.9.5/vp8/common/findnearmv.h 2010-10-28 13:14:14.000000000 +0000 +++ libvpx-0.9.6/vp8/common/findnearmv.h 2011-03-04 20:40:39.000000000 +0000 @@ -17,6 +17,41 @@ #include "modecont.h" #include "treecoder.h" +typedef union +{ + unsigned int as_int; + MV as_mv; +} int_mv; /* facilitates rapid equality tests */ + +static void mv_bias(int refmb_ref_frame_sign_bias, int refframe, int_mv *mvp, const int *ref_frame_sign_bias) +{ + MV xmv; + xmv = mvp->as_mv; + + if (refmb_ref_frame_sign_bias != ref_frame_sign_bias[refframe]) + { + xmv.row *= -1; + xmv.col *= -1; + } + + mvp->as_mv = xmv; +} + +#define LEFT_TOP_MARGIN (16 << 3) +#define RIGHT_BOTTOM_MARGIN (16 << 3) +static void vp8_clamp_mv(MV *mv, const MACROBLOCKD *xd) +{ + if (mv->col < (xd->mb_to_left_edge - LEFT_TOP_MARGIN)) + mv->col = xd->mb_to_left_edge - LEFT_TOP_MARGIN; + else if (mv->col > xd->mb_to_right_edge + RIGHT_BOTTOM_MARGIN) + mv->col = xd->mb_to_right_edge + RIGHT_BOTTOM_MARGIN; + + if (mv->row < (xd->mb_to_top_edge - LEFT_TOP_MARGIN)) + mv->row = xd->mb_to_top_edge - LEFT_TOP_MARGIN; + else if (mv->row > xd->mb_to_bottom_edge + RIGHT_BOTTOM_MARGIN) + mv->row = xd->mb_to_bottom_edge + RIGHT_BOTTOM_MARGIN; +} + void vp8_find_near_mvs ( MACROBLOCKD *xd, @@ -35,8 +70,4 @@ const B_MODE_INFO *vp8_above_bmi(const MODE_INFO *cur_mb, int b, int mi_stride); -#define LEFT_TOP_MARGIN (16 << 3) -#define RIGHT_BOTTOM_MARGIN (16 << 3) - - 
#endif diff -Nru libvpx-0.9.5/vp8/common/fourcc.hpp libvpx-0.9.6/vp8/common/fourcc.hpp --- libvpx-0.9.5/vp8/common/fourcc.hpp 2010-10-28 13:14:14.000000000 +0000 +++ libvpx-0.9.6/vp8/common/fourcc.hpp 1970-01-01 00:00:00.000000000 +0000 @@ -1,121 +0,0 @@ -/* - * Copyright (c) 2010 The WebM project authors. All Rights Reserved. - * - * Use of this source code is governed by a BSD-style license - * that can be found in the LICENSE file in the root of the source - * tree. An additional intellectual property rights grant can be found - * in the file PATENTS. All contributing project authors may - * be found in the AUTHORS file in the root of the source tree. - */ - - -#ifndef FOURCC_HPP -#define FOURCC_HPP - -#include -#include - - -#if defined(__POWERPC__) || defined(__APPLE__) || defined(__MERKS__) -using namespace std; -#endif - -class four_cc -{ -public: - - four_cc(); - four_cc(const char*); - explicit four_cc(unsigned long); - - bool operator==(const four_cc&) const; - bool operator!=(const four_cc&) const; - - bool operator==(const char*) const; - bool operator!=(const char*) const; - - operator unsigned long() const; - unsigned long as_long() const; - - four_cc& operator=(unsigned long); - - char operator[](int) const; - - std::ostream& put(std::ostream&) const; - - bool printable() const; - -private: - - union - { - char code[4]; - unsigned long code_as_long; - }; - -}; - - -inline four_cc::four_cc() -{ -} - -inline four_cc::four_cc(unsigned long x) - : code_as_long(x) -{ -} - -inline four_cc::four_cc(const char* str) -{ - memcpy(code, str, 4); -} - - -inline bool four_cc::operator==(const four_cc& rhs) const -{ - return code_as_long == rhs.code_as_long; -} - -inline bool four_cc::operator!=(const four_cc& rhs) const -{ - return !operator==(rhs); -} - -inline bool four_cc::operator==(const char* rhs) const -{ - return (memcmp(code, rhs, 4) == 0); -} - -inline bool four_cc::operator!=(const char* rhs) const -{ - return !operator==(rhs); -} - - -inline 
four_cc::operator unsigned long() const -{ - return code_as_long; -} - -inline unsigned long four_cc::as_long() const -{ - return code_as_long; -} - -inline char four_cc::operator[](int i) const -{ - return code[i]; -} - -inline four_cc& four_cc::operator=(unsigned long val) -{ - code_as_long = val; - return *this; -} - -inline std::ostream& operator<<(std::ostream& os, const four_cc& rhs) -{ - return rhs.put(os); -} - -#endif diff -Nru libvpx-0.9.5/vp8/common/generic/systemdependent.c libvpx-0.9.6/vp8/common/generic/systemdependent.c --- libvpx-0.9.5/vp8/common/generic/systemdependent.c 2010-10-28 13:14:14.000000000 +0000 +++ libvpx-0.9.6/vp8/common/generic/systemdependent.c 2011-03-04 20:40:39.000000000 +0000 @@ -10,12 +10,12 @@ #include "vpx_ports/config.h" -#include "g_common.h" -#include "subpixel.h" -#include "loopfilter.h" -#include "recon.h" -#include "idct.h" -#include "onyxc_int.h" +#include "vp8/common/g_common.h" +#include "vp8/common/subpixel.h" +#include "vp8/common/loopfilter.h" +#include "vp8/common/recon.h" +#include "vp8/common/idct.h" +#include "vp8/common/onyxc_int.h" extern void vp8_arch_x86_common_init(VP8_COMMON *ctx); extern void vp8_arch_arm_common_init(VP8_COMMON *ctx); @@ -65,11 +65,13 @@ rtcd->loopfilter.simple_b_h = vp8_loop_filter_bhs_c; #if CONFIG_POSTPROC || (CONFIG_VP8_ENCODER && CONFIG_PSNR) - rtcd->postproc.down = vp8_mbpost_proc_down_c; - rtcd->postproc.across = vp8_mbpost_proc_across_ip_c; - rtcd->postproc.downacross = vp8_post_proc_down_and_across_c; - rtcd->postproc.addnoise = vp8_plane_add_noise_c; - rtcd->postproc.blend_mb = vp8_blend_mb_c; + rtcd->postproc.down = vp8_mbpost_proc_down_c; + rtcd->postproc.across = vp8_mbpost_proc_across_ip_c; + rtcd->postproc.downacross = vp8_post_proc_down_and_across_c; + rtcd->postproc.addnoise = vp8_plane_add_noise_c; + rtcd->postproc.blend_mb_inner = vp8_blend_mb_inner_c; + rtcd->postproc.blend_mb_outer = vp8_blend_mb_outer_c; + rtcd->postproc.blend_b = vp8_blend_b_c; #endif #endif diff 
-Nru libvpx-0.9.5/vp8/common/loopfilter.c libvpx-0.9.6/vp8/common/loopfilter.c --- libvpx-0.9.5/vp8/common/loopfilter.c 2010-10-28 13:14:14.000000000 +0000 +++ libvpx-0.9.6/vp8/common/loopfilter.c 2011-03-04 20:40:39.000000000 +0000 @@ -28,13 +28,13 @@ int y_stride, int uv_stride, loop_filter_info *lfi, int simpler_lpf) { (void) simpler_lpf; - vp8_mbloop_filter_horizontal_edge_c(y_ptr, y_stride, lfi->mbflim, lfi->lim, lfi->mbthr, 2); + vp8_mbloop_filter_horizontal_edge_c(y_ptr, y_stride, lfi->mbflim, lfi->lim, lfi->thr, 2); if (u_ptr) - vp8_mbloop_filter_horizontal_edge_c(u_ptr, uv_stride, lfi->uvmbflim, lfi->uvlim, lfi->uvmbthr, 1); + vp8_mbloop_filter_horizontal_edge_c(u_ptr, uv_stride, lfi->mbflim, lfi->lim, lfi->thr, 1); if (v_ptr) - vp8_mbloop_filter_horizontal_edge_c(v_ptr, uv_stride, lfi->uvmbflim, lfi->uvlim, lfi->uvmbthr, 1); + vp8_mbloop_filter_horizontal_edge_c(v_ptr, uv_stride, lfi->mbflim, lfi->lim, lfi->thr, 1); } void vp8_loop_filter_mbhs_c(unsigned char *y_ptr, unsigned char *u_ptr, unsigned char *v_ptr, @@ -44,7 +44,7 @@ (void) v_ptr; (void) uv_stride; (void) simpler_lpf; - vp8_loop_filter_simple_horizontal_edge_c(y_ptr, y_stride, lfi->mbflim, lfi->lim, lfi->mbthr, 2); + vp8_loop_filter_simple_horizontal_edge_c(y_ptr, y_stride, lfi->mbflim, lfi->lim, lfi->thr, 2); } /* Vertical MB Filtering */ @@ -52,13 +52,13 @@ int y_stride, int uv_stride, loop_filter_info *lfi, int simpler_lpf) { (void) simpler_lpf; - vp8_mbloop_filter_vertical_edge_c(y_ptr, y_stride, lfi->mbflim, lfi->lim, lfi->mbthr, 2); + vp8_mbloop_filter_vertical_edge_c(y_ptr, y_stride, lfi->mbflim, lfi->lim, lfi->thr, 2); if (u_ptr) - vp8_mbloop_filter_vertical_edge_c(u_ptr, uv_stride, lfi->uvmbflim, lfi->uvlim, lfi->uvmbthr, 1); + vp8_mbloop_filter_vertical_edge_c(u_ptr, uv_stride, lfi->mbflim, lfi->lim, lfi->thr, 1); if (v_ptr) - vp8_mbloop_filter_vertical_edge_c(v_ptr, uv_stride, lfi->uvmbflim, lfi->uvlim, lfi->uvmbthr, 1); + vp8_mbloop_filter_vertical_edge_c(v_ptr, uv_stride, 
lfi->mbflim, lfi->lim, lfi->thr, 1); } void vp8_loop_filter_mbvs_c(unsigned char *y_ptr, unsigned char *u_ptr, unsigned char *v_ptr, @@ -68,7 +68,7 @@ (void) v_ptr; (void) uv_stride; (void) simpler_lpf; - vp8_loop_filter_simple_vertical_edge_c(y_ptr, y_stride, lfi->mbflim, lfi->lim, lfi->mbthr, 2); + vp8_loop_filter_simple_vertical_edge_c(y_ptr, y_stride, lfi->mbflim, lfi->lim, lfi->thr, 2); } /* Horizontal B Filtering */ @@ -81,10 +81,10 @@ vp8_loop_filter_horizontal_edge_c(y_ptr + 12 * y_stride, y_stride, lfi->flim, lfi->lim, lfi->thr, 2); if (u_ptr) - vp8_loop_filter_horizontal_edge_c(u_ptr + 4 * uv_stride, uv_stride, lfi->uvflim, lfi->uvlim, lfi->uvthr, 1); + vp8_loop_filter_horizontal_edge_c(u_ptr + 4 * uv_stride, uv_stride, lfi->flim, lfi->lim, lfi->thr, 1); if (v_ptr) - vp8_loop_filter_horizontal_edge_c(v_ptr + 4 * uv_stride, uv_stride, lfi->uvflim, lfi->uvlim, lfi->uvthr, 1); + vp8_loop_filter_horizontal_edge_c(v_ptr + 4 * uv_stride, uv_stride, lfi->flim, lfi->lim, lfi->thr, 1); } void vp8_loop_filter_bhs_c(unsigned char *y_ptr, unsigned char *u_ptr, unsigned char *v_ptr, @@ -109,10 +109,10 @@ vp8_loop_filter_vertical_edge_c(y_ptr + 12, y_stride, lfi->flim, lfi->lim, lfi->thr, 2); if (u_ptr) - vp8_loop_filter_vertical_edge_c(u_ptr + 4, uv_stride, lfi->uvflim, lfi->uvlim, lfi->uvthr, 1); + vp8_loop_filter_vertical_edge_c(u_ptr + 4, uv_stride, lfi->flim, lfi->lim, lfi->thr, 1); if (v_ptr) - vp8_loop_filter_vertical_edge_c(v_ptr + 4, uv_stride, lfi->uvflim, lfi->uvlim, lfi->uvthr, 1); + vp8_loop_filter_vertical_edge_c(v_ptr + 4, uv_stride, lfi->flim, lfi->lim, lfi->thr, 1); } void vp8_loop_filter_bvs_c(unsigned char *y_ptr, unsigned char *u_ptr, unsigned char *v_ptr, @@ -137,8 +137,6 @@ int block_inside_limit = 0; int HEVThresh; - const int yhedge_boost = 2; - const int uvhedge_boost = 2; /* For each possible value for the loop filter fill out a "loop_filter_info" entry. 
*/ for (i = 0; i <= MAX_LOOP_FILTER; i++) @@ -182,15 +180,9 @@ for (j = 0; j < 16; j++) { lfi[i].lim[j] = block_inside_limit; - lfi[i].mbflim[j] = filt_lvl + yhedge_boost; - lfi[i].mbthr[j] = HEVThresh; + lfi[i].mbflim[j] = filt_lvl + 2; lfi[i].flim[j] = filt_lvl; lfi[i].thr[j] = HEVThresh; - lfi[i].uvlim[j] = block_inside_limit; - lfi[i].uvmbflim[j] = filt_lvl + uvhedge_boost; - lfi[i].uvmbthr[j] = HEVThresh; - lfi[i].uvflim[j] = filt_lvl; - lfi[i].uvthr[j] = HEVThresh; } } @@ -249,57 +241,52 @@ for (j = 0; j < 16; j++) { /*lfi[i].lim[j] = block_inside_limit; - lfi[i].mbflim[j] = filt_lvl+yhedge_boost;*/ - lfi[i].mbthr[j] = HEVThresh; + lfi[i].mbflim[j] = filt_lvl+2;*/ /*lfi[i].flim[j] = filt_lvl;*/ lfi[i].thr[j] = HEVThresh; - /*lfi[i].uvlim[j] = block_inside_limit; - lfi[i].uvmbflim[j] = filt_lvl+uvhedge_boost;*/ - lfi[i].uvmbthr[j] = HEVThresh; - /*lfi[i].uvflim[j] = filt_lvl;*/ - lfi[i].uvthr[j] = HEVThresh; } } } -void vp8_adjust_mb_lf_value(MACROBLOCKD *mbd, int *filter_level) +int vp8_adjust_mb_lf_value(MACROBLOCKD *mbd, int filter_level) { MB_MODE_INFO *mbmi = &mbd->mode_info_context->mbmi; if (mbd->mode_ref_lf_delta_enabled) { /* Apply delta for reference frame */ - *filter_level += mbd->ref_lf_deltas[mbmi->ref_frame]; + filter_level += mbd->ref_lf_deltas[mbmi->ref_frame]; /* Apply delta for mode */ if (mbmi->ref_frame == INTRA_FRAME) { /* Only the split mode BPRED has a further special case */ if (mbmi->mode == B_PRED) - *filter_level += mbd->mode_lf_deltas[0]; + filter_level += mbd->mode_lf_deltas[0]; } else { /* Zero motion mode */ if (mbmi->mode == ZEROMV) - *filter_level += mbd->mode_lf_deltas[1]; + filter_level += mbd->mode_lf_deltas[1]; /* Split MB motion mode */ else if (mbmi->mode == SPLITMV) - *filter_level += mbd->mode_lf_deltas[3]; + filter_level += mbd->mode_lf_deltas[3]; /* All other inter motion modes (Nearest, Near, New) */ else - *filter_level += mbd->mode_lf_deltas[2]; + filter_level += mbd->mode_lf_deltas[2]; } /* Range check */ - if 
(*filter_level > MAX_LOOP_FILTER) - *filter_level = MAX_LOOP_FILTER; - else if (*filter_level < 0) - *filter_level = 0; + if (filter_level > MAX_LOOP_FILTER) + filter_level = MAX_LOOP_FILTER; + else if (filter_level < 0) + filter_level = 0; } + return filter_level; } @@ -373,7 +360,7 @@ * These specified to 8th pel as they are always compared to values that are in 1/8th pel units * Apply any context driven MB level adjustment */ - vp8_adjust_mb_lf_value(mbd, &filter_level); + filter_level = vp8_adjust_mb_lf_value(mbd, filter_level); if (filter_level) { @@ -473,7 +460,7 @@ filter_level = baseline_filter_level[Segment]; /* Apply any context driven MB level adjustment */ - vp8_adjust_mb_lf_value(mbd, &filter_level); + filter_level = vp8_adjust_mb_lf_value(mbd, filter_level); if (filter_level) { diff -Nru libvpx-0.9.5/vp8/common/loopfilter.h libvpx-0.9.6/vp8/common/loopfilter.h --- libvpx-0.9.5/vp8/common/loopfilter.h 2010-10-28 13:14:14.000000000 +0000 +++ libvpx-0.9.6/vp8/common/loopfilter.h 2011-03-04 20:40:39.000000000 +0000 @@ -32,12 +32,6 @@ DECLARE_ALIGNED(16, signed char, flim[16]); DECLARE_ALIGNED(16, signed char, thr[16]); DECLARE_ALIGNED(16, signed char, mbflim[16]); - DECLARE_ALIGNED(16, signed char, mbthr[16]); - DECLARE_ALIGNED(16, signed char, uvlim[16]); - DECLARE_ALIGNED(16, signed char, uvflim[16]); - DECLARE_ALIGNED(16, signed char, uvthr[16]); - DECLARE_ALIGNED(16, signed char, uvmbflim[16]); - DECLARE_ALIGNED(16, signed char, uvmbthr[16]); } loop_filter_info; diff -Nru libvpx-0.9.5/vp8/common/mac_specs.h libvpx-0.9.6/vp8/common/mac_specs.h --- libvpx-0.9.5/vp8/common/mac_specs.h 2010-10-28 13:14:14.000000000 +0000 +++ libvpx-0.9.6/vp8/common/mac_specs.h 1970-01-01 00:00:00.000000000 +0000 @@ -1,31 +0,0 @@ -/* - * Copyright (c) 2010 The WebM project authors. All Rights Reserved. - * - * Use of this source code is governed by a BSD-style license - * that can be found in the LICENSE file in the root of the source - * tree. 
An additional intellectual property rights grant can be found - * in the file PATENTS. All contributing project authors may - * be found in the AUTHORS file in the root of the source tree. - */ - - -#if !defined(_mac_specs_h) -#define _mac_specs_h - - -#if defined(__cplusplus) -extern "C" { -#endif - - extern unsigned int vp8_read_tsc(); - - extern unsigned int vp8_get_processor_freq(); - - extern unsigned int vpx_has_altivec(); - -#if defined(__cplusplus) -} -#endif - - -#endif diff -Nru libvpx-0.9.5/vp8/common/onyxc_int.h libvpx-0.9.6/vp8/common/onyxc_int.h --- libvpx-0.9.5/vp8/common/onyxc_int.h 2010-10-28 13:14:14.000000000 +0000 +++ libvpx-0.9.6/vp8/common/onyxc_int.h 2011-03-04 20:40:39.000000000 +0000 @@ -105,7 +105,7 @@ YV12_BUFFER_CONFIG post_proc_buffer; YV12_BUFFER_CONFIG temp_scale_frame; - FRAME_TYPE last_frame_type; /* Add to check if vp8_frame_init_loop_filter() can be skipped. */ + FRAME_TYPE last_frame_type; /* Save last frame's frame type for loopfilter init checking and motion search. 
*/ FRAME_TYPE frame_type; int show_frame; @@ -200,7 +200,7 @@ } VP8_COMMON; -void vp8_adjust_mb_lf_value(MACROBLOCKD *mbd, int *filter_level); +int vp8_adjust_mb_lf_value(MACROBLOCKD *mbd, int filter_level); void vp8_init_loop_filter(VP8_COMMON *cm); void vp8_frame_init_loop_filter(loop_filter_info *lfi, int frame_type); extern void vp8_loop_filter_frame(VP8_COMMON *cm, MACROBLOCKD *mbd, int filt_val); diff -Nru libvpx-0.9.5/vp8/common/onyxd.h libvpx-0.9.6/vp8/common/onyxd.h --- libvpx-0.9.5/vp8/common/onyxd.h 2010-10-28 13:14:14.000000000 +0000 +++ libvpx-0.9.6/vp8/common/onyxd.h 2011-03-04 20:40:39.000000000 +0000 @@ -51,7 +51,7 @@ int vp8dx_get_setting(VP8D_PTR comp, VP8D_SETTING oxst); int vp8dx_receive_compressed_data(VP8D_PTR comp, unsigned long size, const unsigned char *dest, INT64 time_stamp); - int vp8dx_get_raw_frame(VP8D_PTR comp, YV12_BUFFER_CONFIG *sd, INT64 *time_stamp, INT64 *time_end_stamp, int deblock_level, int noise_level, int flags); + int vp8dx_get_raw_frame(VP8D_PTR comp, YV12_BUFFER_CONFIG *sd, INT64 *time_stamp, INT64 *time_end_stamp, vp8_ppflags_t *flags); int vp8dx_get_reference(VP8D_PTR comp, VP8_REFFRAME ref_frame_flag, YV12_BUFFER_CONFIG *sd); int vp8dx_set_reference(VP8D_PTR comp, VP8_REFFRAME ref_frame_flag, YV12_BUFFER_CONFIG *sd); diff -Nru libvpx-0.9.5/vp8/common/onyx.h libvpx-0.9.6/vp8/common/onyx.h --- libvpx-0.9.5/vp8/common/onyx.h 2010-10-28 13:14:14.000000000 +0000 +++ libvpx-0.9.6/vp8/common/onyx.h 2011-03-04 20:40:39.000000000 +0000 @@ -18,6 +18,7 @@ #endif #include "vpx/internal/vpx_codec_internal.h" +#include "vpx/vp8cx.h" #include "vpx_scale/yv12config.h" #include "type_aliases.h" #include "ppflags.h" @@ -45,7 +46,8 @@ typedef enum { USAGE_STREAM_FROM_SERVER = 0x0, - USAGE_LOCAL_FILE_PLAYBACK = 0x1 + USAGE_LOCAL_FILE_PLAYBACK = 0x1, + USAGE_CONSTRAINED_QUALITY = 0x2 } END_USAGE; @@ -149,6 +151,7 @@ int fixed_q; int worst_allowed_q; int best_allowed_q; + int cq_level; // allow internal resizing ( currently disabled in the 
build !!!!!) int allow_spatial_resampling; @@ -186,9 +189,10 @@ int arnr_strength ; int arnr_type ; - struct vpx_fixed_buf two_pass_stats_in; struct vpx_codec_pkt_list *output_pkt_list; + + vp8e_tuning tuning; } VP8_CONFIG; @@ -204,7 +208,7 @@ // and not just a copy of the pointer.. int vp8_receive_raw_frame(VP8_PTR comp, unsigned int frame_flags, YV12_BUFFER_CONFIG *sd, INT64 time_stamp, INT64 end_time_stamp); int vp8_get_compressed_data(VP8_PTR comp, unsigned int *frame_flags, unsigned long *size, unsigned char *dest, INT64 *time_stamp, INT64 *time_end, int flush); - int vp8_get_preview_raw_frame(VP8_PTR comp, YV12_BUFFER_CONFIG *dest, int deblock_level, int noise_level, int flags); + int vp8_get_preview_raw_frame(VP8_PTR comp, YV12_BUFFER_CONFIG *dest, vp8_ppflags_t *flags); int vp8_use_as_reference(VP8_PTR comp, int ref_frame_flags); int vp8_update_reference(VP8_PTR comp, int ref_frame_flags); diff -Nru libvpx-0.9.5/vp8/common/partialgfupdate.h libvpx-0.9.6/vp8/common/partialgfupdate.h --- libvpx-0.9.5/vp8/common/partialgfupdate.h 2010-10-28 13:14:14.000000000 +0000 +++ libvpx-0.9.6/vp8/common/partialgfupdate.h 1970-01-01 00:00:00.000000000 +0000 @@ -1,19 +0,0 @@ -/* - * Copyright (c) 2010 The WebM project authors. All Rights Reserved. - * - * Use of this source code is governed by a BSD-style license - * that can be found in the LICENSE file in the root of the source - * tree. An additional intellectual property rights grant can be found - * in the file PATENTS. All contributing project authors may - * be found in the AUTHORS file in the root of the source tree. 
- */ - - -#ifndef __INC_PARTIALGFUPDATE_H -#define __INC_PARTIALGFUPDATE_H - -#include "onyxc_int.h" - -extern void update_gf_selective(ONYX_COMMON *cm, MACROBLOCKD *x); - -#endif diff -Nru libvpx-0.9.5/vp8/common/postproc.c libvpx-0.9.6/vp8/common/postproc.c --- libvpx-0.9.5/vp8/common/postproc.c 2010-10-28 13:14:14.000000000 +0000 +++ libvpx-0.9.6/vp8/common/postproc.c 2011-03-04 20:40:39.000000000 +0000 @@ -26,7 +26,7 @@ ( (0.439*(float)(t>>16)) - (0.368*(float)(t>>8&0xff)) - (0.071*(float)(t&0xff)) + 128) /* global constants */ - +#if CONFIG_POSTPROC_VISUALIZER static const unsigned char MB_PREDICTION_MODE_colors[MB_MODE_COUNT][3] = { { RGB_TO_YUV(0x98FB98) }, /* PaleGreen */ @@ -41,13 +41,32 @@ { RGB_TO_YUV(0xFF0000) } /* Red */ }; -static const unsigned char MV_REFERENCE_FRAME_colors[MB_MODE_COUNT][3] = +static const unsigned char B_PREDICTION_MODE_colors[B_MODE_COUNT][3] = +{ + { RGB_TO_YUV(0x6633ff) }, /* Purple */ + { RGB_TO_YUV(0xcc33ff) }, /* Magenta */ + { RGB_TO_YUV(0xff33cc) }, /* Pink */ + { RGB_TO_YUV(0xff3366) }, /* Coral */ + { RGB_TO_YUV(0x3366ff) }, /* Blue */ + { RGB_TO_YUV(0xed00f5) }, /* Dark Blue */ + { RGB_TO_YUV(0x2e00b8) }, /* Dark Purple */ + { RGB_TO_YUV(0xff6633) }, /* Orange */ + { RGB_TO_YUV(0x33ccff) }, /* Light Blue */ + { RGB_TO_YUV(0x8ab800) }, /* Green */ + { RGB_TO_YUV(0xffcc33) }, /* Light Orange */ + { RGB_TO_YUV(0x33ffcc) }, /* Aqua */ + { RGB_TO_YUV(0x66ff33) }, /* Light Green */ + { RGB_TO_YUV(0xccff33) }, /* Yellow */ +}; + +static const unsigned char MV_REFERENCE_FRAME_colors[MAX_REF_FRAMES][3] = { { RGB_TO_YUV(0x00ff00) }, /* Blue */ { RGB_TO_YUV(0x0000ff) }, /* Green */ { RGB_TO_YUV(0xffff00) }, /* Yellow */ { RGB_TO_YUV(0xff0000) }, /* Red */ }; +#endif static const short kernel5[] = { @@ -476,7 +495,7 @@ * edges unblended to give distinction to macro blocks in areas * filled with the same color block. 
*/ -void vp8_blend_mb_c (unsigned char *y, unsigned char *u, unsigned char *v, +void vp8_blend_mb_inner_c (unsigned char *y, unsigned char *u, unsigned char *v, int y1, int u1, int v1, int alpha, int stride) { int i, j; @@ -484,10 +503,10 @@ int u1_const = u1*((1<<16)-alpha); int v1_const = v1*((1<<16)-alpha); - y += stride + 2; - for (i = 0; i < 14; i++) + y += 2*stride + 2; + for (i = 0; i < 12; i++) { - for (j = 0; j < 14; j++) + for (j = 0; j < 12; j++) { y[j] = (y[j]*alpha + y1_const)>>16; } @@ -511,6 +530,104 @@ } } +/* Blend only the edge of the macro block. Leave center + * unblended to allow for other visualizations to be layered. + */ +void vp8_blend_mb_outer_c (unsigned char *y, unsigned char *u, unsigned char *v, + int y1, int u1, int v1, int alpha, int stride) +{ + int i, j; + int y1_const = y1*((1<<16)-alpha); + int u1_const = u1*((1<<16)-alpha); + int v1_const = v1*((1<<16)-alpha); + + for (i = 0; i < 2; i++) + { + for (j = 0; j < 16; j++) + { + y[j] = (y[j]*alpha + y1_const)>>16; + } + y += stride; + } + + for (i = 0; i < 12; i++) + { + y[0] = (y[0]*alpha + y1_const)>>16; + y[1] = (y[1]*alpha + y1_const)>>16; + y[14] = (y[14]*alpha + y1_const)>>16; + y[15] = (y[15]*alpha + y1_const)>>16; + y += stride; + } + + for (i = 0; i < 2; i++) + { + for (j = 0; j < 16; j++) + { + y[j] = (y[j]*alpha + y1_const)>>16; + } + y += stride; + } + + stride >>= 1; + + for (j = 0; j < 8; j++) + { + u[j] = (u[j]*alpha + u1_const)>>16; + v[j] = (v[j]*alpha + v1_const)>>16; + } + u += stride; + v += stride; + + for (i = 0; i < 6; i++) + { + u[0] = (u[0]*alpha + u1_const)>>16; + v[0] = (v[0]*alpha + v1_const)>>16; + + u[7] = (u[7]*alpha + u1_const)>>16; + v[7] = (v[7]*alpha + v1_const)>>16; + + u += stride; + v += stride; + } + + for (j = 0; j < 8; j++) + { + u[j] = (u[j]*alpha + u1_const)>>16; + v[j] = (v[j]*alpha + v1_const)>>16; + } +} + +void vp8_blend_b_c (unsigned char *y, unsigned char *u, unsigned char *v, + int y1, int u1, int v1, int alpha, int stride) +{ + int 
i, j; + int y1_const = y1*((1<<16)-alpha); + int u1_const = u1*((1<<16)-alpha); + int v1_const = v1*((1<<16)-alpha); + + for (i = 0; i < 4; i++) + { + for (j = 0; j < 4; j++) + { + y[j] = (y[j]*alpha + y1_const)>>16; + } + y += stride; + } + + stride >>= 1; + + for (i = 0; i < 2; i++) + { + for (j = 0; j < 2; j++) + { + u[j] = (u[j]*alpha + u1_const)>>16; + v[j] = (v[j]*alpha + v1_const)>>16; + } + u += stride; + v += stride; + } +} + static void constrain_line (int x0, int *x1, int y0, int *y1, int width, int height) { int dx; @@ -522,7 +639,7 @@ dy = *y1 - y0; *x1 = width; - if (dy) + if (dx) *y1 = ((width-x0)*dy)/dx + y0; } if (*x1 < 0) @@ -531,7 +648,7 @@ dy = *y1 - y0; *x1 = 0; - if (dy) + if (dx) *y1 = ((0-x0)*dy)/dx + y0; } if (*y1 > height) @@ -540,7 +657,7 @@ dy = *y1 - y0; *y1 = height; - if (dx) + if (dy) *x1 = ((height-y0)*dx)/dy + x0; } if (*y1 < 0) @@ -549,7 +666,7 @@ dy = *y1 - y0; *y1 = 0; - if (dx) + if (dy) *x1 = ((0-y0)*dx)/dy + x0; } } @@ -561,10 +678,12 @@ #define RTCD_VTABLE(oci) NULL #endif -int vp8_post_proc_frame(VP8_COMMON *oci, YV12_BUFFER_CONFIG *dest, int deblock_level, int noise_level, int flags) +int vp8_post_proc_frame(VP8_COMMON *oci, YV12_BUFFER_CONFIG *dest, vp8_ppflags_t *ppflags) { - char message[512]; int q = oci->filter_level * 10 / 6; + int flags = ppflags->post_proc_flag; + int deblock_level = ppflags->deblocking_level; + int noise_level = ppflags->noise_level; if (!oci->frame_to_show) return -1; @@ -621,8 +740,10 @@ oci->post_proc_buffer.y_stride); } - if (flags & VP8D_DEBUG_LEVEL1) +#if CONFIG_POSTPROC_VISUALIZER + if (flags & VP8D_DEBUG_TXT_FRAME_INFO) { + char message[512]; sprintf(message, "F%1dG%1dQ%3dF%3dP%d_s%dx%d", (oci->frame_type == KEY_FRAME), oci->refresh_golden_frame, @@ -633,7 +754,7 @@ vp8_blit_text(message, oci->post_proc_buffer.y_buffer, oci->post_proc_buffer.y_stride); } - if (flags & VP8D_DEBUG_LEVEL2) + if (flags & VP8D_DEBUG_TXT_MBLK_MODES) { int i, j; unsigned char *y_ptr; @@ -665,7 +786,7 @@ } } - if 
(flags & VP8D_DEBUG_LEVEL3) + if (flags & VP8D_DEBUG_TXT_DC_DIFF) { int i, j; unsigned char *y_ptr; @@ -700,45 +821,15 @@ } } - if (flags & VP8D_DEBUG_LEVEL4) + if (flags & VP8D_DEBUG_TXT_RATE_INFO) { + char message[512]; sprintf(message, "Bitrate: %10.2f frame_rate: %10.2f ", oci->bitrate, oci->framerate); vp8_blit_text(message, oci->post_proc_buffer.y_buffer, oci->post_proc_buffer.y_stride); -#if 0 - int i, j; - unsigned char *y_ptr; - YV12_BUFFER_CONFIG *post = &oci->post_proc_buffer; - int mb_rows = post->y_height >> 4; - int mb_cols = post->y_width >> 4; - int mb_index = 0; - MODE_INFO *mi = oci->mi; - - y_ptr = post->y_buffer + 4 * post->y_stride + 4; - - /* vp8_filter each macro block */ - for (i = 0; i < mb_rows; i++) - { - for (j = 0; j < mb_cols; j++) - { - char zz[4]; - - sprintf(zz, "%c", mi[mb_index].mbmi.dc_diff + '0'); - vp8_blit_text(zz, y_ptr, post->y_stride); - mb_index ++; - y_ptr += 16; - } - - mb_index ++; /* border */ - y_ptr += post->y_stride * 16 - post->y_width; - - } - -#endif - } /* Draw motion vectors */ - if (flags & VP8D_DEBUG_LEVEL5) + if ((flags & VP8D_DEBUG_DRAW_MV) && ppflags->display_mv_flag) { YV12_BUFFER_CONFIG *post = &oci->post_proc_buffer; int width = post->y_width; @@ -749,29 +840,144 @@ MODE_INFO *mi = oci->mi; int x0, y0; - for (y0 = 8; y0 < (height + 8); y0 += 16) + for (y0 = 0; y0 < height; y0 += 16) { - for (x0 = 8; x0 < (width + 8); x0 += 16) + for (x0 = 0; x0 < width; x0 += 16) { - int x1, y1; - if (mi->mbmi.mode >= NEARESTMV) + int x1, y1; + + if (!(ppflags->display_mv_flag & (1<mbmi.mode))) + { + mi++; + continue; + } + + if (mi->mbmi.mode == SPLITMV) + { + switch (mi->mbmi.partitioning) + { + case 0 : /* mv_top_bottom */ + { + B_MODE_INFO *bmi = &mi->bmi[0]; + MV *mv = &bmi->mv.as_mv; + + x1 = x0 + 8 + (mv->col >> 3); + y1 = y0 + 4 + (mv->row >> 3); + + constrain_line (x0+8, &x1, y0+4, &y1, width, height); + vp8_blit_line (x0+8, x1, y0+4, y1, y_buffer, y_stride); + + bmi = &mi->bmi[8]; + + x1 = x0 + 8 + (mv->col >> 
3); + y1 = y0 +12 + (mv->row >> 3); + + constrain_line (x0+8, &x1, y0+12, &y1, width, height); + vp8_blit_line (x0+8, x1, y0+12, y1, y_buffer, y_stride); + + break; + } + case 1 : /* mv_left_right */ + { + B_MODE_INFO *bmi = &mi->bmi[0]; + MV *mv = &bmi->mv.as_mv; + + x1 = x0 + 4 + (mv->col >> 3); + y1 = y0 + 8 + (mv->row >> 3); + + constrain_line (x0+4, &x1, y0+8, &y1, width, height); + vp8_blit_line (x0+4, x1, y0+8, y1, y_buffer, y_stride); + + bmi = &mi->bmi[2]; + + x1 = x0 +12 + (mv->col >> 3); + y1 = y0 + 8 + (mv->row >> 3); + + constrain_line (x0+12, &x1, y0+8, &y1, width, height); + vp8_blit_line (x0+12, x1, y0+8, y1, y_buffer, y_stride); + + break; + } + case 2 : /* mv_quarters */ + { + B_MODE_INFO *bmi = &mi->bmi[0]; + MV *mv = &bmi->mv.as_mv; + + x1 = x0 + 4 + (mv->col >> 3); + y1 = y0 + 4 + (mv->row >> 3); + + constrain_line (x0+4, &x1, y0+4, &y1, width, height); + vp8_blit_line (x0+4, x1, y0+4, y1, y_buffer, y_stride); + + bmi = &mi->bmi[2]; + + x1 = x0 +12 + (mv->col >> 3); + y1 = y0 + 4 + (mv->row >> 3); + + constrain_line (x0+12, &x1, y0+4, &y1, width, height); + vp8_blit_line (x0+12, x1, y0+4, y1, y_buffer, y_stride); + + bmi = &mi->bmi[8]; + + x1 = x0 + 4 + (mv->col >> 3); + y1 = y0 +12 + (mv->row >> 3); + + constrain_line (x0+4, &x1, y0+12, &y1, width, height); + vp8_blit_line (x0+4, x1, y0+12, y1, y_buffer, y_stride); + + bmi = &mi->bmi[10]; + + x1 = x0 +12 + (mv->col >> 3); + y1 = y0 +12 + (mv->row >> 3); + + constrain_line (x0+12, &x1, y0+12, &y1, width, height); + vp8_blit_line (x0+12, x1, y0+12, y1, y_buffer, y_stride); + break; + } + default : + { + B_MODE_INFO *bmi = mi->bmi; + int bx0, by0; + + for (by0 = y0; by0 < (y0+16); by0 += 4) + { + for (bx0 = x0; bx0 < (x0+16); bx0 += 4) + { + MV *mv = &bmi->mv.as_mv; + + x1 = bx0 + 2 + (mv->col >> 3); + y1 = by0 + 2 + (mv->row >> 3); + + constrain_line (bx0+2, &x1, by0+2, &y1, width, height); + vp8_blit_line (bx0+2, x1, by0+2, y1, y_buffer, y_stride); + + bmi++; + } + } + } + } + } + else if 
(mi->mbmi.mode >= NEARESTMV) { MV *mv = &mi->mbmi.mv.as_mv; + const int lx0 = x0 + 8; + const int ly0 = y0 + 8; - x1 = x0 + (mv->col >> 3); - y1 = y0 + (mv->row >> 3); + x1 = lx0 + (mv->col >> 3); + y1 = ly0 + (mv->row >> 3); - if (x1 != x0 && y1 != y0) + if (x1 != lx0 && y1 != ly0) { - constrain_line (x0, &x1, y0-1, &y1, width, height); - vp8_blit_line (x0, x1, y0-1, y1, y_buffer, y_stride); + constrain_line (lx0, &x1, ly0-1, &y1, width, height); + vp8_blit_line (lx0, x1, ly0-1, y1, y_buffer, y_stride); - constrain_line (x0, &x1, y0+1, &y1, width, height); - vp8_blit_line (x0, x1, y0+1, y1, y_buffer, y_stride); + constrain_line (lx0, &x1, ly0+1, &y1, width, height); + vp8_blit_line (lx0, x1, ly0+1, y1, y_buffer, y_stride); } else - vp8_blit_line (x0, x1, y0, y1, y_buffer, y_stride); + vp8_blit_line (lx0, x1, ly0, y1, y_buffer, y_stride); } + mi++; } mi++; @@ -779,9 +985,10 @@ } /* Color in block modes */ - if (flags & VP8D_DEBUG_LEVEL6) + if ((flags & VP8D_DEBUG_CLR_BLK_MODES) + && (ppflags->display_mb_modes_flag || ppflags->display_b_modes_flag)) { - int i, j; + int y, x; YV12_BUFFER_CONFIG *post = &oci->post_proc_buffer; int width = post->y_width; int height = post->y_height; @@ -791,18 +998,54 @@ int y_stride = oci->post_proc_buffer.y_stride; MODE_INFO *mi = oci->mi; - for (i = 0; i < height; i += 16) + for (y = 0; y < height; y += 16) { - for (j = 0; j < width; j += 16) + for (x = 0; x < width; x += 16) { int Y = 0, U = 0, V = 0; - Y = MB_PREDICTION_MODE_colors[mi->mbmi.mode][0]; - U = MB_PREDICTION_MODE_colors[mi->mbmi.mode][1]; - V = MB_PREDICTION_MODE_colors[mi->mbmi.mode][2]; + if (mi->mbmi.mode == B_PRED && + ((ppflags->display_mb_modes_flag & B_PRED) || ppflags->display_b_modes_flag)) + { + int by, bx; + unsigned char *yl, *ul, *vl; + B_MODE_INFO *bmi = mi->bmi; + + yl = y_ptr + x; + ul = u_ptr + (x>>1); + vl = v_ptr + (x>>1); - POSTPROC_INVOKE(RTCD_VTABLE(oci), blend_mb) - (&y_ptr[j], &u_ptr[j>>1], &v_ptr[j>>1], Y, U, V, 0xc000, y_stride); + for (by = 
0; by < 16; by += 4) + { + for (bx = 0; bx < 16; bx += 4) + { + if ((ppflags->display_b_modes_flag & (1<mbmi.mode)) + || (ppflags->display_mb_modes_flag & B_PRED)) + { + Y = B_PREDICTION_MODE_colors[bmi->mode][0]; + U = B_PREDICTION_MODE_colors[bmi->mode][1]; + V = B_PREDICTION_MODE_colors[bmi->mode][2]; + + POSTPROC_INVOKE(RTCD_VTABLE(oci), blend_b) + (yl+bx, ul+(bx>>1), vl+(bx>>1), Y, U, V, 0xc000, y_stride); + } + bmi++; + } + + yl += y_stride*4; + ul += y_stride*1; + vl += y_stride*1; + } + } + else if (ppflags->display_mb_modes_flag & (1<mbmi.mode)) + { + Y = MB_PREDICTION_MODE_colors[mi->mbmi.mode][0]; + U = MB_PREDICTION_MODE_colors[mi->mbmi.mode][1]; + V = MB_PREDICTION_MODE_colors[mi->mbmi.mode][2]; + + POSTPROC_INVOKE(RTCD_VTABLE(oci), blend_mb_inner) + (y_ptr+x, u_ptr+(x>>1), v_ptr+(x>>1), Y, U, V, 0xc000, y_stride); + } mi++; } @@ -815,9 +1058,9 @@ } /* Color in frame reference blocks */ - if (flags & VP8D_DEBUG_LEVEL7) + if ((flags & VP8D_DEBUG_CLR_FRM_REF_BLKS) && ppflags->display_ref_frame_flag) { - int i, j; + int y, x; YV12_BUFFER_CONFIG *post = &oci->post_proc_buffer; int width = post->y_width; int height = post->y_height; @@ -827,18 +1070,21 @@ int y_stride = oci->post_proc_buffer.y_stride; MODE_INFO *mi = oci->mi; - for (i = 0; i < height; i += 16) + for (y = 0; y < height; y += 16) { - for (j = 0; j < width; j +=16) + for (x = 0; x < width; x +=16) { int Y = 0, U = 0, V = 0; - Y = MV_REFERENCE_FRAME_colors[mi->mbmi.ref_frame][0]; - U = MV_REFERENCE_FRAME_colors[mi->mbmi.ref_frame][1]; - V = MV_REFERENCE_FRAME_colors[mi->mbmi.ref_frame][2]; + if (ppflags->display_ref_frame_flag & (1<mbmi.ref_frame)) + { + Y = MV_REFERENCE_FRAME_colors[mi->mbmi.ref_frame][0]; + U = MV_REFERENCE_FRAME_colors[mi->mbmi.ref_frame][1]; + V = MV_REFERENCE_FRAME_colors[mi->mbmi.ref_frame][2]; - POSTPROC_INVOKE(RTCD_VTABLE(oci), blend_mb) - (&y_ptr[j], &u_ptr[j>>1], &v_ptr[j>>1], Y, U, V, 0xc000, y_stride); + POSTPROC_INVOKE(RTCD_VTABLE(oci), blend_mb_outer) + (y_ptr+x, 
u_ptr+(x>>1), v_ptr+(x>>1), Y, U, V, 0xc000, y_stride); + } mi++; } @@ -849,6 +1095,7 @@ mi++; } } +#endif *dest = oci->post_proc_buffer; diff -Nru libvpx-0.9.5/vp8/common/postproc.h libvpx-0.9.6/vp8/common/postproc.h --- libvpx-0.9.5/vp8/common/postproc.h 2010-10-28 13:14:14.000000000 +0000 +++ libvpx-0.9.6/vp8/common/postproc.h 2011-03-04 20:40:39.000000000 +0000 @@ -24,7 +24,15 @@ char whiteclamp[16], char bothclamp[16],\ unsigned int w, unsigned int h, int pitch) -#define prototype_postproc_blend_mb(sym)\ +#define prototype_postproc_blend_mb_inner(sym)\ + void sym (unsigned char *y, unsigned char *u, unsigned char *v,\ + int y1, int u1, int v1, int alpha, int stride) + +#define prototype_postproc_blend_mb_outer(sym)\ + void sym (unsigned char *y, unsigned char *u, unsigned char *v,\ + int y1, int u1, int v1, int alpha, int stride) + +#define prototype_postproc_blend_b(sym)\ void sym (unsigned char *y, unsigned char *u, unsigned char *v,\ int y1, int u1, int v1, int alpha, int stride) @@ -52,22 +60,36 @@ #endif extern prototype_postproc_addnoise(vp8_postproc_addnoise); -#ifndef vp8_postproc_blend_mb -#define vp8_postproc_blend_mb vp8_blend_mb_c +#ifndef vp8_postproc_blend_mb_inner +#define vp8_postproc_blend_mb_inner vp8_blend_mb_inner_c +#endif +extern prototype_postproc_blend_mb_inner(vp8_postproc_blend_mb_inner); + +#ifndef vp8_postproc_blend_mb_outer +#define vp8_postproc_blend_mb_outer vp8_blend_mb_outer_c +#endif +extern prototype_postproc_blend_mb_outer(vp8_postproc_blend_mb_outer); + +#ifndef vp8_postproc_blend_b +#define vp8_postproc_blend_b vp8_blend_b_c #endif -extern prototype_postproc_blend_mb(vp8_postproc_blend_mb); +extern prototype_postproc_blend_b(vp8_postproc_blend_b); typedef prototype_postproc((*vp8_postproc_fn_t)); typedef prototype_postproc_inplace((*vp8_postproc_inplace_fn_t)); typedef prototype_postproc_addnoise((*vp8_postproc_addnoise_fn_t)); -typedef prototype_postproc_blend_mb((*vp8_postproc_blend_mb_fn_t)); +typedef 
prototype_postproc_blend_mb_inner((*vp8_postproc_blend_mb_inner_fn_t)); +typedef prototype_postproc_blend_mb_outer((*vp8_postproc_blend_mb_outer_fn_t)); +typedef prototype_postproc_blend_b((*vp8_postproc_blend_b_fn_t)); typedef struct { - vp8_postproc_inplace_fn_t down; - vp8_postproc_inplace_fn_t across; - vp8_postproc_fn_t downacross; - vp8_postproc_addnoise_fn_t addnoise; - vp8_postproc_blend_mb_fn_t blend_mb; + vp8_postproc_inplace_fn_t down; + vp8_postproc_inplace_fn_t across; + vp8_postproc_fn_t downacross; + vp8_postproc_addnoise_fn_t addnoise; + vp8_postproc_blend_mb_inner_fn_t blend_mb_inner; + vp8_postproc_blend_mb_outer_fn_t blend_mb_outer; + vp8_postproc_blend_b_fn_t blend_b; } vp8_postproc_rtcd_vtable_t; #if CONFIG_RUNTIME_CPU_DETECT @@ -89,7 +111,7 @@ #include "onyxc_int.h" #include "ppflags.h" int vp8_post_proc_frame(struct VP8Common *oci, YV12_BUFFER_CONFIG *dest, - int deblock_level, int noise_level, int flags); + vp8_ppflags_t *flags); void vp8_de_noise(YV12_BUFFER_CONFIG *source, diff -Nru libvpx-0.9.5/vp8/common/ppc/loopfilter_altivec.c libvpx-0.9.6/vp8/common/ppc/loopfilter_altivec.c --- libvpx-0.9.5/vp8/common/ppc/loopfilter_altivec.c 2010-10-28 13:14:14.000000000 +0000 +++ libvpx-0.9.6/vp8/common/ppc/loopfilter_altivec.c 2011-03-04 20:40:39.000000000 +0000 @@ -56,10 +56,10 @@ int y_stride, int uv_stride, loop_filter_info *lfi, int simpler_lpf) { (void)simpler_lpf; - mbloop_filter_horizontal_edge_y_ppc(y_ptr, y_stride, lfi->mbflim, lfi->lim, lfi->mbthr); + mbloop_filter_horizontal_edge_y_ppc(y_ptr, y_stride, lfi->mbflim, lfi->lim, lfi->thr); if (u_ptr) - mbloop_filter_horizontal_edge_uv_ppc(u_ptr, v_ptr, uv_stride, lfi->uvmbflim, lfi->uvlim, lfi->uvmbthr); + mbloop_filter_horizontal_edge_uv_ppc(u_ptr, v_ptr, uv_stride, lfi->mbflim, lfi->lim, lfi->thr); } void loop_filter_mbhs_ppc(unsigned char *y_ptr, unsigned char *u_ptr, unsigned char *v_ptr, @@ -77,10 +77,10 @@ int y_stride, int uv_stride, loop_filter_info *lfi, int simpler_lpf) { 
(void)simpler_lpf; - mbloop_filter_vertical_edge_y_ppc(y_ptr, y_stride, lfi->mbflim, lfi->lim, lfi->mbthr); + mbloop_filter_vertical_edge_y_ppc(y_ptr, y_stride, lfi->mbflim, lfi->lim, lfi->thr); if (u_ptr) - mbloop_filter_vertical_edge_uv_ppc(u_ptr, v_ptr, uv_stride, lfi->uvmbflim, lfi->uvlim, lfi->uvmbthr); + mbloop_filter_vertical_edge_uv_ppc(u_ptr, v_ptr, uv_stride, lfi->mbflim, lfi->lim, lfi->thr); } void loop_filter_mbvs_ppc(unsigned char *y_ptr, unsigned char *u_ptr, unsigned char *v_ptr, @@ -104,7 +104,7 @@ loop_filter_horizontal_edge_y_ppc(y_ptr + 12 * y_stride, y_stride, lfi->flim, lfi->lim, lfi->thr); if (u_ptr) - loop_filter_horizontal_edge_uv_ppc(u_ptr + 4 * uv_stride, v_ptr + 4 * uv_stride, uv_stride, lfi->uvflim, lfi->uvlim, lfi->uvthr); + loop_filter_horizontal_edge_uv_ppc(u_ptr + 4 * uv_stride, v_ptr + 4 * uv_stride, uv_stride, lfi->flim, lfi->lim, lfi->thr); } void loop_filter_bhs_ppc(unsigned char *y_ptr, unsigned char *u_ptr, unsigned char *v_ptr, @@ -127,7 +127,7 @@ loop_filter_vertical_edge_y_ppc(y_ptr, y_stride, lfi->flim, lfi->lim, lfi->thr); if (u_ptr) - loop_filter_vertical_edge_uv_ppc(u_ptr + 4, v_ptr + 4, uv_stride, lfi->uvflim, lfi->uvlim, lfi->uvthr); + loop_filter_vertical_edge_uv_ppc(u_ptr + 4, v_ptr + 4, uv_stride, lfi->flim, lfi->lim, lfi->thr); } void loop_filter_bvs_ppc(unsigned char *y_ptr, unsigned char *u_ptr, unsigned char *v_ptr, diff -Nru libvpx-0.9.5/vp8/common/ppflags.h libvpx-0.9.6/vp8/common/ppflags.h --- libvpx-0.9.5/vp8/common/ppflags.h 2010-10-28 13:14:14.000000000 +0000 +++ libvpx-0.9.6/vp8/common/ppflags.h 2011-03-04 20:40:39.000000000 +0000 @@ -13,17 +13,28 @@ #define __INC_PPFLAGS_H enum { - VP8D_NOFILTERING = 0, - VP8D_DEBLOCK = 1<<0, - VP8D_DEMACROBLOCK = 1<<1, - VP8D_ADDNOISE = 1<<2, - VP8D_DEBUG_LEVEL1 = 1<<3, - VP8D_DEBUG_LEVEL2 = 1<<4, - VP8D_DEBUG_LEVEL3 = 1<<5, - VP8D_DEBUG_LEVEL4 = 1<<6, - VP8D_DEBUG_LEVEL5 = 1<<7, - VP8D_DEBUG_LEVEL6 = 1<<8, - VP8D_DEBUG_LEVEL7 = 1<<9 + VP8D_NOFILTERING = 0, + 
VP8D_DEBLOCK = 1<<0, + VP8D_DEMACROBLOCK = 1<<1, + VP8D_ADDNOISE = 1<<2, + VP8D_DEBUG_TXT_FRAME_INFO = 1<<3, + VP8D_DEBUG_TXT_MBLK_MODES = 1<<4, + VP8D_DEBUG_TXT_DC_DIFF = 1<<5, + VP8D_DEBUG_TXT_RATE_INFO = 1<<6, + VP8D_DEBUG_DRAW_MV = 1<<7, + VP8D_DEBUG_CLR_BLK_MODES = 1<<8, + VP8D_DEBUG_CLR_FRM_REF_BLKS = 1<<9 }; +typedef struct +{ + int post_proc_flag; + int deblocking_level; + int noise_level; + int display_ref_frame_flag; + int display_mb_modes_flag; + int display_b_modes_flag; + int display_mv_flag; +} vp8_ppflags_t; + #endif diff -Nru libvpx-0.9.5/vp8/common/predictdc.c libvpx-0.9.6/vp8/common/predictdc.c --- libvpx-0.9.5/vp8/common/predictdc.c 2010-10-28 13:14:14.000000000 +0000 +++ libvpx-0.9.6/vp8/common/predictdc.c 1970-01-01 00:00:00.000000000 +0000 @@ -1,44 +0,0 @@ -/* - * Copyright (c) 2010 The WebM project authors. All Rights Reserved. - * - * Use of this source code is governed by a BSD-style license - * that can be found in the LICENSE file in the root of the source - * tree. An additional intellectual property rights grant can be found - * in the file PATENTS. All contributing project authors may - * be found in the AUTHORS file in the root of the source tree. 
- */ - - -#include -#include "blockd.h" - - -void vp8_predict_dc(short *lastdc, short *thisdc, short quant, short *cons) -{ - int diff; - int sign; - int last_dc = *lastdc; - int this_dc = *thisdc; - - if (*cons > DCPREDCNTTHRESH) - { - this_dc += last_dc; - } - - diff = abs(last_dc - this_dc); - sign = (last_dc >> 31) ^(this_dc >> 31); - sign |= (!last_dc | !this_dc); - - if (sign) - { - *cons = 0; - } - else - { - if (diff <= DCPREDSIMTHRESH * quant) - (*cons)++ ; - } - - *thisdc = this_dc; - *lastdc = this_dc; -} diff -Nru libvpx-0.9.5/vp8/common/predictdc.h libvpx-0.9.6/vp8/common/predictdc.h --- libvpx-0.9.5/vp8/common/predictdc.h 2010-10-28 13:14:14.000000000 +0000 +++ libvpx-0.9.6/vp8/common/predictdc.h 1970-01-01 00:00:00.000000000 +0000 @@ -1,18 +0,0 @@ -/* - * Copyright (c) 2010 The WebM project authors. All Rights Reserved. - * - * Use of this source code is governed by a BSD-style license - * that can be found in the LICENSE file in the root of the source - * tree. An additional intellectual property rights grant can be found - * in the file PATENTS. All contributing project authors may - * be found in the AUTHORS file in the root of the source tree. - */ - - -#ifndef __PREDICTDC_H -#define __PREDICTDC_H - -void uvvp8_predict_dc(short *lastdc, short *thisdc, short quant, short *cons); -void vp8_predict_dc(short *lastdc, short *thisdc, short quant, short *cons); - -#endif diff -Nru libvpx-0.9.5/vp8/common/preproc.h libvpx-0.9.6/vp8/common/preproc.h --- libvpx-0.9.5/vp8/common/preproc.h 2010-10-28 13:14:14.000000000 +0000 +++ libvpx-0.9.6/vp8/common/preproc.h 1970-01-01 00:00:00.000000000 +0000 @@ -1,46 +0,0 @@ -/* - * Copyright (c) 2010 The WebM project authors. All Rights Reserved. - * - * Use of this source code is governed by a BSD-style license - * that can be found in the LICENSE file in the root of the source - * tree. An additional intellectual property rights grant can be found - * in the file PATENTS. 
All contributing project authors may - * be found in the AUTHORS file in the root of the source tree. - */ - - -/**************************************************************************** -* -* Module Title : preproc.h -* -* Description : simple preprocessor -* -****************************************************************************/ - -#ifndef __INC_PREPROC_H -#define __INC_PREPROC_H - -/**************************************************************************** -* Types -****************************************************************************/ - -typedef struct -{ - unsigned char *frame_buffer; - int frame; - unsigned int *fixed_divide; - - unsigned char *frame_buffer_alloc; - unsigned int *fixed_divide_alloc; -} pre_proc_instance; - -/**************************************************************************** -* Functions. -****************************************************************************/ -void pre_proc_machine_specific_config(void); -void delete_pre_proc(pre_proc_instance *ppi); -int init_pre_proc(pre_proc_instance *ppi, int frame_size); -extern void spatial_filter_c(pre_proc_instance *ppi, unsigned char *s, unsigned char *d, int width, int height, int pitch, int strength); -extern void (*temp_filter)(pre_proc_instance *ppi, unsigned char *s, unsigned char *d, int bytes, int strength); - -#endif diff -Nru libvpx-0.9.5/vp8/common/preprocif.h libvpx-0.9.6/vp8/common/preprocif.h --- libvpx-0.9.5/vp8/common/preprocif.h 2010-10-28 13:14:14.000000000 +0000 +++ libvpx-0.9.6/vp8/common/preprocif.h 1970-01-01 00:00:00.000000000 +0000 @@ -1,76 +0,0 @@ -/* - * Copyright (c) 2010 The WebM project authors. All Rights Reserved. - * - * Use of this source code is governed by a BSD-style license - * that can be found in the LICENSE file in the root of the source - * tree. An additional intellectual property rights grant can be found - * in the file PATENTS. 
All contributing project authors may - * be found in the AUTHORS file in the root of the source tree. - */ - - -/**************************************************************************** -* -* Module Title : preproc_if.h -* -* Description : Pre-processor interface header file. -* -****************************************************************************/ - -#ifndef __PREPROC_IF_H -#define __PREPROC_IF_H - -/**************************************************************************** -* Header Files -****************************************************************************/ -#include "type_aliases.h" - -/**************************************************************************** -* Types -****************************************************************************/ - -typedef struct -{ - UINT8 *Yuv0ptr; - UINT8 *Yuv1ptr; - - UINT8 *frag_info; // blocks coded : passed in - UINT32 frag_info_element_size; // size of each element - UINT32 frag_info_coded_mask; // mask to get at whether fragment is coded - - UINT32 *region_index; // Gives pixel index for top left of each block - UINT32 video_frame_height; - UINT32 video_frame_width; - UINT8 hfrag_pixels; - UINT8 vfrag_pixels; - -} SCAN_CONFIG_DATA; - -typedef enum -{ - SCP_FILTER_ON_OFF, - SCP_SET_SRF_OFFSET, - SCP_SET_EBO_ON_OFF, - SCP_SET_VCAP_LEVEL_OFFSET, - SCP_SET_SHOW_LOCAL - -} SCP_SETTINGS; - -typedef struct PP_INSTANCE *x_pp_inst; - -/**************************************************************************** -* Module statics -****************************************************************************/ -/* Controls whether Early break out is on or off in default case */ -#define EARLY_BREAKOUT_DEFAULT TRUE - -/**************************************************************************** -* Functions -****************************************************************************/ -extern void set_scan_param(x_pp_inst ppi, UINT32 param_id, INT32 param_value); -extern UINT32 yuvanalyse_frame(x_pp_inst 
ppi, UINT32 *KFIndicator); -extern x_pp_inst create_pp_instance(void); -extern void delete_pp_instance(x_pp_inst *); -extern BOOL scan_yuvinit(x_pp_inst, SCAN_CONFIG_DATA *scan_config_ptr); - -#endif diff -Nru libvpx-0.9.5/vp8/common/proposed.h libvpx-0.9.6/vp8/common/proposed.h --- libvpx-0.9.5/vp8/common/proposed.h 2010-10-28 13:14:14.000000000 +0000 +++ libvpx-0.9.6/vp8/common/proposed.h 1970-01-01 00:00:00.000000000 +0000 @@ -1,71 +0,0 @@ -/* - * Copyright (c) 2010 The WebM project authors. All Rights Reserved. - * - * Use of this source code is governed by a BSD-style license - * that can be found in the LICENSE file in the root of the source - * tree. An additional intellectual property rights grant can be found - * in the file PATENTS. All contributing project authors may - * be found in the AUTHORS file in the root of the source tree. - */ - - -typedef struct core_codec *codec_ptr; -typedef struct interface_table *interface_ptr; - -typedef struct -{ - void (*Initialize)(); - void (*Shutdown)(); - codec_ptr(*Create)(); - int (*compress_frame)(codec_ptr, unsigned int *frame_flags, YV12_BUFFER_CONFIG *sd, unsigned long *size, char *dest, INT64 time_stamp); - int (*show_frame)(codec_ptr , YV12_BUFFER_CONFIG *dest, int deblock_level, int noise_level, int flags); - void (*Remove)(codec_ptr *comp); - interface_ptr(*get_interface)(unsigned int id); - -} core_codec; - -typedef struct -{ - int (*set_bitrate)(codec_ptr, END_USAGE usage, int Datarate); - int (*get_bitrate)(codec_ptr, END_USAGE *usage, int *Datarate); - int (*set_mode)(codec_ptr, MODE mode, int Speed, char *File); - int (*get_mode)(codec_ptr, MODE *mode, int *Speed, char **File); -} codec_settings_basic; - -typedef struct -{ - int (*set_bitrate)(codec_ptr, END_USAGE usage, int Datarate); - int (*get_bitrate)(codec_ptr, END_USAGE *usage, int *Datarate); - int (*set_mode)(codec_ptr, MODE mode, int Speed, char *File); - int (*get_mode)(codec_ptr, MODE *mode, int *Speed, char **File); - int 
(*set_denoise)(codec_ptr, int Level); - int (*get_denoise)(codec_ptr, int *Level); - int (*set_sharpness)(codec_ptr, int sharpness); - int (*get_sharpness)(codec_ptr, int *sharpness); - int (*set_keyframing)(codec_ptr, int Auto, int max_distance); - int (*get_keyframing)(codec_ptr, int *Auto, int *max_distance); - int (*set_buffering)(codec_ptr, int buffer_level, int max_buffer_level); - int (*get_buffering)(codec_ptr, int *buffer_level, int *max_buffer_level); - int (*set_adjust_frame_rate)(codec_ptr, int Allowed, int at_buffer_level_pct); - int (*get_adjust_frame_rate)(codec_ptr, int *Allowed, int *at_buffer_level_pct); - int (*set_adjust_frame_size)(codec_ptr, int Allowed, int down_at_buffer_level_pct, int up_at_buffer_level_pct); - int (*get_adjust_frame_size)(codec_ptr, int *Allowed, int *down_at_buffer_level_pct, int *up_at_buffer_level_pct); - int (*set_adjust_quality)(codec_ptr, int Allowed, int min_quantizer, int max_quantizer); - int (*get_adjust_quality)(codec_ptr, int *Allowed, int *min_quantizer, int *max_quantizer); - int (*set_vbrparms)(codec_ptr, int Bias, int Min, int Max); - int (*get_vbrparms)(codec_ptr, int *Bias, int *Min, int *Max); - -} codec_settings_v1; - -typedef struct -{ - int (*request_recovery)(codec_ptr); - int (*request_droppable)(codec_ptr); - int (*internal_size)(codec_ptr, VPX_SCALING Vertical, VPX_SCALING Horizontal); - int (*update_last)(codec_ptr); - int (*update_gold)(codec_ptr); - int (*use_only_last)(codec_ptr); - int (*use_only_gold)(codec_ptr); - int (*update_entropy)(codec_ptr); - -} codec_realtime_requests; diff -Nru libvpx-0.9.5/vp8/common/recon.h libvpx-0.9.6/vp8/common/recon.h --- libvpx-0.9.5/vp8/common/recon.h 2010-10-28 13:14:14.000000000 +0000 +++ libvpx-0.9.6/vp8/common/recon.h 2011-03-04 20:40:39.000000000 +0000 @@ -94,6 +94,5 @@ #define RECON_INVOKE(ctx,fn) vp8_recon_##fn #endif -void vp8_recon_intra4x4mb(const vp8_recon_rtcd_vtable_t *rtcd, MACROBLOCKD *x); void vp8_recon_intra_mbuv(const 
vp8_recon_rtcd_vtable_t *rtcd, MACROBLOCKD *x); #endif diff -Nru libvpx-0.9.5/vp8/common/reconintra4x4.c libvpx-0.9.6/vp8/common/reconintra4x4.c --- libvpx-0.9.5/vp8/common/reconintra4x4.c 2010-10-28 13:14:14.000000000 +0000 +++ libvpx-0.9.6/vp8/common/reconintra4x4.c 2011-03-04 20:40:39.000000000 +0000 @@ -313,89 +313,3 @@ } -void vp8_recon_intra4x4mb(const vp8_recon_rtcd_vtable_t *rtcd, MACROBLOCKD *x) -{ - int i; - - vp8_intra_prediction_down_copy(x); - -#if ARCH_ARM - { - BLOCKD *b = &x->block[0]; - - vp8_predict_intra4x4(b, b->bmi.mode, b->predictor); - RECON_INVOKE(rtcd, recon)(b->predictor, b->diff, *(b->base_dst) + b->dst, b->dst_stride); - b += 1; - - vp8_predict_intra4x4(b, b->bmi.mode, b->predictor); - RECON_INVOKE(rtcd, recon)(b->predictor, b->diff, *(b->base_dst) + b->dst, b->dst_stride); - b += 1; - - vp8_predict_intra4x4(b, b->bmi.mode, b->predictor); - RECON_INVOKE(rtcd, recon)(b->predictor, b->diff, *(b->base_dst) + b->dst, b->dst_stride); - b += 1; - - vp8_predict_intra4x4(b, b->bmi.mode, b->predictor); - RECON_INVOKE(rtcd, recon)(b->predictor, b->diff, *(b->base_dst) + b->dst, b->dst_stride); - b += 1; - - vp8_predict_intra4x4(b, b->bmi.mode, b->predictor); - RECON_INVOKE(rtcd, recon)(b->predictor, b->diff, *(b->base_dst) + b->dst, b->dst_stride); - b += 1; - - vp8_predict_intra4x4(b, b->bmi.mode, b->predictor); - RECON_INVOKE(rtcd, recon)(b->predictor, b->diff, *(b->base_dst) + b->dst, b->dst_stride); - b += 1; - - vp8_predict_intra4x4(b, b->bmi.mode, b->predictor); - RECON_INVOKE(rtcd, recon)(b->predictor, b->diff, *(b->base_dst) + b->dst, b->dst_stride); - b += 1; - - vp8_predict_intra4x4(b, b->bmi.mode, b->predictor); - RECON_INVOKE(rtcd, recon)(b->predictor, b->diff, *(b->base_dst) + b->dst, b->dst_stride); - b += 1; - - vp8_predict_intra4x4(b, b->bmi.mode, b->predictor); - RECON_INVOKE(rtcd, recon)(b->predictor, b->diff, *(b->base_dst) + b->dst, b->dst_stride); - b += 1; - - vp8_predict_intra4x4(b, b->bmi.mode, b->predictor); - 
RECON_INVOKE(rtcd, recon)(b->predictor, b->diff, *(b->base_dst) + b->dst, b->dst_stride); - b += 1; - - vp8_predict_intra4x4(b, b->bmi.mode, b->predictor); - RECON_INVOKE(rtcd, recon)(b->predictor, b->diff, *(b->base_dst) + b->dst, b->dst_stride); - b += 1; - - vp8_predict_intra4x4(b, b->bmi.mode, b->predictor); - RECON_INVOKE(rtcd, recon)(b->predictor, b->diff, *(b->base_dst) + b->dst, b->dst_stride); - b += 1; - - vp8_predict_intra4x4(b, b->bmi.mode, b->predictor); - RECON_INVOKE(rtcd, recon)(b->predictor, b->diff, *(b->base_dst) + b->dst, b->dst_stride); - b += 1; - - vp8_predict_intra4x4(b, b->bmi.mode, b->predictor); - RECON_INVOKE(rtcd, recon)(b->predictor, b->diff, *(b->base_dst) + b->dst, b->dst_stride); - b += 1; - - vp8_predict_intra4x4(b, b->bmi.mode, b->predictor); - RECON_INVOKE(rtcd, recon)(b->predictor, b->diff, *(b->base_dst) + b->dst, b->dst_stride); - b += 1; - - vp8_predict_intra4x4(b, b->bmi.mode, b->predictor); - RECON_INVOKE(rtcd, recon)(b->predictor, b->diff, *(b->base_dst) + b->dst, b->dst_stride); - } -#else - for (i = 0; i < 16; i++) - { - BLOCKD *b = &x->block[i]; - - vp8_predict_intra4x4(b, x->block[i].bmi.mode, x->block[i].predictor); - RECON_INVOKE(rtcd, recon)(b->predictor, b->diff, *(b->base_dst) + b->dst, b->dst_stride); - } -#endif - - vp8_recon_intra_mbuv(rtcd, x); - -} diff -Nru libvpx-0.9.5/vp8/common/threading.h libvpx-0.9.6/vp8/common/threading.h --- libvpx-0.9.5/vp8/common/threading.h 2010-10-28 13:14:14.000000000 +0000 +++ libvpx-0.9.6/vp8/common/threading.h 2011-03-04 20:40:39.000000000 +0000 @@ -14,6 +14,8 @@ #define VPXINFINITE 10000 /* 10second. 
*/ +#if CONFIG_OS_SUPPORT && CONFIG_MULTITHREAD + /* Thread management macros */ #ifdef _WIN32 /* Win32 */ @@ -88,4 +90,6 @@ #define x86_pause_hint() #endif +#endif /* CONFIG_OS_SUPPORT && CONFIG_MULTITHREAD */ + #endif diff -Nru libvpx-0.9.5/vp8/common/vfwsetting.hpp libvpx-0.9.6/vp8/common/vfwsetting.hpp --- libvpx-0.9.5/vp8/common/vfwsetting.hpp 2010-10-28 13:14:14.000000000 +0000 +++ libvpx-0.9.6/vp8/common/vfwsetting.hpp 1970-01-01 00:00:00.000000000 +0000 @@ -1,76 +0,0 @@ -/* - * Copyright (c) 2010 The WebM project authors. All Rights Reserved. - * - * Use of this source code is governed by a BSD-style license - * that can be found in the LICENSE file in the root of the source - * tree. An additional intellectual property rights grant can be found - * in the file PATENTS. All contributing project authors may - * be found in the AUTHORS file in the root of the source tree. - */ - - -#if !defined(VFWSETTING_HPP) -#define VFWSETTING_HPP -//______________________________________________________________________________ -// -// VFWSetting.hpp -// - -#include "four_cc.hpp" -#include - -namespace vpxvp -{ - - //-------------------------------------- - class VFWSetting - { - friend std::ostream& operator<<(std::ostream& os, const VFWSetting& vfws); - - public: - - enum Mode - { - m_setting, - m_config - }; - - enum - { - header_size = 8, - Size = 16 - }; - - VFWSetting(four_cc fcc); - ~VFWSetting(); - - four_cc fcc() const; - Mode mode() const; - - int setting() const; - int value() const; - void setting_value(int i_setting, int i_value); // Sets mode to m_setting - - long size() const; - const void* data() const; - int data(const void* p_data, unsigned long ul_size); - - private: - - VFWSetting(const VFWSetting& vfws); // Not implemented - VFWSetting& operator=(const VFWSetting& vfws); // Not implemented - - int extract_(const void* p_data, unsigned long ul_size); - void update_() const; - - four_cc m_fcc; - Mode m_mode; - int m_i_setting; - int m_i_value; - - 
mutable unsigned char m_p_data[Size]; - }; - -} // namespace vpxvp - -#endif // VFWSETTING_HPP diff -Nru libvpx-0.9.5/vp8/common/vpxblit_c64.h libvpx-0.9.6/vp8/common/vpxblit_c64.h --- libvpx-0.9.5/vp8/common/vpxblit_c64.h 2010-10-28 13:14:14.000000000 +0000 +++ libvpx-0.9.6/vp8/common/vpxblit_c64.h 1970-01-01 00:00:00.000000000 +0000 @@ -1,48 +0,0 @@ -/* - * Copyright (c) 2010 The WebM project authors. All Rights Reserved. - * - * Use of this source code is governed by a BSD-style license - * that can be found in the LICENSE file in the root of the source - * tree. An additional intellectual property rights grant can be found - * in the file PATENTS. All contributing project authors may - * be found in the AUTHORS file in the root of the source tree. - */ - - -#ifndef _VPX_BLIT_C64_h -#define _VPX_BLIT_C64_h - -/**************************************************************************** -* Typedefs -****************************************************************************/ - -typedef struct // YUV buffer configuration structure -{ - int y_width; - int y_height; - int y_stride; - - int uv_width; - int uv_height; - int uv_stride; - - unsigned char *y_buffer; - unsigned char *u_buffer; - unsigned char *v_buffer; - - unsigned char *y_ptr_scrn; - unsigned char *u_ptr_scrn; - unsigned char *v_ptr_scrn; - -} DXV_YUV_BUFFER_CONFIG; - -typedef struct -{ - unsigned char *rgbptr_scrn; - unsigned char *y_ptr_scrn; - unsigned char *u_ptr_scrn; - unsigned char *v_ptr_scrn; - unsigned char *rgbptr_scrn2; -} DXV_FINAL_VIDEO; - -#endif /* include guards */ diff -Nru libvpx-0.9.5/vp8/common/vpxblit.h libvpx-0.9.6/vp8/common/vpxblit.h --- libvpx-0.9.5/vp8/common/vpxblit.h 2010-10-28 13:14:14.000000000 +0000 +++ libvpx-0.9.6/vp8/common/vpxblit.h 1970-01-01 00:00:00.000000000 +0000 @@ -1,112 +0,0 @@ -/* - * Copyright (c) 2010 The WebM project authors. All Rights Reserved. 
- * - * Use of this source code is governed by a BSD-style license - * that can be found in the LICENSE file in the root of the source - * tree. An additional intellectual property rights grant can be found - * in the file PATENTS. All contributing project authors may - * be found in the AUTHORS file in the root of the source tree. - */ - - -#ifndef VPXBLIT_H_INCL -#define VPXBLIT_H_INCL -/*============================================================================== - Includes -==============================================================================*/ - -/*============================================================================== - Defines -==============================================================================*/ - - -#ifdef VPX_BIG_ENDIAN -#define BYTE_ZERO(X) ((X & 0xFF000000) >> (24 - 2) ) -#define BYTE_ONE(X) ((X & 0x00FF0000) >> (16 - 2) ) -#define BYTE_TWO(X) ((X & 0x0000FF00) >> (8 - 2) ) -#define BYTE_THREE(X) ((X & 0x000000FF) << (0 + 2) ) - -#define BYTE_ZERO_UV(X) ((X & 0x0000FF00) >> (8 - 2) ) -#define BYTE_ONE_UV(X) ((X & 0x000000FF) << (0 + 2) ) - -#define REREFERENCE(X) (*((int *) &(X))) - -#else - -#define BYTE_THREE(X) ((X & 0xFF000000) >> (24 - 2) ) -#define BYTE_TWO(X) ((X & 0x00FF0000) >> (16 - 2) ) -#define BYTE_ONE(X) ((X & 0x0000FF00) >> (8 - 2) ) -#define BYTE_ZERO(X) ((X & 0x000000FF) << (0 + 2) ) - -#define BYTE_ONE_UV(X) ((X & 0x0000FF00) >> (8 - 2) ) -#define BYTE_ZERO_UV(X) ((X & 0x000000FF) << (0 + 2) ) - -#define REREFERENCE(X) (*((int *) &(X))) - -#endif - - -/*============================================================================== - Type Definitions -==============================================================================*/ -typedef struct // YUV buffer configuration structure -{ - int y_width; - int y_height; - int y_stride; - - int uv_width; - int uv_height; - int uv_stride; - - char *y_buffer; - char *u_buffer; - char *v_buffer; - - char *uv_start; - int uv_dst_area; - int uv_used_area; - -} 
VPX_BLIT_CONFIG; - -typedef struct tx86_params -{ - unsigned int pushed_registers[6]; - unsigned int return_address; - unsigned int dst; - unsigned int scrn_pitch; - VPX_BLIT_CONFIG *buff_config; -} x86_params; - -/*============================================================================= - Enums -==============================================================================*/ - - -/*============================================================================== - Structures -==============================================================================*/ - -/*============================================================================== - Constants -==============================================================================*/ - - -/*============================================================================== - Variables -==============================================================================*/ - - - - -/*============================================================================== - Function Protoypes/MICROS -==============================================================================*/ -int vpx_get_size_of_pixel(unsigned int bd); -void *vpx_get_blitter(unsigned int bd); -void vpx_set_blit(void); -void vpx_destroy_blit(void); - - - -#endif //VPXBLIT_H_INCL diff -Nru libvpx-0.9.5/vp8/common/vpxerrors.h libvpx-0.9.6/vp8/common/vpxerrors.h --- libvpx-0.9.5/vp8/common/vpxerrors.h 2010-10-28 13:14:14.000000000 +0000 +++ libvpx-0.9.6/vp8/common/vpxerrors.h 1970-01-01 00:00:00.000000000 +0000 @@ -1,13 +0,0 @@ -/* - * Copyright (c) 2010 The WebM project authors. All Rights Reserved. - * - * Use of this source code is governed by a BSD-style license - * that can be found in the LICENSE file in the root of the source - * tree. An additional intellectual property rights grant can be found - * in the file PATENTS. All contributing project authors may - * be found in the AUTHORS file in the root of the source tree. 
- */ - - - -#define ALLOC_FAILURE -2 diff -Nru libvpx-0.9.5/vp8/common/vpx_ref_build_prefix.h libvpx-0.9.6/vp8/common/vpx_ref_build_prefix.h --- libvpx-0.9.5/vp8/common/vpx_ref_build_prefix.h 2010-10-28 13:14:14.000000000 +0000 +++ libvpx-0.9.6/vp8/common/vpx_ref_build_prefix.h 1970-01-01 00:00:00.000000000 +0000 @@ -1,24 +0,0 @@ -/* - * Copyright (c) 2010 The WebM project authors. All Rights Reserved. - * - * Use of this source code is governed by a BSD-style license - * that can be found in the LICENSE file in the root of the source - * tree. An additional intellectual property rights grant can be found - * in the file PATENTS. All contributing project authors may - * be found in the AUTHORS file in the root of the source tree. - */ - - -#ifndef _VPX_REF_BUILD_PREFIX_h -#define _VPX_REF_BUILD_PREFIX_h - -#if defined(__cplusplus) -extern "C" { -#endif - - -#if defined(__cplusplus) -} -#endif - -#endif /* include guards */ diff -Nru libvpx-0.9.5/vp8/common/x86/loopfilter_x86.c libvpx-0.9.6/vp8/common/x86/loopfilter_x86.c --- libvpx-0.9.5/vp8/common/x86/loopfilter_x86.c 2010-10-28 13:14:14.000000000 +0000 +++ libvpx-0.9.6/vp8/common/x86/loopfilter_x86.c 2011-03-04 20:40:40.000000000 +0000 @@ -10,7 +10,7 @@ #include "vpx_ports/config.h" -#include "loopfilter.h" +#include "vp8/common/loopfilter.h" prototype_loopfilter(vp8_loop_filter_horizontal_edge_c); prototype_loopfilter(vp8_loop_filter_vertical_edge_c); @@ -45,13 +45,13 @@ int y_stride, int uv_stride, loop_filter_info *lfi, int simpler_lpf) { (void) simpler_lpf; - vp8_mbloop_filter_horizontal_edge_mmx(y_ptr, y_stride, lfi->mbflim, lfi->lim, lfi->mbthr, 2); + vp8_mbloop_filter_horizontal_edge_mmx(y_ptr, y_stride, lfi->mbflim, lfi->lim, lfi->thr, 2); if (u_ptr) - vp8_mbloop_filter_horizontal_edge_mmx(u_ptr, uv_stride, lfi->uvmbflim, lfi->uvlim, lfi->uvmbthr, 1); + vp8_mbloop_filter_horizontal_edge_mmx(u_ptr, uv_stride, lfi->mbflim, lfi->lim, lfi->thr, 1); if (v_ptr) - vp8_mbloop_filter_horizontal_edge_mmx(v_ptr, 
uv_stride, lfi->uvmbflim, lfi->uvlim, lfi->uvmbthr, 1); + vp8_mbloop_filter_horizontal_edge_mmx(v_ptr, uv_stride, lfi->mbflim, lfi->lim, lfi->thr, 1); } @@ -62,7 +62,7 @@ (void) v_ptr; (void) uv_stride; (void) simpler_lpf; - vp8_loop_filter_simple_horizontal_edge_mmx(y_ptr, y_stride, lfi->mbflim, lfi->lim, lfi->mbthr, 2); + vp8_loop_filter_simple_horizontal_edge_mmx(y_ptr, y_stride, lfi->mbflim, lfi->lim, lfi->thr, 2); } @@ -71,13 +71,13 @@ int y_stride, int uv_stride, loop_filter_info *lfi, int simpler_lpf) { (void) simpler_lpf; - vp8_mbloop_filter_vertical_edge_mmx(y_ptr, y_stride, lfi->mbflim, lfi->lim, lfi->mbthr, 2); + vp8_mbloop_filter_vertical_edge_mmx(y_ptr, y_stride, lfi->mbflim, lfi->lim, lfi->thr, 2); if (u_ptr) - vp8_mbloop_filter_vertical_edge_mmx(u_ptr, uv_stride, lfi->uvmbflim, lfi->uvlim, lfi->uvmbthr, 1); + vp8_mbloop_filter_vertical_edge_mmx(u_ptr, uv_stride, lfi->mbflim, lfi->lim, lfi->thr, 1); if (v_ptr) - vp8_mbloop_filter_vertical_edge_mmx(v_ptr, uv_stride, lfi->uvmbflim, lfi->uvlim, lfi->uvmbthr, 1); + vp8_mbloop_filter_vertical_edge_mmx(v_ptr, uv_stride, lfi->mbflim, lfi->lim, lfi->thr, 1); } @@ -88,7 +88,7 @@ (void) v_ptr; (void) uv_stride; (void) simpler_lpf; - vp8_loop_filter_simple_vertical_edge_mmx(y_ptr, y_stride, lfi->mbflim, lfi->lim, lfi->mbthr, 2); + vp8_loop_filter_simple_vertical_edge_mmx(y_ptr, y_stride, lfi->mbflim, lfi->lim, lfi->thr, 2); } @@ -102,10 +102,10 @@ vp8_loop_filter_horizontal_edge_mmx(y_ptr + 12 * y_stride, y_stride, lfi->flim, lfi->lim, lfi->thr, 2); if (u_ptr) - vp8_loop_filter_horizontal_edge_mmx(u_ptr + 4 * uv_stride, uv_stride, lfi->uvflim, lfi->uvlim, lfi->uvthr, 1); + vp8_loop_filter_horizontal_edge_mmx(u_ptr + 4 * uv_stride, uv_stride, lfi->flim, lfi->lim, lfi->thr, 1); if (v_ptr) - vp8_loop_filter_horizontal_edge_mmx(v_ptr + 4 * uv_stride, uv_stride, lfi->uvflim, lfi->uvlim, lfi->uvthr, 1); + vp8_loop_filter_horizontal_edge_mmx(v_ptr + 4 * uv_stride, uv_stride, lfi->flim, lfi->lim, lfi->thr, 1); } @@ 
-132,10 +132,10 @@ vp8_loop_filter_vertical_edge_mmx(y_ptr + 12, y_stride, lfi->flim, lfi->lim, lfi->thr, 2); if (u_ptr) - vp8_loop_filter_vertical_edge_mmx(u_ptr + 4, uv_stride, lfi->uvflim, lfi->uvlim, lfi->uvthr, 1); + vp8_loop_filter_vertical_edge_mmx(u_ptr + 4, uv_stride, lfi->flim, lfi->lim, lfi->thr, 1); if (v_ptr) - vp8_loop_filter_vertical_edge_mmx(v_ptr + 4, uv_stride, lfi->uvflim, lfi->uvlim, lfi->uvthr, 1); + vp8_loop_filter_vertical_edge_mmx(v_ptr + 4, uv_stride, lfi->flim, lfi->lim, lfi->thr, 1); } @@ -159,10 +159,10 @@ int y_stride, int uv_stride, loop_filter_info *lfi, int simpler_lpf) { (void) simpler_lpf; - vp8_mbloop_filter_horizontal_edge_sse2(y_ptr, y_stride, lfi->mbflim, lfi->lim, lfi->mbthr, 2); + vp8_mbloop_filter_horizontal_edge_sse2(y_ptr, y_stride, lfi->mbflim, lfi->lim, lfi->thr, 2); if (u_ptr) - vp8_mbloop_filter_horizontal_edge_uv_sse2(u_ptr, uv_stride, lfi->uvmbflim, lfi->uvlim, lfi->uvmbthr, v_ptr); + vp8_mbloop_filter_horizontal_edge_uv_sse2(u_ptr, uv_stride, lfi->mbflim, lfi->lim, lfi->thr, v_ptr); } @@ -173,7 +173,7 @@ (void) v_ptr; (void) uv_stride; (void) simpler_lpf; - vp8_loop_filter_simple_horizontal_edge_sse2(y_ptr, y_stride, lfi->mbflim, lfi->lim, lfi->mbthr, 2); + vp8_loop_filter_simple_horizontal_edge_sse2(y_ptr, y_stride, lfi->mbflim, lfi->lim, lfi->thr, 2); } @@ -182,10 +182,10 @@ int y_stride, int uv_stride, loop_filter_info *lfi, int simpler_lpf) { (void) simpler_lpf; - vp8_mbloop_filter_vertical_edge_sse2(y_ptr, y_stride, lfi->mbflim, lfi->lim, lfi->mbthr, 2); + vp8_mbloop_filter_vertical_edge_sse2(y_ptr, y_stride, lfi->mbflim, lfi->lim, lfi->thr, 2); if (u_ptr) - vp8_mbloop_filter_vertical_edge_uv_sse2(u_ptr, uv_stride, lfi->uvmbflim, lfi->uvlim, lfi->uvmbthr, v_ptr); + vp8_mbloop_filter_vertical_edge_uv_sse2(u_ptr, uv_stride, lfi->mbflim, lfi->lim, lfi->thr, v_ptr); } @@ -196,7 +196,7 @@ (void) v_ptr; (void) uv_stride; (void) simpler_lpf; - vp8_loop_filter_simple_vertical_edge_sse2(y_ptr, y_stride, lfi->mbflim, 
lfi->lim, lfi->mbthr, 2); + vp8_loop_filter_simple_vertical_edge_sse2(y_ptr, y_stride, lfi->mbflim, lfi->lim, lfi->thr, 2); } @@ -210,7 +210,7 @@ vp8_loop_filter_horizontal_edge_sse2(y_ptr + 12 * y_stride, y_stride, lfi->flim, lfi->lim, lfi->thr, 2); if (u_ptr) - vp8_loop_filter_horizontal_edge_uv_sse2(u_ptr + 4 * uv_stride, uv_stride, lfi->uvflim, lfi->uvlim, lfi->uvthr, v_ptr + 4 * uv_stride); + vp8_loop_filter_horizontal_edge_uv_sse2(u_ptr + 4 * uv_stride, uv_stride, lfi->flim, lfi->lim, lfi->thr, v_ptr + 4 * uv_stride); } @@ -237,7 +237,7 @@ vp8_loop_filter_vertical_edge_sse2(y_ptr + 12, y_stride, lfi->flim, lfi->lim, lfi->thr, 2); if (u_ptr) - vp8_loop_filter_vertical_edge_uv_sse2(u_ptr + 4, uv_stride, lfi->uvflim, lfi->uvlim, lfi->uvthr, v_ptr + 4); + vp8_loop_filter_vertical_edge_uv_sse2(u_ptr + 4, uv_stride, lfi->flim, lfi->lim, lfi->thr, v_ptr + 4); } diff -Nru libvpx-0.9.5/vp8/common/x86/vp8_asm_stubs.c libvpx-0.9.6/vp8/common/x86/vp8_asm_stubs.c --- libvpx-0.9.5/vp8/common/x86/vp8_asm_stubs.c 2010-10-28 13:14:14.000000000 +0000 +++ libvpx-0.9.6/vp8/common/x86/vp8_asm_stubs.c 2011-03-04 20:40:40.000000000 +0000 @@ -11,7 +11,7 @@ #include "vpx_ports/config.h" #include "vpx_ports/mem.h" -#include "subpixel.h" +#include "vp8/common/subpixel.h" extern const short vp8_six_tap_mmx[8][6*8]; extern const short vp8_bilinear_filters_mmx[8][2*8]; diff -Nru libvpx-0.9.5/vp8/common/x86/x86_systemdependent.c libvpx-0.9.6/vp8/common/x86/x86_systemdependent.c --- libvpx-0.9.5/vp8/common/x86/x86_systemdependent.c 2010-10-28 13:14:14.000000000 +0000 +++ libvpx-0.9.6/vp8/common/x86/x86_systemdependent.c 2011-03-04 20:40:40.000000000 +0000 @@ -11,13 +11,13 @@ #include "vpx_ports/config.h" #include "vpx_ports/x86.h" -#include "g_common.h" -#include "subpixel.h" -#include "loopfilter.h" -#include "recon.h" -#include "idct.h" -#include "pragmas.h" -#include "onyxc_int.h" +#include "vp8/common/g_common.h" +#include "vp8/common/subpixel.h" +#include "vp8/common/loopfilter.h" 
+#include "vp8/common/recon.h" +#include "vp8/common/idct.h" +#include "vp8/common/pragmas.h" +#include "vp8/common/onyxc_int.h" void vp8_arch_x86_common_init(VP8_COMMON *ctx) { diff -Nru libvpx-0.9.5/vp8/decoder/arm/arm_dsystemdependent.c libvpx-0.9.6/vp8/decoder/arm/arm_dsystemdependent.c --- libvpx-0.9.5/vp8/decoder/arm/arm_dsystemdependent.c 2010-10-28 13:14:14.000000000 +0000 +++ libvpx-0.9.6/vp8/decoder/arm/arm_dsystemdependent.c 2011-03-04 20:40:40.000000000 +0000 @@ -11,12 +11,11 @@ #include "vpx_ports/config.h" #include "vpx_ports/arm.h" -#include "blockd.h" -#include "pragmas.h" -#include "postproc.h" -#include "dboolhuff.h" -#include "dequantize.h" -#include "onyxd_int.h" +#include "vp8/common/blockd.h" +#include "vp8/common/pragmas.h" +#include "vp8/common/postproc.h" +#include "vp8/decoder/dequantize.h" +#include "vp8/decoder/onyxd_int.h" void vp8_arch_arm_decode_init(VP8D_COMP *pbi) { @@ -35,12 +34,6 @@ pbi->dequant.dc_idct_add_y_block = vp8_dequant_dc_idct_add_y_block_v6; pbi->dequant.idct_add_y_block = vp8_dequant_idct_add_y_block_v6; pbi->dequant.idct_add_uv_block = vp8_dequant_idct_add_uv_block_v6; -#if 0 /*For use with RTCD, when implemented*/ - pbi->dboolhuff.start = vp8dx_start_decode_c; - pbi->dboolhuff.fill = vp8dx_bool_decoder_fill_c; - pbi->dboolhuff.debool = vp8dx_decode_bool_c; - pbi->dboolhuff.devalue = vp8dx_decode_value_c; -#endif } #endif @@ -54,12 +47,6 @@ pbi->dequant.dc_idct_add_y_block = vp8_dequant_dc_idct_add_y_block_neon; pbi->dequant.idct_add_y_block = vp8_dequant_idct_add_y_block_neon; pbi->dequant.idct_add_uv_block = vp8_dequant_idct_add_uv_block_neon; -#if 0 /*For use with RTCD, when implemented*/ - pbi->dboolhuff.start = vp8dx_start_decode_c; - pbi->dboolhuff.fill = vp8dx_bool_decoder_fill_c; - pbi->dboolhuff.debool = vp8dx_decode_bool_c; - pbi->dboolhuff.devalue = vp8dx_decode_value_c; -#endif } #endif #endif diff -Nru libvpx-0.9.5/vp8/decoder/arm/armv6/dboolhuff_v6.asm libvpx-0.9.6/vp8/decoder/arm/armv6/dboolhuff_v6.asm 
--- libvpx-0.9.5/vp8/decoder/arm/armv6/dboolhuff_v6.asm 2010-10-28 13:14:14.000000000 +0000 +++ libvpx-0.9.6/vp8/decoder/arm/armv6/dboolhuff_v6.asm 1970-01-01 00:00:00.000000000 +0000 @@ -1,163 +0,0 @@ -; -; Copyright (c) 2010 The WebM project authors. All Rights Reserved. -; -; Use of this source code is governed by a BSD-style license -; that can be found in the LICENSE file in the root of the source -; tree. An additional intellectual property rights grant can be found -; in the file PATENTS. All contributing project authors may -; be found in the AUTHORS file in the root of the source tree. -; - - - EXPORT |vp8_decode_value_v6| - EXPORT |vp8dx_start_decode_v6| - EXPORT |vp8dx_stop_decode_v6| - EXPORT |vp8dx_decode_bool_v6| - - ARM - REQUIRE8 - PRESERVE8 - - INCLUDE vpx_asm_offsets.asm - -br RN r0 -prob RN r1 -bits RN r1 - AREA |.text|, CODE, READONLY ; name this block of code - -; int z = 0; -; int bit; -; for ( bit=bits-1; bit>=0; bit-- ) -; { -; z |= (vp8dx_decode_bool(br, 0x80)<> 8) - mov r12, #1 - smlawb r6, r6, prob, r12 - - mov lr, #0 - subs r5, r3, r6, lsl #24 - - ;cmp r3, r1 - movhs lr, #1 - movhs r3, r5 - subhs r2, r2, r6 - movlo r2, r6 - - cmp r2, #0x80 - blt range_less_0x80 - ;strd r2, r3, [br, #bool_decoder_range] - str r2, [br, #bool_decoder_range] - str r3, [br, #bool_decoder_value] - mov r0, lr - ldmia sp!, {r4 - r6, pc} - -range_less_0x80 - ldr r5, [br, #bool_decoder_pos] - ldr r1, [br, #bool_decoder_buffer] - ldr r4, [br, #bool_decoder_count] - add r1, r1, r5 - - clz r12, r2 - sub r12, r12, #24 - subs r4, r4, r12 - ldrleb r6, [r1], #1 - mov r2, r2, lsl r12 - mov r3, r3, lsl r12 - addle r4, r4, #8 - rsble r12, r4, #8 - addle r5, r5, #1 - orrle r3, r3, r6, lsl r12 - - ;strd r2, r3, [br, #bool_decoder_range] - ;strd r4, r5, [br, #bool_decoder_count] - str r2, [br, #bool_decoder_range] - str r3, [br, #bool_decoder_value] - str r4, [br, #bool_decoder_count] - str r5, [br, #bool_decoder_pos] - - mov r0, lr - - ldmia sp!, {r4 - r6, pc} - ENDP ; 
|vp8dx_decode_bool_v6| - - END diff -Nru libvpx-0.9.5/vp8/decoder/arm/armv6/idct_blk_v6.c libvpx-0.9.6/vp8/decoder/arm/armv6/idct_blk_v6.c --- libvpx-0.9.5/vp8/decoder/arm/armv6/idct_blk_v6.c 2010-10-28 13:14:14.000000000 +0000 +++ libvpx-0.9.6/vp8/decoder/arm/armv6/idct_blk_v6.c 2011-03-04 20:40:40.000000000 +0000 @@ -9,8 +9,8 @@ */ #include "vpx_ports/config.h" -#include "idct.h" -#include "dequantize.h" +#include "vp8/common/idct.h" +#include "vp8/decoder/dequantize.h" void vp8_dequant_dc_idct_add_y_block_v6 (short *q, short *dq, unsigned char *pre, diff -Nru libvpx-0.9.5/vp8/decoder/arm/dboolhuff_arm.h libvpx-0.9.6/vp8/decoder/arm/dboolhuff_arm.h --- libvpx-0.9.5/vp8/decoder/arm/dboolhuff_arm.h 2010-10-28 13:14:14.000000000 +0000 +++ libvpx-0.9.6/vp8/decoder/arm/dboolhuff_arm.h 1970-01-01 00:00:00.000000000 +0000 @@ -1,43 +0,0 @@ -#ifndef DBOOLHUFF_ARM_H -#define DBOOLHUFF_ARM_H - -/* JLK - * There are currently no arm-optimized versions of - * these functions. As they are implemented, they - * can be uncommented below and added to - * arm/dsystemdependent.c - * - * The existing asm code is likely so different as - * to be useless. However, its been left (for now) - * for reference. 
- */ -#if 0 -#if HAVE_ARMV6 -#undef vp8_dbool_start -#define vp8_dbool_start vp8dx_start_decode_v6 - -#undef vp8_dbool_fill -#define vp8_dbool_fill vp8_bool_decoder_fill_v6 - -#undef vp8_dbool_debool -#define vp8_dbool_debool vp8_decode_bool_v6 - -#undef vp8_dbool_devalue -#define vp8_dbool_devalue vp8_decode_value_v6 -#endif /* HAVE_ARMV6 */ - -#if HAVE_ARMV7 -#undef vp8_dbool_start -#define vp8_dbool_start vp8dx_start_decode_neon - -#undef vp8_dbool_fill -#define vp8_dbool_fill vp8_bool_decoder_fill_neon - -#undef vp8_dbool_debool -#define vp8_dbool_debool vp8_decode_bool_neon - -#undef vp8_dbool_devalue -#define vp8_dbool_devalue vp8_decode_value_neon -#endif /* HAVE_ARMV7 */ -#endif -#endif /* DBOOLHUFF_ARM_H */ diff -Nru libvpx-0.9.5/vp8/decoder/arm/dequantize_arm.c libvpx-0.9.6/vp8/decoder/arm/dequantize_arm.c --- libvpx-0.9.5/vp8/decoder/arm/dequantize_arm.c 2010-10-28 13:14:14.000000000 +0000 +++ libvpx-0.9.6/vp8/decoder/arm/dequantize_arm.c 2011-03-04 20:40:40.000000000 +0000 @@ -10,9 +10,8 @@ #include "vpx_ports/config.h" -#include "dequantize.h" -#include "predictdc.h" -#include "idct.h" +#include "vp8/decoder/dequantize.h" +#include "vp8/common/idct.h" #include "vpx_mem/vpx_mem.h" #if HAVE_ARMV7 diff -Nru libvpx-0.9.5/vp8/decoder/arm/detokenize_arm.h libvpx-0.9.6/vp8/decoder/arm/detokenize_arm.h --- libvpx-0.9.5/vp8/decoder/arm/detokenize_arm.h 2010-10-28 13:14:14.000000000 +0000 +++ libvpx-0.9.6/vp8/decoder/arm/detokenize_arm.h 1970-01-01 00:00:00.000000000 +0000 @@ -1,22 +0,0 @@ -/* - * Copyright (c) 2010 The WebM project authors. All Rights Reserved. - * - * Use of this source code is governed by a BSD-style license - * that can be found in the LICENSE file in the root of the source - * tree. An additional intellectual property rights grant can be found - * in the file PATENTS. All contributing project authors may - * be found in the AUTHORS file in the root of the source tree. 
- */ - - -#ifndef DETOKENIZE_ARM_H -#define DETOKENIZE_ARM_H - -#if HAVE_ARMV6 -#if CONFIG_ARM_ASM_DETOK -void vp8_init_detokenizer(VP8D_COMP *dx); -void vp8_decode_mb_tokens_v6(DETOK *detoken, int type); -#endif -#endif - -#endif diff -Nru libvpx-0.9.5/vp8/decoder/arm/detokenize.asm libvpx-0.9.6/vp8/decoder/arm/detokenize.asm --- libvpx-0.9.5/vp8/decoder/arm/detokenize.asm 2010-10-28 13:14:14.000000000 +0000 +++ libvpx-0.9.6/vp8/decoder/arm/detokenize.asm 1970-01-01 00:00:00.000000000 +0000 @@ -1,320 +0,0 @@ -; -; Copyright (c) 2010 The WebM project authors. All Rights Reserved. -; -; Use of this source code is governed by a BSD-style license -; that can be found in the LICENSE file in the root of the source -; tree. An additional intellectual property rights grant can be found -; in the file PATENTS. All contributing project authors may -; be found in the AUTHORS file in the root of the source tree. -; - - - EXPORT |vp8_decode_mb_tokens_v6| - - AREA |.text|, CODE, READONLY ; name this block of code - - INCLUDE vpx_asm_offsets.asm - -l_qcoeff EQU 0 -l_i EQU 4 -l_type EQU 8 -l_stop EQU 12 -l_c EQU 16 -l_l_ptr EQU 20 -l_a_ptr EQU 24 -l_bc EQU 28 -l_coef_ptr EQU 32 -l_stacksize EQU 64 - - -;; constant offsets -- these should be created at build time -c_block2above_offset EQU 25 -c_entropy_nodes EQU 11 -c_dct_eob_token EQU 11 - -|vp8_decode_mb_tokens_v6| PROC - stmdb sp!, {r4 - r11, lr} - sub sp, sp, #l_stacksize - mov r7, r1 ; type - mov r9, r0 ; detoken - - ldr r1, [r9, #detok_current_bc] - ldr r0, [r9, #detok_qcoeff_start_ptr] - mov r11, #0 ; i - mov r3, #16 ; stop - - cmp r7, #1 ; type ?= 1 - addeq r11, r11, #24 ; i = 24 - addeq r3, r3, #8 ; stop = 24 - addeq r0, r0, #3, 24 ; qcoefptr += 24*16 - - str r0, [sp, #l_qcoeff] - str r11, [sp, #l_i] - str r7, [sp, #l_type] - str r3, [sp, #l_stop] - str r1, [sp, #l_bc] - - add lr, r9, r7, lsl #2 ; detoken + type*4 - - ldr r8, [r1, #bool_decoder_user_buffer] - - ldr r10, [lr, #detok_coef_probs] - ldr r5, [r1, 
#bool_decoder_count] - ldr r6, [r1, #bool_decoder_range] - ldr r4, [r1, #bool_decoder_value] - - str r10, [sp, #l_coef_ptr] - -BLOCK_LOOP - ldr r3, [r9, #detok_ptr_block2leftabove] - ldr r1, [r9, #detok_L] - ldr r2, [r9, #detok_A] - ldrb r12, [r3, r11]! ; block2left[i] - ldrb r3, [r3, #c_block2above_offset]; block2above[i] - - cmp r7, #0 ; c = !type - moveq r7, #1 - movne r7, #0 - - ldrb r0, [r1, r12]! ; *(L += block2left[i]) - ldrb r3, [r2, r3]! ; *(A += block2above[i]) - mov lr, #c_entropy_nodes ; ENTROPY_NODES = 11 - -; VP8_COMBINEENTROPYCONTETEXTS(t, *a, *l) => t = ((*a) != 0) + ((*l) !=0) - cmp r0, #0 ; *l ?= 0 - movne r0, #1 - cmp r3, #0 ; *a ?= 0 - addne r0, r0, #1 ; t - - str r1, [sp, #l_l_ptr] ; save &l - str r2, [sp, #l_a_ptr] ; save &a - smlabb r0, r0, lr, r10 ; Prob = coef_probs + (t * ENTROPY_NODES) - mov r1, #0 ; t = 0 - str r7, [sp, #l_c] - - ;align 4 -COEFF_LOOP - ldr r3, [r9, #detok_ptr_coef_bands_x] - ldr lr, [r9, #detok_coef_tree_ptr] - ;STALL - ldrb r3, [r3, r7] ; coef_bands_x[c] - ;STALL - ;STALL - add r0, r0, r3 ; Prob += coef_bands_x[c] - -get_token_loop - ldrb r2, [r0, +r1, asr #1] ; Prob[t >> 1] - mov r3, r6, lsl #8 ; range << 8 - sub r3, r3, #256 ; (range << 8) - (1 << 8) - mov r10, #1 ; 1 - - smlawb r2, r3, r2, r10 ; split = 1 + (((range-1) * probability) >> 8) - - ldrb r12, [r8] ; load cx data byte in stall slot : r8 = bufptr - ;++ - - subs r3, r4, r2, lsl #24 ; value-(split<<24): used later to calculate shift for NORMALIZE - addhs r1, r1, #1 ; t += 1 - movhs r4, r3 ; value -= bigsplit (split << 24) - subhs r2, r6, r2 ; range -= split - ; movlo r6, r2 ; range = split - - ldrsb r1, [lr, r1] ; t = onyx_coef_tree_ptr[t] - -; NORMALIZE - clz r3, r2 ; vp8dx_bitreader_norm[range] + 24 - sub r3, r3, #24 ; vp8dx_bitreader_norm[range] - subs r5, r5, r3 ; count -= shift - mov r6, r2, lsl r3 ; range <<= shift - mov r4, r4, lsl r3 ; value <<= shift - -; if count <= 0, += BR_COUNT; value |= *bufptr++ << (BR_COUNT-count); BR_COUNT = 8, but need to 
upshift values by +16 - addle r5, r5, #8 ; count += 8 - rsble r3, r5, #24 ; 24 - count - addle r8, r8, #1 ; bufptr++ - orrle r4, r4, r12, lsl r3 ; value |= *bufptr << shift + 16 - - cmp r1, #0 ; t ?= 0 - bgt get_token_loop ; while (t > 0) - - cmn r1, #c_dct_eob_token ; if(t == -DCT_EOB_TOKEN) - beq END_OF_BLOCK ; break - - rsb lr, r1, #0 ; v = -t; - - cmp lr, #4 ; if(v > FOUR_TOKEN) - ble SKIP_EXTRABITS - - ldr r3, [r9, #detok_teb_base_ptr] - mov r11, #1 ; 1 in split = 1 + ... nope, v+= 1 << bits_count - add r7, r3, lr, lsl #4 ; detok_teb_base_ptr + (v << 4) - - ldrsh lr, [r7, #tokenextrabits_min_val] ; v = teb_ptr->min_val - ldrsh r0, [r7, #tokenextrabits_length] ; bits_count = teb_ptr->Length - -extrabits_loop - add r3, r0, r7 ; &teb_ptr->Probs[bits_count] - - ldrb r2, [r3, #4] ; probability. why +4? - mov r3, r6, lsl #8 ; range << 8 - sub r3, r3, #256 ; range << 8 + 1 << 8 - - smlawb r2, r3, r2, r11 ; split = 1 + (((range-1) * probability) >> 8) - - ldrb r12, [r8] ; *bufptr - ;++ - - subs r10, r4, r2, lsl #24 ; value - (split<<24) - movhs r4, r10 ; value = value - (split << 24) - subhs r2, r6, r2 ; range = range - split - addhs lr, lr, r11, lsl r0 ; v += ((UINT16)1<> 1 - - subs r3, r4, r2, lsl #24 ; value - (split<<24) - movhs r4, r3 ; value -= (split << 24) - subhs r2, r6, r2 ; range -= split - mvnhs r3, lr ; -v - addhs lr, r3, #1 ; v = (v ^ -1) + 1 - -; NORMALIZE - clz r3, r2 ; leading 0s in split - sub r3, r3, #24 ; shift - subs r5, r5, r3 ; count -= shift - mov r6, r2, lsl r3 ; range <<= shift - mov r4, r4, lsl r3 ; value <<= shift - ldrleb r2, [r8], #1 ; *(bufptr++) - addle r5, r5, #8 ; count += 8 - rsble r3, r5, #24 ; BR_COUNT - count - orrle r4, r4, r2, lsl r3 ; value |= *bufptr << (BR_COUNT - count) - - add r0, r0, #11 ; Prob += ENTROPY_NODES (11) - - cmn r1, #1 ; t < -ONE_TOKEN - - addlt r0, r0, #11 ; Prob += ENTROPY_NODES (11) - - mvn r1, #1 ; t = -1 ???? 
C is -2 - -SKIP_EOB_CHECK - ldr r7, [sp, #l_c] ; c - ldr r3, [r9, #detok_scan] - add r1, r1, #2 ; t+= 2 - cmp r7, #15 ; c should will be one higher - - ldr r3, [r3, +r7, lsl #2] ; scan[c] this needs pre-inc c value - add r7, r7, #1 ; c++ - add r3, r11, r3, lsl #1 ; qcoeff + scan[c] - - str r7, [sp, #l_c] ; store c - strh lr, [r3] ; qcoef_ptr[scan[c]] = v - - blt COEFF_LOOP - - sub r7, r7, #1 ; if(t != -DCT_EOB_TOKEN) --c - -END_OF_BLOCK - ldr r3, [sp, #l_type] ; type - ldr r10, [sp, #l_coef_ptr] ; coef_ptr - ldr r0, [sp, #l_qcoeff] ; qcoeff - ldr r11, [sp, #l_i] ; i - ldr r12, [sp, #l_stop] ; stop - - cmp r3, #0 ; type ?= 0 - moveq r1, #1 - movne r1, #0 - add r3, r11, r9 ; detok + i - - cmp r7, r1 ; c ?= !type - strb r7, [r3, #detok_eob] ; eob[i] = c - - ldr r7, [sp, #l_l_ptr] ; l - ldr r2, [sp, #l_a_ptr] ; a - movne r3, #1 ; t - moveq r3, #0 - - add r0, r0, #32 ; qcoeff += 32 (16 * 2?) - add r11, r11, #1 ; i++ - strb r3, [r7] ; *l = t - strb r3, [r2] ; *a = t - str r0, [sp, #l_qcoeff] ; qcoeff - str r11, [sp, #l_i] ; i - - cmp r11, r12 ; i < stop - ldr r7, [sp, #l_type] ; type - - blt BLOCK_LOOP - - cmp r11, #25 ; i ?= 25 - bne ln2_decode_mb_to - - ldr r12, [r9, #detok_qcoeff_start_ptr] - ldr r10, [r9, #detok_coef_probs] - mov r7, #0 ; type/i = 0 - mov r3, #16 ; stop = 16 - str r12, [sp, #l_qcoeff] ; qcoeff_ptr = qcoeff_start_ptr - str r7, [sp, #l_i] - str r7, [sp, #l_type] - str r3, [sp, #l_stop] - - str r10, [sp, #l_coef_ptr] ; coef_probs = coef_probs[type=0] - - b BLOCK_LOOP - -ln2_decode_mb_to - cmp r11, #16 ; i ?= 16 - bne ln1_decode_mb_to - - mov r10, #detok_coef_probs - add r10, r10, #2*4 ; coef_probs[type] - ldr r10, [r9, r10] ; detok + detok_coef_probs[type] - - mov r7, #2 ; type = 2 - mov r3, #24 ; stop = 24 - - str r7, [sp, #l_type] - str r3, [sp, #l_stop] - - str r10, [sp, #l_coef_ptr] ; coef_probs = coef_probs[type] - b BLOCK_LOOP - -ln1_decode_mb_to - ldr r2, [sp, #l_bc] - mov r0, #0 - nop - - str r8, [r2, #bool_decoder_user_buffer] - str r5, [r2, 
#bool_decoder_count] - str r4, [r2, #bool_decoder_value] - str r6, [r2, #bool_decoder_range] - - add sp, sp, #l_stacksize - ldmia sp!, {r4 - r11, pc} - - ENDP ; |vp8_decode_mb_tokens_v6| - - END diff -Nru libvpx-0.9.5/vp8/decoder/arm/neon/dboolhuff_neon.asm libvpx-0.9.6/vp8/decoder/arm/neon/dboolhuff_neon.asm --- libvpx-0.9.5/vp8/decoder/arm/neon/dboolhuff_neon.asm 2010-10-28 13:14:14.000000000 +0000 +++ libvpx-0.9.6/vp8/decoder/arm/neon/dboolhuff_neon.asm 1970-01-01 00:00:00.000000000 +0000 @@ -1,160 +0,0 @@ -; -; Copyright (c) 2010 The WebM project authors. All Rights Reserved. -; -; Use of this source code is governed by a BSD-style license -; that can be found in the LICENSE file in the root of the source -; tree. An additional intellectual property rights grant can be found -; in the file PATENTS. All contributing project authors may -; be found in the AUTHORS file in the root of the source tree. -; - - - EXPORT |vp8_decode_value_neon| - EXPORT |vp8dx_start_decode_neon| - EXPORT |vp8dx_stop_decode_neon| - EXPORT |vp8dx_decode_bool_neon| - - ARM - REQUIRE8 - PRESERVE8 - - INCLUDE vpx_asm_offsets.asm - - AREA |.text|, CODE, READONLY ; name this block of code - -; int z = 0; -; int bit; -; for ( bit=bits-1; bit>=0; bit-- ) -; { -; z |= (vp8dx_decode_bool(br, 0x80)<> 8) - - mov lr, r0 - mov r0, #0 ;bit = 0 - ; - subs r5, r3, r4, lsl #24 - - subhs r2, r2, r4 ;range = br->range-split - movlo r2, r4 ;range = split - movhs r0, #1 ;bit = 1 - movhs r3, r5 ;value = value-bigsplit - - cmp r2, #0x80 - blt range_less_0x80 - strd r2, r3, [lr, #bool_decoder_range] ;store result - - ldmia sp!, {r4 - r5, pc} - -range_less_0x80 - - ldrd r4, r5, [lr, #bool_decoder_count] ;load count, pos, buffer - ldr r1, [lr, #bool_decoder_buffer] - - clz r12, r2 - add r1, r1, r5 - - sub r12, r12, #24 - subs r4, r4, r12 ;count -= shift - mov r2, r2, lsl r12 ;range <<= shift - mov r3, r3, lsl r12 ;value <<= shift - addle r4, r4, #8 ;count += 8 - ldrleb r12, [r1], #1 ;br->buffer[br->pos] - - rsble 
r1, r4, #8 ;-count - addle r5, r5, #1 ;br->pos++ - orrle r3, r3, r12, lsl r1 ;value |= (br->buffer[br->pos]) << (-count) - - strd r2, r3, [lr, #bool_decoder_range] ;store result - strd r4, r5, [lr, #bool_decoder_count] - - ldmia sp!, {r4 - r5, pc} - ENDP ; |vp8dx_decode_bool_neon| - - END diff -Nru libvpx-0.9.5/vp8/decoder/arm/neon/idct_blk_neon.c libvpx-0.9.6/vp8/decoder/arm/neon/idct_blk_neon.c --- libvpx-0.9.5/vp8/decoder/arm/neon/idct_blk_neon.c 2010-10-28 13:14:14.000000000 +0000 +++ libvpx-0.9.6/vp8/decoder/arm/neon/idct_blk_neon.c 2011-03-04 20:40:40.000000000 +0000 @@ -9,8 +9,8 @@ */ #include "vpx_ports/config.h" -#include "idct.h" -#include "dequantize.h" +#include "vp8/common/idct.h" +#include "vp8/decoder/dequantize.h" /* place these declarations here because we don't want to maintain them * outside of this scope diff -Nru libvpx-0.9.5/vp8/decoder/asm_dec_offsets.c libvpx-0.9.6/vp8/decoder/asm_dec_offsets.c --- libvpx-0.9.5/vp8/decoder/asm_dec_offsets.c 1970-01-01 00:00:00.000000000 +0000 +++ libvpx-0.9.6/vp8/decoder/asm_dec_offsets.c 2011-03-04 20:40:40.000000000 +0000 @@ -0,0 +1,57 @@ +/* + * Copyright (c) 2011 The WebM project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
+ */ + + +#include "vpx_ports/config.h" +#include + +#include "onyxd_int.h" + +#define DEFINE(sym, val) int sym = val; + +/* +#define BLANK() asm volatile("\n->" : : ) +*/ + +/* + * int main(void) + * { + */ + +DEFINE(detok_scan, offsetof(DETOK, scan)); +DEFINE(detok_ptr_block2leftabove, offsetof(DETOK, ptr_block2leftabove)); +DEFINE(detok_coef_tree_ptr, offsetof(DETOK, vp8_coef_tree_ptr)); +DEFINE(detok_teb_base_ptr, offsetof(DETOK, teb_base_ptr)); +DEFINE(detok_norm_ptr, offsetof(DETOK, norm_ptr)); +DEFINE(detok_ptr_coef_bands_x, offsetof(DETOK, ptr_coef_bands_x)); + +DEFINE(detok_A, offsetof(DETOK, A)); +DEFINE(detok_L, offsetof(DETOK, L)); + +DEFINE(detok_qcoeff_start_ptr, offsetof(DETOK, qcoeff_start_ptr)); +DEFINE(detok_current_bc, offsetof(DETOK, current_bc)); +DEFINE(detok_coef_probs, offsetof(DETOK, coef_probs)); +DEFINE(detok_eob, offsetof(DETOK, eob)); + +DEFINE(bool_decoder_user_buffer_end, offsetof(BOOL_DECODER, user_buffer_end)); +DEFINE(bool_decoder_user_buffer, offsetof(BOOL_DECODER, user_buffer)); +DEFINE(bool_decoder_value, offsetof(BOOL_DECODER, value)); +DEFINE(bool_decoder_count, offsetof(BOOL_DECODER, count)); +DEFINE(bool_decoder_range, offsetof(BOOL_DECODER, range)); + +DEFINE(tokenextrabits_min_val, offsetof(TOKENEXTRABITS, min_val)); +DEFINE(tokenextrabits_length, offsetof(TOKENEXTRABITS, Length)); + +//add asserts for any offset that is not supported by assembly code +//add asserts for any size that is not supported by assembly code +/* + * return 0; + * } + */ diff -Nru libvpx-0.9.5/vp8/decoder/dboolhuff.c libvpx-0.9.6/vp8/decoder/dboolhuff.c --- libvpx-0.9.5/vp8/decoder/dboolhuff.c 2010-10-28 13:14:14.000000000 +0000 +++ libvpx-0.9.6/vp8/decoder/dboolhuff.c 2011-03-04 20:40:40.000000000 +0000 @@ -26,8 +26,9 @@ }; -int vp8dx_start_decode_c(BOOL_DECODER *br, const unsigned char *source, - unsigned int source_sz) +int vp8dx_start_decode(BOOL_DECODER *br, + const unsigned char *source, + unsigned int source_sz) { br->user_buffer_end = 
source+source_sz; br->user_buffer = source; @@ -39,13 +40,13 @@ return 1; /* Populate the buffer */ - vp8dx_bool_decoder_fill_c(br); + vp8dx_bool_decoder_fill(br); return 0; } -void vp8dx_bool_decoder_fill_c(BOOL_DECODER *br) +void vp8dx_bool_decoder_fill(BOOL_DECODER *br) { const unsigned char *bufptr; const unsigned char *bufend; @@ -62,69 +63,3 @@ br->value = value; br->count = count; } - -#if 0 -/* - * Until optimized versions of these functions are available, we - * keep the implementation in the header to allow inlining. - * - * The RTCD-style invocations are still in place so this can - * be switched by just uncommenting these functions here and - * the DBOOLHUFF_INVOKE calls in the header. - */ -int vp8dx_decode_bool_c(BOOL_DECODER *br, int probability) -{ - unsigned int bit=0; - VP8_BD_VALUE value; - unsigned int split; - VP8_BD_VALUE bigsplit; - int count; - unsigned int range; - - value = br->value; - count = br->count; - range = br->range; - - split = 1 + (((range-1) * probability) >> 8); - bigsplit = (VP8_BD_VALUE)split << (VP8_BD_VALUE_SIZE - 8); - - range = split; - if(value >= bigsplit) - { - range = br->range-split; - value = value-bigsplit; - bit = 1; - } - - /*if(range>=0x80) - { - br->value = value; - br->range = range; - return bit; - }*/ - - { - register unsigned int shift = vp8dx_bitreader_norm[range]; - range <<= shift; - value <<= shift; - count -= shift; - } - br->value = value; - br->count = count; - br->range = range; - if (count < 0) - vp8dx_bool_decoder_fill_c(br); - return bit; -} - -int vp8dx_decode_value_c(BOOL_DECODER *br, int bits) -{ - int z = 0; - int bit; - for ( bit=bits-1; bit>=0; bit-- ) - { - z |= (vp8dx_decode_bool(br, 0x80)<fn -#define IF_RTCD(x) (x) -#else*/ -#define DBOOLHUFF_INVOKE(ctx,fn) vp8_dbool_##fn -#define IF_RTCD(x) NULL -/*#endif*/ - DECLARE_ALIGNED(16, extern const unsigned char, vp8dx_bitreader_norm[256]); -/* wrapper functions to hide RTCD. 
static means inline means hopefully no - * penalty - */ -static int vp8dx_start_decode(BOOL_DECODER *br, - struct vp8_dboolhuff_rtcd_vtable *rtcd, - const unsigned char *source, unsigned int source_sz) { -#if CONFIG_RUNTIME_CPU_DETECT - br->rtcd = rtcd; -#endif - return DBOOLHUFF_INVOKE(rtcd, start)(br, source, source_sz); -} -static void vp8dx_bool_decoder_fill(BOOL_DECODER *br) { - DBOOLHUFF_INVOKE(br->rtcd, fill)(br); -} +int vp8dx_start_decode(BOOL_DECODER *br, + const unsigned char *source, + unsigned int source_sz); + +void vp8dx_bool_decoder_fill(BOOL_DECODER *br); /*The refill loop is used in several places, so define it in a macro to make sure they're all consistent. @@ -138,12 +67,6 @@ static int vp8dx_decode_bool(BOOL_DECODER *br, int probability) { - /* - * Until optimized versions of this function are available, we - * keep the implementation in the header to allow inlining. - * - *return DBOOLHUFF_INVOKE(br->rtcd, debool)(br, probability); - */ unsigned int bit = 0; VP8_BD_VALUE value; unsigned int split; @@ -167,13 +90,6 @@ bit = 1; } - /*if(range>=0x80) - { - br->value = value; - br->range = range; - return bit - }*/ - { register unsigned int shift = vp8dx_bitreader_norm[range]; range <<= shift; @@ -190,12 +106,6 @@ static int vp8_decode_value(BOOL_DECODER *br, int bits) { - /* - * Until optimized versions of this function are available, we - * keep the implementation in the header to allow inlining. - * - *return DBOOLHUFF_INVOKE(br->rtcd, devalue)(br, bits); - */ int z = 0; int bit; @@ -206,4 +116,29 @@ return z; } + +static int vp8dx_bool_error(BOOL_DECODER *br) +{ + /* Check if we have reached the end of the buffer. + * + * Variable 'count' stores the number of bits in the 'value' buffer, + * minus 8. So if count == 8, there are 16 bits available to be read. + * Normally, count is filled with 8 and one byte is filled into the + * value buffer. 
When we reach the end of the buffer, count is instead + * filled with VP8_LOTS_OF_BITS, 8 of which represent the last 8 real + * bits from the bitstream. So the last bit in the bitstream will be + * represented by count == VP8_LOTS_OF_BITS - 16. + */ + if ((br->count > VP8_BD_VALUE_SIZE) + && (br->count <= VP8_LOTS_OF_BITS - 16)) + { + /* We have tried to decode bits after the end of + * stream was encountered. + */ + return 1; + } + + /* No error. */ + return 0; +} #endif diff -Nru libvpx-0.9.5/vp8/decoder/decodemv.c libvpx-0.9.6/vp8/decoder/decodemv.c --- libvpx-0.9.5/vp8/decoder/decodemv.c 2010-10-28 13:14:14.000000000 +0000 +++ libvpx-0.9.6/vp8/decoder/decodemv.c 2011-03-04 20:40:40.000000000 +0000 @@ -10,10 +10,10 @@ #include "treereader.h" -#include "entropymv.h" -#include "entropymode.h" +#include "vp8/common/entropymv.h" +#include "vp8/common/entropymode.h" #include "onyxd_int.h" -#include "findnearmv.h" +#include "vp8/common/findnearmv.h" #if CONFIG_DEBUG #include diff -Nru libvpx-0.9.5/vp8/decoder/decoderthreading.h libvpx-0.9.6/vp8/decoder/decoderthreading.h --- libvpx-0.9.5/vp8/decoder/decoderthreading.h 2010-10-28 13:14:14.000000000 +0000 +++ libvpx-0.9.6/vp8/decoder/decoderthreading.h 2011-03-04 20:40:40.000000000 +0000 @@ -19,7 +19,7 @@ extern void vp8mt_decode_mb_rows(VP8D_COMP *pbi, MACROBLOCKD *xd); extern void vp8_decoder_remove_threads(VP8D_COMP *pbi); extern void vp8_decoder_create_threads(VP8D_COMP *pbi); -extern int vp8mt_alloc_temp_buffers(VP8D_COMP *pbi, int width, int prev_mb_rows); +extern void vp8mt_alloc_temp_buffers(VP8D_COMP *pbi, int width, int prev_mb_rows); extern void vp8mt_de_alloc_temp_buffers(VP8D_COMP *pbi, int mb_rows); #endif diff -Nru libvpx-0.9.5/vp8/decoder/decodframe.c libvpx-0.9.6/vp8/decoder/decodframe.c --- libvpx-0.9.5/vp8/decoder/decodframe.c 2010-10-28 13:14:14.000000000 +0000 +++ libvpx-0.9.6/vp8/decoder/decodframe.c 2011-03-04 20:40:40.000000000 +0000 @@ -10,28 +10,27 @@ #include "onyxd_int.h" -#include 
"header.h" -#include "reconintra.h" -#include "reconintra4x4.h" -#include "recon.h" -#include "reconinter.h" +#include "vp8/common/header.h" +#include "vp8/common/reconintra.h" +#include "vp8/common/reconintra4x4.h" +#include "vp8/common/recon.h" +#include "vp8/common/reconinter.h" #include "dequantize.h" #include "detokenize.h" -#include "invtrans.h" -#include "alloccommon.h" -#include "entropymode.h" -#include "quant_common.h" +#include "vp8/common/invtrans.h" +#include "vp8/common/alloccommon.h" +#include "vp8/common/entropymode.h" +#include "vp8/common/quant_common.h" #include "vpx_scale/vpxscale.h" #include "vpx_scale/yv12extend.h" -#include "setupintrarecon.h" +#include "vp8/common/setupintrarecon.h" #include "decodemv.h" -#include "extend.h" +#include "vp8/common/extend.h" #include "vpx_mem/vpx_mem.h" -#include "idct.h" +#include "vp8/common/idct.h" #include "dequantize.h" -#include "predictdc.h" -#include "threading.h" +#include "vp8/common/threading.h" #include "decoderthreading.h" #include "dboolhuff.h" @@ -381,6 +380,12 @@ xd->pre.u_buffer = pc->yv12_fb[ref_fb_idx].u_buffer + recon_uvoffset; xd->pre.v_buffer = pc->yv12_fb[ref_fb_idx].v_buffer + recon_uvoffset; + if (xd->mode_info_context->mbmi.ref_frame != INTRA_FRAME) + { + /* propagate errors from reference frames */ + xd->corrupted |= pc->yv12_fb[ref_fb_idx].corrupted; + } + vp8_build_uvmvs(xd, pc->full_pixel); /* @@ -391,6 +396,8 @@ */ vp8_decode_macroblock(pbi, xd); + /* check if the boolean decoder has suffered an error */ + xd->corrupted |= vp8dx_bool_error(xd->current_bc); recon_yoffset += 16; recon_uvoffset += 8; @@ -461,13 +468,13 @@ partition_size = user_data_end - partition; } - if (user_data_end - partition < partition_size) + if (partition + partition_size > user_data_end + || partition + partition_size < partition) vpx_internal_error(&pc->error, VPX_CODEC_CORRUPT_FRAME, "Truncated packet or corrupt partition " "%d length", i + 1); - if (vp8dx_start_decode(bool_decoder, 
IF_RTCD(&pbi->dboolhuff), - partition, partition_size)) + if (vp8dx_start_decode(bool_decoder, partition, partition_size)) vpx_internal_error(&pc->error, VPX_CODEC_MEM_ERROR, "Failed to allocate bool decoder %d", i + 1); @@ -476,15 +483,16 @@ bool_decoder++; } +#if CONFIG_MULTITHREAD /* Clamp number of decoder threads */ if (pbi->decoding_thread_count > num_part - 1) pbi->decoding_thread_count = num_part - 1; +#endif } static void stop_token_decoder(VP8D_COMP *pbi) { - int i; VP8_COMMON *pc = &pbi->common; if (pc->multi_token_partition != ONE_PARTITION) @@ -555,6 +563,7 @@ xd->frame_type = pc->frame_type; xd->mode_info_context->mbmi.mode = DC_PRED; xd->mode_info_stride = pc->mode_info_stride; + xd->corrupted = 0; /* init without corruption */ } int vp8_decode_frame(VP8D_COMP *pbi) @@ -570,6 +579,10 @@ int i, j, k, l; const int *const mb_feature_data_bits = vp8_mb_feature_data_bits; + /* start with no corruption of current frame */ + xd->corrupted = 0; + pc->yv12_fb[pc->new_fb_idx].corrupted = 0; + if (data_end - data < 3) vpx_internal_error(&pc->error, VPX_CODEC_CORRUPT_FRAME, "Truncated packet"); @@ -580,7 +593,8 @@ (data[0] | (data[1] << 8) | (data[2] << 16)) >> 5; data += 3; - if (data_end - data < first_partition_length_in_bytes) + if (data + first_partition_length_in_bytes > data_end + || data + first_partition_length_in_bytes < data) vpx_internal_error(&pc->error, VPX_CODEC_CORRUPT_FRAME, "Truncated packet or corrupt partition 0 length"); vp8_setup_version(pc); @@ -637,8 +651,7 @@ init_frame(pbi); - if (vp8dx_start_decode(bc, IF_RTCD(&pbi->dboolhuff), - data, data_end - data)) + if (vp8dx_start_decode(bc, data, data_end - data)) vpx_internal_error(&pc->error, VPX_CODEC_MEM_ERROR, "Failed to allocate bool decoder 0"); if (pc->frame_type == KEY_FRAME) { @@ -832,7 +845,9 @@ vpx_memcpy(&xd->dst, &pc->yv12_fb[pc->new_fb_idx], sizeof(YV12_BUFFER_CONFIG)); /* set up frame new frame for intra coded blocks */ +#if CONFIG_MULTITHREAD if (!(pbi->b_multithreaded_rd) || 
pc->multi_token_partition == ONE_PARTITION || !(pc->filter_level)) +#endif vp8_setup_intra_recon(&pc->yv12_fb[pc->new_fb_idx]); vp8_setup_block_dptrs(xd); @@ -852,6 +867,7 @@ vpx_memcpy(&xd->block[0].bmi, &xd->mode_info_context->bmi[0], sizeof(B_MODE_INFO)); +#if CONFIG_MULTITHREAD if (pbi->b_multithreaded_rd && pc->multi_token_partition != ONE_PARTITION) { vp8mt_decode_mb_rows(pbi, xd); @@ -866,6 +882,7 @@ vp8_yv12_extend_frame_borders_ptr(&pc->yv12_fb[pc->new_fb_idx]); /*cm->frame_to_show);*/ } else +#endif { int ibc = 0; int num_part = 1 << pc->multi_token_partition; @@ -890,6 +907,14 @@ stop_token_decoder(pbi); + /* Collect information about decoder corruption. */ + /* 1. Check first boolean decoder for errors. */ + pc->yv12_fb[pc->new_fb_idx].corrupted = + vp8dx_bool_error(bc); + /* 2. Check the macroblock information */ + pc->yv12_fb[pc->new_fb_idx].corrupted |= + xd->corrupted; + /* vpx_log("Decoder: Frame Decoded, Size Roughly:%d bytes \n",bc->pos+pbi->bc2.pos); */ /* If this was a kf or Gf note the Q used */ diff -Nru libvpx-0.9.5/vp8/decoder/dequantize.c libvpx-0.9.6/vp8/decoder/dequantize.c --- libvpx-0.9.5/vp8/decoder/dequantize.c 2010-10-28 13:14:14.000000000 +0000 +++ libvpx-0.9.6/vp8/decoder/dequantize.c 2011-03-04 20:40:40.000000000 +0000 @@ -11,8 +11,7 @@ #include "vpx_ports/config.h" #include "dequantize.h" -#include "predictdc.h" -#include "idct.h" +#include "vp8/common/idct.h" #include "vpx_mem/vpx_mem.h" extern void vp8_short_idct4x4llm_c(short *input, short *output, int pitch) ; diff -Nru libvpx-0.9.5/vp8/decoder/dequantize.h libvpx-0.9.6/vp8/decoder/dequantize.h --- libvpx-0.9.5/vp8/decoder/dequantize.h 2010-10-28 13:14:14.000000000 +0000 +++ libvpx-0.9.6/vp8/decoder/dequantize.h 2011-03-04 20:40:40.000000000 +0000 @@ -11,7 +11,7 @@ #ifndef DEQUANTIZE_H #define DEQUANTIZE_H -#include "blockd.h" +#include "vp8/common/blockd.h" #define prototype_dequant_block(sym) \ void sym(BLOCKD *x) diff -Nru libvpx-0.9.5/vp8/decoder/detokenize.c 
libvpx-0.9.6/vp8/decoder/detokenize.c --- libvpx-0.9.5/vp8/decoder/detokenize.c 2010-10-28 13:14:14.000000000 +0000 +++ libvpx-0.9.6/vp8/decoder/detokenize.c 2011-03-04 20:40:40.000000000 +0000 @@ -9,8 +9,8 @@ */ -#include "type_aliases.h" -#include "blockd.h" +#include "vp8/common/type_aliases.h" +#include "vp8/common/blockd.h" #include "onyxd_int.h" #include "vpx_mem/vpx_mem.h" #include "vpx_ports/mem.h" @@ -74,37 +74,6 @@ } } -#if CONFIG_ARM_ASM_DETOK -/* mashup of vp8_block2left and vp8_block2above so we only need one pointer - * for the assembly version. - */ -DECLARE_ALIGNED(16, const UINT8, vp8_block2leftabove[25*2]) = -{ - /* vp8_block2left */ - 0, 0, 0, 0, 1, 1, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7, 8, - /* vp8_block2above */ - 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 4, 5, 4, 5, 6, 7, 6, 7, 8 -}; - -void vp8_init_detokenizer(VP8D_COMP *dx) -{ - const VP8_COMMON *const oc = & dx->common; - MACROBLOCKD *x = & dx->mb; - - dx->detoken.vp8_coef_tree_ptr = vp8_coef_tree; - dx->detoken.ptr_block2leftabove = vp8_block2leftabove; - dx->detoken.ptr_coef_bands_x = vp8_coef_bands_x; - dx->detoken.scan = vp8_default_zig_zag1d; - dx->detoken.teb_base_ptr = vp8d_token_extra_bits2; - dx->detoken.qcoeff_start_ptr = &x->qcoeff[0]; - - dx->detoken.coef_probs[0] = (oc->fc.coef_probs [0] [ 0 ] [0]); - dx->detoken.coef_probs[1] = (oc->fc.coef_probs [1] [ 0 ] [0]); - dx->detoken.coef_probs[2] = (oc->fc.coef_probs [2] [ 0 ] [0]); - dx->detoken.coef_probs[3] = (oc->fc.coef_probs [3] [ 0 ] [0]); -} -#endif - DECLARE_ALIGNED(16, extern const unsigned char, vp8dx_bitreader_norm[256]); #define FILL \ if(count < 0) \ @@ -202,35 +171,6 @@ }\ NORMALIZE -#if CONFIG_ARM_ASM_DETOK -int vp8_decode_mb_tokens(VP8D_COMP *dx, MACROBLOCKD *x) -{ - int eobtotal = 0; - int i, type; - - dx->detoken.current_bc = x->current_bc; - dx->detoken.A = x->above_context; - dx->detoken.L = x->left_context; - - type = 3; - - if (x->mode_info_context->mbmi.mode != B_PRED && 
x->mode_info_context->mbmi.mode != SPLITMV) - { - type = 1; - eobtotal -= 16; - } - - vp8_decode_mb_tokens_v6(&dx->detoken, type); - - for (i = 0; i < 25; i++) - { - x->eobs[i] = dx->detoken.eob[i]; - eobtotal += dx->detoken.eob[i]; - } - - return eobtotal; -} -#else int vp8_decode_mb_tokens(VP8D_COMP *dx, MACROBLOCKD *x) { ENTROPY_CONTEXT *A = (ENTROPY_CONTEXT *)x->above_context; @@ -423,4 +363,3 @@ return eobtotal; } -#endif /*!CONFIG_ASM_DETOK*/ diff -Nru libvpx-0.9.5/vp8/decoder/detokenize.h libvpx-0.9.6/vp8/decoder/detokenize.h --- libvpx-0.9.5/vp8/decoder/detokenize.h 2010-10-28 13:14:14.000000000 +0000 +++ libvpx-0.9.6/vp8/decoder/detokenize.h 2011-03-04 20:40:40.000000000 +0000 @@ -14,10 +14,6 @@ #include "onyxd_int.h" -#if ARCH_ARM -#include "arm/detokenize_arm.h" -#endif - void vp8_reset_mb_tokens_context(MACROBLOCKD *x); int vp8_decode_mb_tokens(VP8D_COMP *, MACROBLOCKD *); diff -Nru libvpx-0.9.5/vp8/decoder/generic/dsystemdependent.c libvpx-0.9.6/vp8/decoder/generic/dsystemdependent.c --- libvpx-0.9.5/vp8/decoder/generic/dsystemdependent.c 2010-10-28 13:14:14.000000000 +0000 +++ libvpx-0.9.6/vp8/decoder/generic/dsystemdependent.c 2011-03-04 20:40:40.000000000 +0000 @@ -10,8 +10,8 @@ #include "vpx_ports/config.h" -#include "dequantize.h" -#include "onyxd_int.h" +#include "vp8/decoder/dequantize.h" +#include "vp8/decoder/onyxd_int.h" extern void vp8_arch_x86_decode_init(VP8D_COMP *pbi); extern void vp8_arch_arm_decode_init(VP8D_COMP *pbi); @@ -27,12 +27,6 @@ pbi->dequant.dc_idct_add_y_block = vp8_dequant_dc_idct_add_y_block_c; pbi->dequant.idct_add_y_block = vp8_dequant_idct_add_y_block_c; pbi->dequant.idct_add_uv_block = vp8_dequant_idct_add_uv_block_c; - pbi->dboolhuff.start = vp8dx_start_decode_c; - pbi->dboolhuff.fill = vp8dx_bool_decoder_fill_c; -#if 0 /*For use with RTCD, when implemented*/ - pbi->dboolhuff.debool = vp8dx_decode_bool_c; - pbi->dboolhuff.devalue = vp8dx_decode_value_c; -#endif #endif #if ARCH_X86 || ARCH_X86_64 diff -Nru 
libvpx-0.9.5/vp8/decoder/idct_blk.c libvpx-0.9.6/vp8/decoder/idct_blk.c --- libvpx-0.9.5/vp8/decoder/idct_blk.c 2010-10-28 13:14:14.000000000 +0000 +++ libvpx-0.9.6/vp8/decoder/idct_blk.c 2011-03-04 20:40:40.000000000 +0000 @@ -9,7 +9,7 @@ */ #include "vpx_ports/config.h" -#include "idct.h" +#include "vp8/common/idct.h" #include "dequantize.h" void vp8_dequant_dc_idct_add_c(short *input, short *dq, unsigned char *pred, diff -Nru libvpx-0.9.5/vp8/decoder/onyxd_if.c libvpx-0.9.6/vp8/decoder/onyxd_if.c --- libvpx-0.9.5/vp8/decoder/onyxd_if.c 2010-10-28 13:14:14.000000000 +0000 +++ libvpx-0.9.6/vp8/decoder/onyxd_if.c 2011-03-04 20:40:40.000000000 +0000 @@ -9,25 +9,25 @@ */ -#include "onyxc_int.h" +#include "vp8/common/onyxc_int.h" #if CONFIG_POSTPROC -#include "postproc.h" +#include "vp8/common/postproc.h" #endif -#include "onyxd.h" +#include "vp8/common/onyxd.h" #include "onyxd_int.h" #include "vpx_mem/vpx_mem.h" -#include "alloccommon.h" +#include "vp8/common/alloccommon.h" #include "vpx_scale/yv12extend.h" -#include "loopfilter.h" -#include "swapyv12buffer.h" -#include "g_common.h" -#include "threading.h" +#include "vp8/common/loopfilter.h" +#include "vp8/common/swapyv12buffer.h" +#include "vp8/common/g_common.h" +#include "vp8/common/threading.h" #include "decoderthreading.h" #include -#include "quant_common.h" +#include "vp8/common/quant_common.h" #include "vpx_scale/vpxscale.h" -#include "systemdependent.h" +#include "vp8/common/systemdependent.h" #include "vpx_ports/vpx_timer.h" #include "detokenize.h" #if ARCH_ARM @@ -114,8 +114,10 @@ pbi->ready_for_new_data = 1; pbi->CPUFreq = 0; /*vp8_get_processor_freq();*/ +#if CONFIG_MULTITHREAD pbi->max_threads = oxcf->max_threads; vp8_decoder_create_threads(pbi); +#endif /* vp8cx_init_de_quantizer() is first called here. Add check in frame_init_dequantizer() to avoid * unnecessary calling of vp8cx_init_de_quantizer() for every frame. 
@@ -131,9 +133,6 @@ cm->last_sharpness_level = cm->sharpness_level; } -#if CONFIG_ARM_ASM_DETOK - vp8_init_detokenizer(pbi); -#endif pbi->common.error.setjmp = 0; return (VP8D_PTR) pbi; } @@ -149,8 +148,8 @@ #if CONFIG_MULTITHREAD if (pbi->b_multithreaded_rd) vp8mt_de_alloc_temp_buffers(pbi, pbi->common.mb_rows); -#endif vp8_decoder_remove_threads(pbi); +#endif vp8_remove_common(&pbi->common); vpx_free(pbi); } @@ -254,12 +253,7 @@ /* If any buffer copy / swapping is signalled it should be done here. */ static int swap_frame_buffers (VP8_COMMON *cm) { - int fb_to_update_with, err = 0; - - if (cm->refresh_last_frame) - fb_to_update_with = cm->lst_fb_idx; - else - fb_to_update_with = cm->new_fb_idx; + int err = 0; /* The alternate reference frame or golden frame can be updated * using the new, last, or golden/alt ref frame. If it @@ -271,7 +265,7 @@ int new_fb = 0; if (cm->copy_buffer_to_arf == 1) - new_fb = fb_to_update_with; + new_fb = cm->lst_fb_idx; else if (cm->copy_buffer_to_arf == 2) new_fb = cm->gld_fb_idx; else @@ -285,7 +279,7 @@ int new_fb = 0; if (cm->copy_buffer_to_gf == 1) - new_fb = fb_to_update_with; + new_fb = cm->lst_fb_idx; else if (cm->copy_buffer_to_gf == 2) new_fb = cm->alt_fb_idx; else @@ -334,6 +328,23 @@ pbi->common.error.error_code = VPX_CODEC_OK; + if (size == 0) + { + /* This is used to signal that we are missing frames. + * We do not know if the missing frame(s) was supposed to update + * any of the reference buffers, but we act conservative and + * mark only the last buffer as corrupted. + */ + cm->yv12_fb[cm->lst_fb_idx].corrupted = 1; + + /* Signal that we have no frame to show. */ + cm->show_frame = 0; + + /* Nothing more to do. 
*/ + return 0; + } + + #if HAVE_ARMV7 #if CONFIG_RUNTIME_CPU_DETECT if (cm->rtcd.flags & HAS_NEON) @@ -356,6 +367,13 @@ } #endif pbi->common.error.setjmp = 0; + + /* We do not know if the missing frame(s) was supposed to update + * any of the reference buffers, but we act conservative and + * mark only the last buffer as corrupted. + */ + cm->yv12_fb[cm->lst_fb_idx].corrupted = 1; + if (cm->fb_idx_ref_cnt[cm->new_fb_idx] > 0) cm->fb_idx_ref_cnt[cm->new_fb_idx]--; return -1; @@ -388,6 +406,7 @@ return retcode; } +#if CONFIG_MULTITHREAD if (pbi->b_multithreaded_rd && cm->multi_token_partition != ONE_PARTITION) { if (swap_frame_buffers (cm)) @@ -405,6 +424,7 @@ return -1; } } else +#endif { if (swap_frame_buffers (cm)) { @@ -506,7 +526,7 @@ pbi->common.error.setjmp = 0; return retcode; } -int vp8dx_get_raw_frame(VP8D_PTR ptr, YV12_BUFFER_CONFIG *sd, INT64 *time_stamp, INT64 *time_end_stamp, int deblock_level, int noise_level, int flags) +int vp8dx_get_raw_frame(VP8D_PTR ptr, YV12_BUFFER_CONFIG *sd, INT64 *time_stamp, INT64 *time_end_stamp, vp8_ppflags_t *flags) { int ret = -1; VP8D_COMP *pbi = (VP8D_COMP *) ptr; @@ -524,7 +544,7 @@ sd->clrtype = pbi->common.clr_type; #if CONFIG_POSTPROC - ret = vp8_post_proc_frame(&pbi->common, sd, deblock_level, noise_level, flags); + ret = vp8_post_proc_frame(&pbi->common, sd, flags); #else if (pbi->common.frame_to_show) diff -Nru libvpx-0.9.5/vp8/decoder/onyxd_int.h libvpx-0.9.6/vp8/decoder/onyxd_int.h --- libvpx-0.9.5/vp8/decoder/onyxd_int.h 2010-10-28 13:14:14.000000000 +0000 +++ libvpx-0.9.6/vp8/decoder/onyxd_int.h 2011-03-04 20:40:40.000000000 +0000 @@ -12,10 +12,10 @@ #ifndef __INC_VP8D_INT_H #define __INC_VP8D_INT_H #include "vpx_ports/config.h" -#include "onyxd.h" +#include "vp8/common/onyxd.h" #include "treereader.h" -#include "onyxc_int.h" -#include "threading.h" +#include "vp8/common/onyxc_int.h" +#include "vp8/common/threading.h" #include "dequantize.h" typedef struct @@ -87,14 +87,15 @@ unsigned int time_decoding; 
unsigned int time_loop_filtering; +#if CONFIG_MULTITHREAD + /* variable for threading */ + volatile int b_multithreaded_rd; int max_threads; int current_mb_col_main; int decoding_thread_count; int allocated_decoding_thread_count; - /* variable for threading */ -#if CONFIG_MULTITHREAD int mt_baseline_filter_level[MAX_MB_SEGMENTS]; int sync_range; int *mt_current_mb_col; /* Each row remembers its already decoded column. */ @@ -125,7 +126,6 @@ #if CONFIG_RUNTIME_CPU_DETECT vp8_dequant_rtcd_vtable_t dequant; - struct vp8_dboolhuff_rtcd_vtable dboolhuff; #endif diff -Nru libvpx-0.9.5/vp8/decoder/reconintra_mt.c libvpx-0.9.6/vp8/decoder/reconintra_mt.c --- libvpx-0.9.5/vp8/decoder/reconintra_mt.c 2010-10-28 13:14:14.000000000 +0000 +++ libvpx-0.9.6/vp8/decoder/reconintra_mt.c 2011-03-04 20:40:40.000000000 +0000 @@ -10,8 +10,8 @@ #include "vpx_ports/config.h" -#include "recon.h" -#include "reconintra.h" +#include "vp8/common/recon.h" +#include "vp8/common/reconintra.h" #include "vpx_mem/vpx_mem.h" #include "onyxd_int.h" @@ -21,7 +21,6 @@ void vp8mt_build_intra_predictors_mby(VP8D_COMP *pbi, MACROBLOCKD *x, int mb_row, int mb_col) { -#if CONFIG_MULTITHREAD unsigned char *yabove_row; /* = x->dst.y_buffer - x->dst.y_stride; */ unsigned char *yleft_col; unsigned char yleft_buf[16]; @@ -146,17 +145,10 @@ case MB_MODE_COUNT: break; } -#else - (void) pbi; - (void) x; - (void) mb_row; - (void) mb_col; -#endif } void vp8mt_build_intra_predictors_mby_s(VP8D_COMP *pbi, MACROBLOCKD *x, int mb_row, int mb_col) { -#if CONFIG_MULTITHREAD unsigned char *yabove_row; /* = x->dst.y_buffer - x->dst.y_stride; */ unsigned char *yleft_col; unsigned char yleft_buf[16]; @@ -289,17 +281,10 @@ case MB_MODE_COUNT: break; } -#else - (void) pbi; - (void) x; - (void) mb_row; - (void) mb_col; -#endif } void vp8mt_build_intra_predictors_mbuv(VP8D_COMP *pbi, MACROBLOCKD *x, int mb_row, int mb_col) { -#if CONFIG_MULTITHREAD unsigned char *uabove_row; /* = x->dst.u_buffer - x->dst.uv_stride; */ unsigned 
char *uleft_col; /*[16];*/ unsigned char uleft_buf[8]; @@ -452,17 +437,10 @@ case MB_MODE_COUNT: break; } -#else - (void) pbi; - (void) x; - (void) mb_row; - (void) mb_col; -#endif } void vp8mt_build_intra_predictors_mbuv_s(VP8D_COMP *pbi, MACROBLOCKD *x, int mb_row, int mb_col) { -#if CONFIG_MULTITHREAD unsigned char *uabove_row; /* = x->dst.u_buffer - x->dst.uv_stride; */ unsigned char *uleft_col; /*[16];*/ unsigned char uleft_buf[8]; @@ -621,12 +599,6 @@ case MB_MODE_COUNT: break; } -#else - (void) pbi; - (void) x; - (void) mb_row; - (void) mb_col; -#endif } @@ -638,7 +610,6 @@ int mb_col, int num) { -#if CONFIG_MULTITHREAD int i, r, c; unsigned char *Above; /* = *(x->base_dst) + x->dst - x->dst_stride; */ @@ -935,15 +906,6 @@ } -#else - (void) pbi; - (void) xd; - (void) b_mode; - (void) predictor; - (void) mb_row; - (void) mb_col; - (void) num; -#endif } /* copy 4 bytes from the above right down so that the 4x4 prediction modes using pixels above and @@ -951,7 +913,6 @@ */ void vp8mt_intra_prediction_down_copy(VP8D_COMP *pbi, MACROBLOCKD *x, int mb_row, int mb_col) { -#if CONFIG_MULTITHREAD unsigned char *above_right; /* = *(x->block[0].base_dst) + x->block[0].dst - x->block[0].dst_stride + 16; */ unsigned int *src_ptr; unsigned int *dst_ptr0; @@ -973,10 +934,4 @@ *dst_ptr0 = *src_ptr; *dst_ptr1 = *src_ptr; *dst_ptr2 = *src_ptr; -#else - (void) pbi; - (void) x; - (void) mb_row; - (void) mb_col; -#endif } diff -Nru libvpx-0.9.5/vp8/decoder/threading.c libvpx-0.9.6/vp8/decoder/threading.c --- libvpx-0.9.5/vp8/decoder/threading.c 2010-10-28 13:14:14.000000000 +0000 +++ libvpx-0.9.6/vp8/decoder/threading.c 2011-03-04 20:40:40.000000000 +0000 @@ -9,7 +9,7 @@ */ -#ifndef WIN32 +#if !defined(WIN32) && CONFIG_OS_SUPPORT == 1 # include #endif #ifdef __APPLE__ @@ -17,13 +17,13 @@ #endif #include "onyxd_int.h" #include "vpx_mem/vpx_mem.h" -#include "threading.h" +#include "vp8/common/threading.h" -#include "loopfilter.h" -#include "extend.h" +#include 
"vp8/common/loopfilter.h" +#include "vp8/common/extend.h" #include "vpx_ports/vpx_timer.h" #include "detokenize.h" -#include "reconinter.h" +#include "vp8/common/reconinter.h" #include "reconintra_mt.h" extern void mb_init_dequantizer(VP8D_COMP *pbi, MACROBLOCKD *xd); @@ -38,7 +38,6 @@ void vp8_setup_decoding_thread_data(VP8D_COMP *pbi, MACROBLOCKD *xd, MB_ROW_DEC *mbrd, int count) { -#if CONFIG_MULTITHREAD VP8_COMMON *const pc = & pbi->common; int i, j; @@ -88,18 +87,11 @@ for (i=0; i< pc->mb_rows; i++) pbi->mt_current_mb_col[i]=-1; -#else - (void) pbi; - (void) xd; - (void) mbrd; - (void) count; -#endif } void vp8mt_decode_macroblock(VP8D_COMP *pbi, MACROBLOCKD *xd, int mb_row, int mb_col) { -#if CONFIG_MULTITHREAD int eobtotal = 0; int i, do_clamp = xd->mode_info_context->mbmi.need_to_clamp_mvs; VP8_COMMON *pc = &pbi->common; @@ -222,18 +214,11 @@ (xd->qcoeff+16*16, xd->block[16].dequant, xd->predictor+16*16, xd->dst.u_buffer, xd->dst.v_buffer, xd->dst.uv_stride, xd->eobs+16); -#else - (void) pbi; - (void) xd; - (void) mb_row; - (void) mb_col; -#endif } THREAD_FUNCTION vp8_thread_decoding_proc(void *p_data) { -#if CONFIG_MULTITHREAD int ithread = ((DECODETHREAD_DATA *)p_data)->ithread; VP8D_COMP *pbi = (VP8D_COMP *)(((DECODETHREAD_DATA *)p_data)->ptr1); MB_ROW_DEC *mbrd = (MB_ROW_DEC *)(((DECODETHREAD_DATA *)p_data)->ptr2); @@ -320,7 +305,7 @@ * These are specified to 8th pel as they are always compared to values that are in 1/8th pel units * Apply any context driven MB level adjustment */ - vp8_adjust_mb_lf_value(xd, &filter_level); + filter_level = vp8_adjust_mb_lf_value(xd, filter_level); } /* Distance of Mb to the various image edges. 
@@ -438,9 +423,6 @@ sem_post(&pbi->h_event_end_decoding); } } -#else - (void) p_data; -#endif return 0 ; } @@ -448,10 +430,8 @@ void vp8_decoder_create_threads(VP8D_COMP *pbi) { -#if CONFIG_MULTITHREAD int core_count = 0; int ithread; - int i; pbi->b_multithreaded_rd = 0; pbi->allocated_decoding_thread_count = 0; @@ -483,37 +463,26 @@ pbi->allocated_decoding_thread_count = pbi->decoding_thread_count; } - -#else - (void) pbi; -#endif } void vp8mt_de_alloc_temp_buffers(VP8D_COMP *pbi, int mb_rows) { -#if CONFIG_MULTITHREAD VP8_COMMON *const pc = & pbi->common; int i; if (pbi->b_multithreaded_rd) { - if (pbi->mt_current_mb_col) - { vpx_free(pbi->mt_current_mb_col); pbi->mt_current_mb_col = NULL ; - } /* Free above_row buffers. */ if (pbi->mt_yabove_row) { for (i=0; i< mb_rows; i++) { - if (pbi->mt_yabove_row[i]) - { vpx_free(pbi->mt_yabove_row[i]); pbi->mt_yabove_row[i] = NULL ; - } } vpx_free(pbi->mt_yabove_row); pbi->mt_yabove_row = NULL ; @@ -523,11 +492,8 @@ { for (i=0; i< mb_rows; i++) { - if (pbi->mt_uabove_row[i]) - { vpx_free(pbi->mt_uabove_row[i]); pbi->mt_uabove_row[i] = NULL ; - } } vpx_free(pbi->mt_uabove_row); pbi->mt_uabove_row = NULL ; @@ -537,11 +503,8 @@ { for (i=0; i< mb_rows; i++) { - if (pbi->mt_vabove_row[i]) - { vpx_free(pbi->mt_vabove_row[i]); pbi->mt_vabove_row[i] = NULL ; - } } vpx_free(pbi->mt_vabove_row); pbi->mt_vabove_row = NULL ; @@ -552,11 +515,8 @@ { for (i=0; i< mb_rows; i++) { - if (pbi->mt_yleft_col[i]) - { vpx_free(pbi->mt_yleft_col[i]); pbi->mt_yleft_col[i] = NULL ; - } } vpx_free(pbi->mt_yleft_col); pbi->mt_yleft_col = NULL ; @@ -566,11 +526,8 @@ { for (i=0; i< mb_rows; i++) { - if (pbi->mt_uleft_col[i]) - { vpx_free(pbi->mt_uleft_col[i]); pbi->mt_uleft_col[i] = NULL ; - } } vpx_free(pbi->mt_uleft_col); pbi->mt_uleft_col = NULL ; @@ -580,25 +537,18 @@ { for (i=0; i< mb_rows; i++) { - if (pbi->mt_vleft_col[i]) - { vpx_free(pbi->mt_vleft_col[i]); pbi->mt_vleft_col[i] = NULL ; - } } vpx_free(pbi->mt_vleft_col); pbi->mt_vleft_col = 
NULL ; } } -#else - (void) pbi; -#endif } -int vp8mt_alloc_temp_buffers(VP8D_COMP *pbi, int width, int prev_mb_rows) +void vp8mt_alloc_temp_buffers(VP8D_COMP *pbi, int width, int prev_mb_rows) { -#if CONFIG_MULTITHREAD VP8_COMMON *const pc = & pbi->common; int i; int uv_width; @@ -647,18 +597,11 @@ for (i=0; i< pc->mb_rows; i++) CHECK_MEM_ERROR(pbi->mt_vleft_col[i], vpx_calloc(sizeof(unsigned char) * 8, 1)); } - return 0; -#else - (void) pbi; - (void) width; -#endif } void vp8_decoder_remove_threads(VP8D_COMP *pbi) { -#if CONFIG_MULTITHREAD - /* shutdown MB Decoding thread; */ if (pbi->b_multithreaded_rd) { @@ -680,39 +623,23 @@ sem_destroy(&pbi->h_event_end_decoding); - if (pbi->h_decoding_thread) - { vpx_free(pbi->h_decoding_thread); pbi->h_decoding_thread = NULL; - } - if (pbi->h_event_start_decoding) - { vpx_free(pbi->h_event_start_decoding); pbi->h_event_start_decoding = NULL; - } - if (pbi->mb_row_di) - { vpx_free(pbi->mb_row_di); pbi->mb_row_di = NULL ; - } - if (pbi->de_thread_data) - { vpx_free(pbi->de_thread_data); pbi->de_thread_data = NULL; - } } -#else - (void) pbi; -#endif } void vp8mt_lpf_init( VP8D_COMP *pbi, int default_filt_lvl) { -#if CONFIG_MULTITHREAD VP8_COMMON *cm = &pbi->common; MACROBLOCKD *mbd = &pbi->mb; /*YV12_BUFFER_CONFIG *post = &cm->new_frame;*/ /*frame_to_show;*/ @@ -722,7 +649,6 @@ /*int mb_row; int mb_col; int baseline_filter_level[MAX_MB_SEGMENTS];*/ - int filter_level; int alt_flt_enabled = mbd->segmentation_enabled; int i; @@ -755,22 +681,17 @@ vp8_init_loop_filter(cm); else if (frame_type != cm->last_frame_type) vp8_frame_init_loop_filter(lfi, frame_type); -#else - (void) pbi; - (void) default_filt_lvl; -#endif } void vp8mt_decode_mb_rows( VP8D_COMP *pbi, MACROBLOCKD *xd) { -#if CONFIG_MULTITHREAD int mb_row; VP8_COMMON *pc = &pbi->common; int ibc = 0; int num_part = 1 << pbi->common.multi_token_partition; - int i, j; + int i; volatile int *last_row_current_mb_col = NULL; int nsync = pbi->sync_range; @@ -810,7 +731,6 @@ for 
(mb_row = 0; mb_row < pc->mb_rows; mb_row += (pbi->decoding_thread_count + 1)) { - int i; xd->current_bc = &pbi->mbc[mb_row%num_part]; @@ -867,7 +787,7 @@ * These are specified to 8th pel as they are always compared to values that are in 1/8th pel units * Apply any context driven MB level adjustment */ - vp8_adjust_mb_lf_value(xd, &filter_level); + filter_level = vp8_adjust_mb_lf_value(xd, filter_level); } /* Distance of Mb to the various image edges. @@ -894,9 +814,18 @@ xd->pre.u_buffer = pc->yv12_fb[ref_fb_idx].u_buffer + recon_uvoffset; xd->pre.v_buffer = pc->yv12_fb[ref_fb_idx].v_buffer + recon_uvoffset; + if (xd->mode_info_context->mbmi.ref_frame != INTRA_FRAME) + { + /* propagate errors from reference frames */ + xd->corrupted |= pc->yv12_fb[ref_fb_idx].corrupted; + } + vp8_build_uvmvs(xd, pc->full_pixel); vp8mt_decode_macroblock(pbi, xd, mb_row, mb_col); + /* check if the boolean decoder has suffered an error */ + xd->corrupted |= vp8dx_bool_error(xd->current_bc); + if (pbi->common.filter_level) { /* Save decoded MB last row data for next-row decoding */ @@ -976,8 +905,4 @@ } sem_wait(&pbi->h_event_end_decoding); /* add back for each frame */ -#else - (void) pbi; - (void) xd; -#endif } diff -Nru libvpx-0.9.5/vp8/decoder/treereader.h libvpx-0.9.6/vp8/decoder/treereader.h --- libvpx-0.9.5/vp8/decoder/treereader.h 2010-10-28 13:14:14.000000000 +0000 +++ libvpx-0.9.6/vp8/decoder/treereader.h 2011-03-04 20:40:40.000000000 +0000 @@ -12,7 +12,7 @@ #ifndef tree_reader_h #define tree_reader_h 1 -#include "treecoder.h" +#include "vp8/common/treecoder.h" #include "dboolhuff.h" diff -Nru libvpx-0.9.5/vp8/decoder/x86/idct_blk_mmx.c libvpx-0.9.6/vp8/decoder/x86/idct_blk_mmx.c --- libvpx-0.9.5/vp8/decoder/x86/idct_blk_mmx.c 2010-10-28 13:14:14.000000000 +0000 +++ libvpx-0.9.6/vp8/decoder/x86/idct_blk_mmx.c 2011-03-04 20:40:40.000000000 +0000 @@ -9,8 +9,8 @@ */ #include "vpx_ports/config.h" -#include "idct.h" -#include "dequantize.h" +#include "vp8/common/idct.h" +#include 
"vp8/decoder/dequantize.h" void vp8_dequant_dc_idct_add_y_block_mmx (short *q, short *dq, unsigned char *pre, diff -Nru libvpx-0.9.5/vp8/decoder/x86/idct_blk_sse2.c libvpx-0.9.6/vp8/decoder/x86/idct_blk_sse2.c --- libvpx-0.9.5/vp8/decoder/x86/idct_blk_sse2.c 2010-10-28 13:14:14.000000000 +0000 +++ libvpx-0.9.6/vp8/decoder/x86/idct_blk_sse2.c 2011-03-04 20:40:40.000000000 +0000 @@ -9,8 +9,8 @@ */ #include "vpx_ports/config.h" -#include "idct.h" -#include "dequantize.h" +#include "vp8/common/idct.h" +#include "vp8/decoder/dequantize.h" void idct_dequant_dc_0_2x_sse2 (short *q, short *dq, unsigned char *pre, diff -Nru libvpx-0.9.5/vp8/decoder/x86/onyxdxv.c libvpx-0.9.6/vp8/decoder/x86/onyxdxv.c --- libvpx-0.9.5/vp8/decoder/x86/onyxdxv.c 2010-10-28 13:14:14.000000000 +0000 +++ libvpx-0.9.6/vp8/decoder/x86/onyxdxv.c 1970-01-01 00:00:00.000000000 +0000 @@ -1,1080 +0,0 @@ -/* - * Copyright (c) 2010 The WebM project authors. All Rights Reserved. - * - * Use of this source code is governed by a BSD-style license - * that can be found in the LICENSE file in the root of the source - * tree. An additional intellectual property rights grant can be found - * in the file PATENTS. All contributing project authors may - * be found in the AUTHORS file in the root of the source tree. - */ - - -/**************************************************************************** -* -* Module Title : onyxdxv.c -* -* Description : VP80 interface to DXV. 
-* -***************************************************************************** -*/ -/**************************************************************************** -* Header Files -****************************************************************************/ -#include // For Abs() -#include "pragmas.h" - -#include "vpxdxv.h" -#include "vpxdxv_plugin.h" - -#include "onyxd_int.h" -#include "onyx.h" -#include "codec_common_interface.h" -#include "vpx_scale/vpxscale.h" -#include "vpx_mem/vpx_mem.h" -#include "postproc.h" -#include "vpxblit.h" -#include "g_common.h" -#include "vpx_scale/yv12extend.h" - -#include -#include -#include "scale_mode.h" -#include "onyx_pb_interface.h" - -/**************************************************************************** -* Macros -****************************************************************************/ - -#define VP8_FOURCC DXL_MKFOURCC( 'V', 'P', '8', '0') - -extern void vp8_blit_text(const char *msg, unsigned char *address, const int pitch); - - -/**************************************************************************** -* Typedefs -****************************************************************************/ - -typedef struct // YUV buffer configuration structure -{ - int y_width; - int y_height; - int y_stride; - - int uv_width; - int uv_height; - int uv_stride; - - char *y_buffer; - char *u_buffer; - char *v_buffer; - - char *uv_start; - int uv_dst_area; - int uv_used_area; - - unsigned char *y_ptr_scrn; - unsigned char *u_ptr_scrn; - unsigned char *v_ptr_scrn; - - -} DXV_YUV_BUFFER_CONFIG; - - -typedef void ((*vp8blit_func)(unsigned char *, int, YUV_BUFFER_CONFIG *)); - -/* define an x_image structure based on the core x_image struct */ -typedef struct t_ximage_codec -{ - DXV_YUV_BUFFER_CONFIG frame_buffer; - VP8D_COMP *my_pbi; - VP8_COMMON *common; - int owned; - int decompressed_once; - - int sizeof_pixel; - vp8blit_func blitter; - - unsigned int ppl_tag; - unsigned int bd_tag; - unsigned int 
*supported_output_format_list; - - int cpu_free; - int postproc; - int add_noise; - int deinterlace; - - int post_proc2time; - int post_proc4time; - - int hs; - int hr; - int vs; - int vr; - YV12_BUFFER_CONFIG this_buffer; - YV12_BUFFER_CONFIG scaled_buffer; - YV12_BUFFER_CONFIG *passed_in_buffer; - - int avgq; - int ppcount; - - -} VP8_XIMAGE, *VP8_XIMAGE_HANDLE; - - -/**************************************************************************** -* Modul Statics -****************************************************************************/ -static unsigned int g_vp8_preferred_output_format_list[] = -{ - VPXDXV_YUY2, - VPXDXV_UYVY, - VPXDXV_RGB8888, - VPXDXV_RGB888, - VPXDXV_RGB555, - VPXDXV_RGB565, - VPXDXV_YV12, - VPXDXV_I420, - -// VPXDXV_YV12, -// VPXDXV_YUY2, -// VPXDXV_RGB565, -// VPXDXV_UYVY, - 0 -}; - -/**************************************************************************** -* Forward declarationss -****************************************************************************/ -void onyx_set_parameter(XIMAGE_HANDLE src, int Command, unsigned int Parameter); - -static int onyx_get_output_format(XIMAGE_HANDLE src, unsigned int *bd_tag); -static int onyx_set_output_format(XIMAGE_HANDLE src, unsigned int bd_tag); - -static int vpx_get_size_of_pixel(unsigned int bd); - -/**************************************************************************** -* Imports -****************************************************************************/ - -#define __Clamp255(x) (unsigned char) ( (x) < 0 ? 0 : ( (x) <= 255 ? 
(x) : 255 ) ) - -/* -//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -*/ -/* -//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -*/ -void -convert_yv12_buffer_types(YV12_BUFFER_CONFIG *source, DXV_YUV_BUFFER_CONFIG *dest) -{ - dest->y_buffer = (char *)source->y_buffer; - dest->u_buffer = (char *)source->u_buffer; - dest->v_buffer = (char *)source->v_buffer; - dest->y_width = source->y_width; - dest->y_height = source->y_height; - dest->y_stride = source->y_stride; - dest->uv_width = source->uv_width; - dest->uv_height = source->uv_height; - dest->uv_stride = source->uv_stride; -} - -/* -//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -*/ - - -int onyx_blit -( - XIMAGE_HANDLE src, - VSCREEN_HANDLE v_screen, - DXV_YUV_BUFFER_CONFIG *frame_buffer, - int x, - int y -) -{ - VP8_XIMAGE_HANDLE tab = (VP8_XIMAGE_HANDLE)vpxdxv_get_algorithm_base_ptr(src); - VP8D_COMP *pbi; - VP8_COMMON *common = tab->common; - pbi = tab->my_pbi; - - if (v_screen) /* if there is a v_screen, blit to it */ - { - unsigned char *ptr_scrn; - int this_pitch, vs_height, vs_width; - unsigned int start_tick, stop_tick; - - vpxdxv_get_vscreen_attributes(v_screen, (void **)&ptr_scrn, &vs_width, &vs_height, &this_pitch); - - if (ptr_scrn) - { - int w, h; - - int p_size; - int view_x, view_y, view_w; - int hs, hr, vs, vr; - int neww, newh; - int cw, ch; - int microseconds_available = (int)(1000000 / 30); - - microseconds_available = microseconds_available * tab->cpu_free / 100; - - if (pbi) - { - microseconds_available -= pbi->decode_microseconds; - - if (tab->cpu_free == 0) - microseconds_available = INT_MAX; - - if (tab->post_proc2time == 0) - tab->post_proc2time = 
pbi->decode_microseconds * 1 / 2; - - if (tab->post_proc4time == 0) - tab->post_proc4time = pbi->decode_microseconds; - } - - - if (tab->ppcount == 0) - { - tab->post_proc2time = 0; - tab->post_proc4time = 0; - tab->ppcount = 64; - } - else - { - tab->ppcount --; - } - - vpxdxv_get_vscreen_view(v_screen, &view_x, &view_y, &view_w, NULL); - - Scale2Ratio(common->horiz_scale, &hr, &hs); - Scale2Ratio(common->vert_scale, &vr, &vs); - - if (tab->postproc && tab->passed_in_buffer == 0) - { - int show_text = 0; - - unsigned char message[512]; - - int pp = tab->postproc; - int q = (tab->avgq + 4) / 8; - int noise = 0; - - vp8_clear_system_state(); - - if (pp >= 1000) - { - pp -= 1000; - noise = pp / 100; - pp = pp - noise * 100; - } - - if (pp >= 300) - { - pp -= 300; - show_text = 3; - } - else if (pp >= 200) - { - pp -= 200; - show_text = 2; - } - else if (pp >= 100) - { - pp -= 100; - show_text = 1; - } - - if (pbi && (pbi->mb.segmentation_enabled & SEGMENT_PF) && tab->deinterlace) - { - de_interlace(common->frame_to_show->y_buffer, common->post_proc_buffer.y_buffer, - common->post_proc_buffer.y_width, common->post_proc_buffer.y_height, - common->post_proc_buffer.y_stride); - - de_interlace(common->frame_to_show->u_buffer, common->post_proc_buffer.u_buffer, - common->post_proc_buffer.uv_width, common->post_proc_buffer.uv_height, - common->post_proc_buffer.uv_stride); - de_interlace(common->frame_to_show->v_buffer, common->post_proc_buffer.v_buffer, - common->post_proc_buffer.uv_width, common->post_proc_buffer.uv_height, - common->post_proc_buffer.uv_stride); - } - else - { - if (pp >= 10 && pp <= 20) - { - q = q + (pp - 15) * 10; - - if (q < 0) - q = 0; - } - - start_tick = vp8_get_high_res_timer_tick(); - - if (pp > 3 && tab->post_proc4time < microseconds_available) - { - vp8_deblock_and_de_macro_block(common->frame_to_show, &common->post_proc_buffer, q, 1, 0); - - stop_tick = vp8_get_high_res_timer_tick(); - - if (pbi) - tab->post_proc4time = 
vp8_get_time_in_micro_sec(start_tick, stop_tick); - } - - else if (pp > 0 && tab->post_proc2time < microseconds_available) - { - vp8_deblock(common->frame_to_show, &common->post_proc_buffer, q , 1, 0); - stop_tick = vp8_get_high_res_timer_tick(); - - if (pbi) - tab->post_proc2time = vp8_get_time_in_micro_sec(start_tick, stop_tick); - } - else - { - vp8_yv12_copy_frame(common->frame_to_show, &common->post_proc_buffer); - } - - } - - vp8_clear_system_state(); - - if (tab->add_noise == 1) - { - - vp8_plane_add_noise(common->post_proc_buffer.y_buffer, - common->post_proc_buffer.y_width, common->post_proc_buffer.y_height, - common->post_proc_buffer.y_stride, 63 - q, noise); - } - - - if (show_text == 1) - { -#ifdef PACKET_TESTING - { - VP8_HEADER *oh2 = (VP8_HEADER *) pbi->Source; - sprintf(message, "%8d %d%d%d%d%d size:%d\n", - oh2->frame_number , - oh2->update_gold , - oh2->update_last , - oh2->uses_gold , - oh2->uses_last , - oh2->type, - vpxdxv_get_ximage_csize(src)); - } -#else - sprintf(message, "F:%1ldG:%1ldQ:%3ldF:%3ld,%3ldP:%d_s:%6ld,N:%d,", - (common->frame_type == KEY_FRAME), - common->refresh_golden_frame, - common->base_qindex, - common->filter_level, - q, - tab->postproc, - vpxdxv_get_ximage_csize(src), noise); -#endif - - vp8_blit_text(message, common->post_proc_buffer.y_buffer, common->post_proc_buffer.y_stride); - - } - else if (show_text == 2) - { - int i, j; - unsigned char *y_ptr; - YV12_BUFFER_CONFIG *post = &common->post_proc_buffer; - int mb_rows = post->y_height >> 4; - int mb_cols = post->y_width >> 4; - int mb_index = 0; - MODE_INFO *mi = common->mi; - - y_ptr = post->y_buffer + 4 * post->y_stride + 4; - - // vp8_filter each macro block - for (i = 0; i < mb_rows; i++) - { - for (j = 0; j < mb_cols; j++) - { - char zz[4]; - - if (pp == 4) - sprintf(zz, "%c", mi[mb_index].mbmi.mode + 'a'); - else - sprintf(zz, "%c", mi[mb_index].mbmi.ref_frame + 'a'); - - vp8_blit_text(zz, y_ptr, post->y_stride); - mb_index ++; - y_ptr += 16; - } - - mb_index ++; 
//border - y_ptr += post->y_stride * 16 - post->y_width; - - } - } - else if (show_text == 3) - { - int i, j; - unsigned char *y_ptr; - YV12_BUFFER_CONFIG *post = &common->post_proc_buffer; - int mb_rows = post->y_height >> 4; - int mb_cols = post->y_width >> 4; - int mb_index = 0; - MODE_INFO *mi = common->mi; - - y_ptr = post->y_buffer + 4 * post->y_stride + 4; - - // vp8_filter each macro block - for (i = 0; i < mb_rows; i++) - { - for (j = 0; j < mb_cols; j++) - { - char zz[4]; - - if (j == 0) - sprintf(zz, "%c", '0' + i % 10); - else - sprintf(zz, "%c", '0' + j % 10); - - vp8_blit_text(zz, y_ptr, post->y_stride); - mb_index ++; - y_ptr += 16; - } - - y_ptr += post->y_stride * 16 - post->y_width; - - } - } - - vpx_memcpy(&tab->this_buffer, &common->post_proc_buffer, sizeof(YV12_BUFFER_CONFIG)); - } - else - { - vpx_memcpy(&tab->this_buffer, common->frame_to_show, sizeof(YV12_BUFFER_CONFIG)); - } - - - /* get a frame pointer to the scaled and postprocessed reconstructed buffer */ - if (tab->passed_in_buffer == 0) - { - if (common->horiz_scale != NORMAL || common->vert_scale != NORMAL) - { - neww = hs * tab->this_buffer.y_width / hr; - newh = vs * tab->this_buffer.y_height / vr; - - neww += neww & 1; - - if (tab->hs != hs || tab->hr != hr || tab->vs != vs || tab->vr != vr) - { - vp8_yv12_alloc_frame_buffer(&tab->scaled_buffer, neww, newh , 8); - } - - vp8_yv12_scale_or_center(&tab->this_buffer, - &tab->scaled_buffer, - neww, newh, SCALE_TO_FIT, hs, hr, vs, vr); - - convert_yv12_buffer_types(&tab->scaled_buffer, frame_buffer); - - cw = hs * common->Width / hr; - ch = vs * common->Height / vr; - - } - else - { - convert_yv12_buffer_types(&tab->this_buffer, frame_buffer); - - cw = common->Width; - ch = common->Height; - } - } - else - { - convert_yv12_buffer_types(tab->passed_in_buffer, frame_buffer); - cw = common->Width; - ch = common->Height; - tab->passed_in_buffer = 0; - } - - frame_buffer->y_width = cw; - frame_buffer->y_height = ch; - frame_buffer->uv_width = 
cw / 2; - frame_buffer->uv_height = ch / 2; - - p_size = vpx_get_size_of_pixel(tab->bd_tag); - - /* remember to offset if requested */ - y += view_y; - x += view_x ; - - /* for planar destinations */ - w = view_w; - h = vs_height; - - if (w < frame_buffer->y_width) - { - frame_buffer->y_width = w; - frame_buffer->uv_width = (w + 1) / 2; - } - - if (h < frame_buffer->y_height) - { - frame_buffer->y_height = h; - frame_buffer->uv_height = (h + 1) / 2; - } - - if (frame_buffer->y_width < view_w) - x += (view_w - frame_buffer->y_width) / 2; - - if (x & 1) - x -= 1; - - if (frame_buffer->y_height < vs_height) - y += (vs_height - frame_buffer->y_height) / 2; - - - ptr_scrn += (x * p_size) + (y * this_pitch); - - frame_buffer->y_stride *= -1; - frame_buffer->uv_stride *= -1; - - if (tab->bd_tag == VPXDXV_YV12 || tab->bd_tag == VPXDXV_I420) - { - if (this_pitch < 0) - { - frame_buffer->uv_start = (char *)(ptr_scrn + abs(this_pitch) + abs(this_pitch) * h / 4 + this_pitch / 2); - frame_buffer->uv_dst_area = abs((this_pitch * h) / 4); - frame_buffer->uv_used_area = 0; - } - else - { - frame_buffer->uv_start = (char *)(ptr_scrn + (this_pitch * h)); - frame_buffer->uv_dst_area = (((this_pitch + 1) / 2) * ((h + 1) / 2)); - frame_buffer->uv_used_area = (((this_pitch + 1) / 2) * frame_buffer->uv_height); - } - } - - if ((pbi->mb.segmentation_enabled & SEGMENT_PF) && (tab->bd_tag != VPXDXV_YV12 && tab->bd_tag != VPXDXV_I420)) - { - int ypitch = frame_buffer->y_stride; - int uvpitch = frame_buffer->uv_stride; - - frame_buffer->y_stride <<= 1; - frame_buffer->y_height >>= 1; - frame_buffer->uv_stride <<= 1; - frame_buffer->uv_height >>= 1; - - ptr_scrn += this_pitch; - frame_buffer->y_buffer -= ypitch; - frame_buffer->u_buffer -= uvpitch; - frame_buffer->v_buffer -= uvpitch; - tab->blitter(ptr_scrn, 2 * this_pitch, (YUV_BUFFER_CONFIG *)(&tab->frame_buffer)); - - ptr_scrn -= this_pitch; - frame_buffer->y_buffer += ypitch; - frame_buffer->u_buffer += uvpitch; - frame_buffer->v_buffer 
+= uvpitch; - tab->blitter(ptr_scrn, 2 * this_pitch, (YUV_BUFFER_CONFIG *)(&tab->frame_buffer)); - - } - else - { - /* blit the screen */ - tab->blitter(ptr_scrn, this_pitch, (YUV_BUFFER_CONFIG *)(&tab->frame_buffer)); - vpx_log("Decoder: Frame shown \n"); - } - - } - else - vpx_log("Decoder: Frame not shown scrn pointer 0\n"); - } - else - vpx_log("Decoder: Frame not shown vscreen 0\n"); - - return DXV_OK; -} -/**************************************************************************** - * - * ROUTINE : onyx_decompress - * - * INPUTS : None - * - * OUTPUTS : None - * - * RETURNS : None. - * - * FUNCTION : - * - * SPECIAL NOTES : - * - ****************************************************************************/ -static -int onyx_decompress(XIMAGE_HANDLE src, VSCREEN_HANDLE v_screen) -{ - VP8_XIMAGE_HANDLE this_algorithm_base = (VP8_XIMAGE_HANDLE)vpxdxv_get_algorithm_base_ptr(src); - unsigned char *c_addr; - unsigned int c_size; - int w, h, x, y; - int vp8_rv; - - c_addr = vpxdxv_get_ximage_cdata_addr(src); - c_size = vpxdxv_get_ximage_csize(src); - vpxdxv_get_ximage_xywh(src, &x, &y, &w, &h); - - // if we have a compressed frame decompress it ( otherwise we'll just redo - // the scaling and postprocessing from the last frame ) - if (c_addr) - { - if (c_size != 0) - { - int flags; - int ret_val; - - int f; - - // decode the frame - ret_val = vp8d_decompress_frame((VP8D_PTR) this_algorithm_base->my_pbi, - c_size, - (char *) c_addr, - &this_algorithm_base->this_buffer, - &flags); - - - f = this_algorithm_base->my_pbi->common.filter_level * 10 / 6; - - if (this_algorithm_base->my_pbi->common.frame_type == KEY_FRAME) - this_algorithm_base->avgq = 8 * f; - else - this_algorithm_base->avgq = this_algorithm_base->avgq * 7 / 8 + f; - - - - if (ret_val != 0) - { - if (ret_val == -1) - return DXV_VERSION_CONFLICT; - else - return DXV_BAD_DATA; - } - - } - } - - - vp8_rv = onyx_blit(src, v_screen, &this_algorithm_base->frame_buffer, x, y); - - - return vp8_rv; -} -/* 
-//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -*/ -static -int vp8_ximagedestroy(XIMAGE_HANDLE src) -{ - VP8_XIMAGE_HANDLE this_algorithm_base = (VP8_XIMAGE_HANDLE)vpxdxv_get_algorithm_base_ptr(src); - - if (this_algorithm_base) - { - - vp8_yv12_de_alloc_frame_buffer(&this_algorithm_base->scaled_buffer); - - /* safety check in case stopdecode was not called */ - if (this_algorithm_base->owned) - vp8dx_remove_decompressor((VP8D_PTR)(this_algorithm_base->my_pbi)); - - duck_free(this_algorithm_base); - } - - return DXV_OK; -} -/* -//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -*/ -static int -onyx_get_post_proc(XIMAGE_HANDLE src, unsigned int *ppl) -{ - VP8_XIMAGE_HANDLE this_algorithm_base = (VP8_XIMAGE_HANDLE)vpxdxv_get_algorithm_base_ptr(src); - - if (this_algorithm_base) - { - *ppl = this_algorithm_base->ppl_tag; - - return DXV_OK; - } - - return DXV_NULL_BASE; -} -/* -//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -*/ -static int -onyx_set_post_proc(XIMAGE_HANDLE src, unsigned int ppl) -{ - VP8_XIMAGE_HANDLE this_algorithm_base = (VP8_XIMAGE_HANDLE)vpxdxv_get_algorithm_base_ptr(src); - - if (this_algorithm_base) - { - this_algorithm_base->ppl_tag = ppl; - - return DXV_OK; - } - - return DXV_NULL_BASE; -} -/* -//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -*/ -static -int vp8_ximagestop_decode(XIMAGE_HANDLE src) -{ - VP8_XIMAGE_HANDLE this_algorithm_base = (VP8_XIMAGE_HANDLE)vpxdxv_get_algorithm_base_ptr(src); - - if (this_algorithm_base) - { - - vp8_yv12_de_alloc_frame_buffer(&this_algorithm_base->scaled_buffer); - - if 
(this_algorithm_base->owned) - vp8dx_remove_decompressor((VP8D_PTR)(this_algorithm_base->my_pbi)); - - this_algorithm_base->owned = 0; - } - - return DXV_OK; -} - - -/* -//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -*/ -static -int vp8_ximagestart_decode -( - XIMAGE_HANDLE src -) -{ - VP8_XIMAGE_HANDLE this_algorithm_base = (VP8_XIMAGE_HANDLE)vpxdxv_get_algorithm_base_ptr(src); - XIMAGE_INFO_PTR xinfo = vpxdxv_get_ximage_info(src); - VP8D_CONFIG ocf; - - if (xinfo) - { - ocf.Width = xinfo->width; - ocf.Height = xinfo->height; - } - - if (this_algorithm_base->common == 0) - { - this_algorithm_base->my_pbi = (VP8D_COMP *) vp8dx_create_decompressor(&ocf); - this_algorithm_base->owned = 1; - this_algorithm_base->common = &this_algorithm_base->my_pbi->common; - this_algorithm_base->avgq = 0; - - } - - this_algorithm_base->passed_in_buffer = 0; - this_algorithm_base->post_proc2time = 0; - this_algorithm_base->post_proc4time = 0; - this_algorithm_base->ppcount = 64; - - return DXV_OK; -} -/* -//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -*/ -static -DXV_HANDLE vp8_ximagecreate(XIMAGE_HANDLE src) -{ - VP8_XIMAGE_HANDLE this_algorithm_base; - - /* create a new algorithm base container */ - this_algorithm_base = (VP8_XIMAGE_HANDLE)duck_calloc(1, sizeof(VP8_XIMAGE), DMEM_GENERAL); - - if (this_algorithm_base == NULL) - return NULL; - - vp8_scale_machine_specific_config(); - - vpxdxv_register_ximage_start_decode(src, vp8_ximagestart_decode); - - vpxdxv_register_ximage_stop_decode(src, vp8_ximagestop_decode); - - vpxdxv_register_ximage_destroy(src, vp8_ximagedestroy); - - vpxdxv_register_ximage_dx(src, onyx_decompress); - - vpxdxv_register_ximage_set_parameter(src, onyx_set_parameter); - - vpxdxv_register_ximage_output_format_func(src, - 
onyx_get_output_format, - onyx_set_output_format); - - vpxdxv_register_ximage_post_proc_level_func(src, - onyx_get_post_proc, - onyx_set_post_proc); - - return (DXV_HANDLE)this_algorithm_base; -} - -/* -//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -*/ - -static int store_output_list(unsigned int supported, int count, - unsigned int *outlist) -{ - int i = 0, j = 0, - ret = DXV_OK; - - while (i < count) - { - while (supported && !(supported & 0x01)) - { - supported >>= 1; - ++j; - } - - *(outlist + i) = g_vp8_preferred_output_format_list[j]; - ++i; - ++j; - supported >>= 1; - } - - - return ret; -} - -/* -//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -*/ -static int onyx_get_output_list(XIMAGE_INFO_PTR xinfo, unsigned int *outlist, - unsigned int *size) -{ - int i, - ret = DXV_INVALID_REQUEST; - unsigned int supported = 0, - count = 0; - (void)xinfo; - - if (size) - { - for (i = 0; i < sizeof(g_vp8_preferred_output_format_list) / sizeof(unsigned int) && i < 32; ++i) - { - if (vpx_get_blitter(g_vp8_preferred_output_format_list[i]) != (void *)0xffffffff) - { - supported |= (1 << i); - ++count; - } - } - - if (outlist) - { - if (count && ((count + 1) == (*size / sizeof(int)))) - ret = store_output_list(supported, count, outlist); - else - *outlist = 0; - } - else - { - *size = (count + 1) * sizeof(int); - ret = DXV_OK; - } - } - - return ret; -} - -/* -//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -*/ -int onyx_init(void) -{ - int vp8_rv; - - /* register VPX blitters based on cpu */ - vpx_set_blit(); - - vp8_rv = vpxdxv_register_ximage(vp8_ximagecreate, onyx_get_output_list, VP8_FOURCC); - return vp8_rv; - - return DXV_OK; -} -/* 
-//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -*/ -int onyx_exit(void) -{ - - vpxdxv_un_register_ximage(VP8_FOURCC); - - return DXV_OK; -} -/**************************************************************************** - * - * ROUTINE : onyx_set_parameter - * - * INPUTS : XIMAGE_HANDLE src : - * int Command : - * unsigned long Parameter : - * - * OUTPUTS : None. - * - * RETURNS : void - * - * FUNCTION : - * - * - * SPECIAL NOTES : None. - * - ****************************************************************************/ -void onyx_set_parameter(XIMAGE_HANDLE src, int Command, unsigned int Parameter) -{ - VP8_XIMAGE_HANDLE this_algorithm_base = (VP8_XIMAGE_HANDLE)vpxdxv_get_algorithm_base_ptr(src); - - switch (Command) - { - case PBC_SET_CPUFREE: - this_algorithm_base->cpu_free = Parameter; - break; - case PBC_SET_POSTPROC: - this_algorithm_base->postproc = Parameter; - break; - - case PBC_SET_BLITBUFF: - this_algorithm_base->passed_in_buffer = (YV12_BUFFER_CONFIG *) Parameter; - break; - - case PBC_SET_REFERENCEFRAME: - { - VP8_XIMAGE_HANDLE tab = (VP8_XIMAGE_HANDLE)vpxdxv_get_algorithm_base_ptr(src); - VP8D_COMP *pbi; - pbi = tab->my_pbi; - vp8_yv12_copy_frame((YV12_BUFFER_CONFIG *) Parameter, &pbi->common.last_frame); - } - break; - - case PBC_SET_COMMON: - - if (Parameter) - { - this_algorithm_base->common = (VP8_COMMON *)Parameter; - } - - break; - case PBC_SET_ADDNOISE: - this_algorithm_base->add_noise = Parameter; - break; - case PBC_SET_DEINTERLACEMODE: - this_algorithm_base->deinterlace = Parameter; - break; - - } -} -/* -//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -*/ -static int -onyx_get_output_format(XIMAGE_HANDLE src, unsigned int *format_tag) -{ - VP8_XIMAGE_HANDLE this_algorithm_base = 
(VP8_XIMAGE_HANDLE)vpxdxv_get_algorithm_base_ptr(src); - - if (this_algorithm_base) - { - *format_tag = this_algorithm_base->bd_tag; - return DXV_OK; - } - - return DXV_NULL_BASE; -} - -/* -//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -*/ -static int -onyx_set_output_format(XIMAGE_HANDLE src, unsigned int bd_tag) -{ - VP8_XIMAGE_HANDLE this_algorithm_base = (VP8_XIMAGE_HANDLE)vpxdxv_get_algorithm_base_ptr(src); - int i; - unsigned int bd_tag_found; - - if (this_algorithm_base) - { - i = 0; - bd_tag_found = 0; - - while (g_vp8_preferred_output_format_list[i] != 0) - { - if (g_vp8_preferred_output_format_list[i] == bd_tag) - { - bd_tag_found = 1; - break; - } - - i++; - } - - if (bd_tag_found) - { - this_algorithm_base->blitter = (vp8blit_func)vpx_get_blitter(bd_tag); - this_algorithm_base->bd_tag = bd_tag; - return DXV_OK; - } - - return DXV_INVALID_BLIT; - } - - return DXV_NULL_BASE; -} - -/* -//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -*/ -int -vpx_get_size_of_pixel(unsigned int bd) -{ - int vp8_rv; - - switch (bd) - { - case VPXDXV_YV12: - case VPXDXV_I420: - vp8_rv = 1; - break; - -#ifdef _ENABLE_SPLIT_PIXEL_ - case VPXDXV_SPLIT565: -#endif - case VPXDXV_RGB555: - case VPXDXV_RGB565: - case VPXDXV_YUY2: - case VPXDXV_UYVY: - case VPXDXV_YVYU: - vp8_rv = 2; - break; - - case VPXDXV_RGB888: - vp8_rv = 3; - break; - - case VPXDXV_RGB8888: - vp8_rv = 4; - break; - - default: - vp8_rv = -1; - break; - } - - return vp8_rv; -} diff -Nru libvpx-0.9.5/vp8/decoder/x86/x86_dsystemdependent.c libvpx-0.9.6/vp8/decoder/x86/x86_dsystemdependent.c --- libvpx-0.9.5/vp8/decoder/x86/x86_dsystemdependent.c 2010-10-28 13:14:14.000000000 +0000 +++ libvpx-0.9.6/vp8/decoder/x86/x86_dsystemdependent.c 2011-03-04 20:40:40.000000000 +0000 @@ -11,7 +11,7 @@ 
#include "vpx_ports/config.h" #include "vpx_ports/x86.h" -#include "onyxd_int.h" +#include "vp8/decoder/onyxd_int.h" #if HAVE_MMX diff -Nru libvpx-0.9.5/vp8/encoder/arm/arm_csystemdependent.c libvpx-0.9.6/vp8/encoder/arm/arm_csystemdependent.c --- libvpx-0.9.5/vp8/encoder/arm/arm_csystemdependent.c 2010-10-28 13:14:14.000000000 +0000 +++ libvpx-0.9.6/vp8/encoder/arm/arm_csystemdependent.c 2011-03-04 20:40:40.000000000 +0000 @@ -11,8 +11,8 @@ #include "vpx_ports/config.h" #include "vpx_ports/arm.h" -#include "variance.h" -#include "onyx_int.h" +#include "vp8/encoder/variance.h" +#include "vp8/encoder/onyx_int.h" extern void (*vp8_yv12_copy_partial_frame_ptr)(YV12_BUFFER_CONFIG *src_ybc, YV12_BUFFER_CONFIG *dst_ybc, int Fraction); extern void vp8_yv12_copy_partial_frame(YV12_BUFFER_CONFIG *src_ybc, YV12_BUFFER_CONFIG *dst_ybc, int Fraction); @@ -29,8 +29,8 @@ #if HAVE_ARMV6 if (has_media) { - /*cpi->rtcd.variance.sad16x16 = vp8_sad16x16_c; - cpi->rtcd.variance.sad16x8 = vp8_sad16x8_c; + cpi->rtcd.variance.sad16x16 = vp8_sad16x16_armv6; + /*cpi->rtcd.variance.sad16x8 = vp8_sad16x8_c; cpi->rtcd.variance.sad8x16 = vp8_sad8x16_c; cpi->rtcd.variance.sad8x8 = vp8_sad8x8_c; cpi->rtcd.variance.sad4x4 = vp8_sad4x4_c;*/ @@ -38,14 +38,17 @@ /*cpi->rtcd.variance.var4x4 = vp8_variance4x4_c; cpi->rtcd.variance.var8x8 = vp8_variance8x8_c; cpi->rtcd.variance.var8x16 = vp8_variance8x16_c; - cpi->rtcd.variance.var16x8 = vp8_variance16x8_c; - cpi->rtcd.variance.var16x16 = vp8_variance16x16_c;*/ + cpi->rtcd.variance.var16x8 = vp8_variance16x8_c;*/ + cpi->rtcd.variance.var16x16 = vp8_variance16x16_armv6; /*cpi->rtcd.variance.subpixvar4x4 = vp8_sub_pixel_variance4x4_c; cpi->rtcd.variance.subpixvar8x8 = vp8_sub_pixel_variance8x8_c; cpi->rtcd.variance.subpixvar8x16 = vp8_sub_pixel_variance8x16_c; - cpi->rtcd.variance.subpixvar16x8 = vp8_sub_pixel_variance16x8_c; - cpi->rtcd.variance.subpixvar16x16 = vp8_sub_pixel_variance16x16_c;*/ + cpi->rtcd.variance.subpixvar16x8 = 
vp8_sub_pixel_variance16x8_c;*/ + cpi->rtcd.variance.subpixvar16x16 = vp8_sub_pixel_variance16x16_armv6; + cpi->rtcd.variance.halfpixvar16x16_h = vp8_variance_halfpixvar16x16_h_armv6; + cpi->rtcd.variance.halfpixvar16x16_v = vp8_variance_halfpixvar16x16_v_armv6; + cpi->rtcd.variance.halfpixvar16x16_hv = vp8_variance_halfpixvar16x16_hv_armv6; /*cpi->rtcd.variance.mse16x16 = vp8_mse16x16_c; cpi->rtcd.variance.getmbss = vp8_get_mb_ss_c;*/ diff -Nru libvpx-0.9.5/vp8/encoder/arm/armv5te/boolhuff_armv5te.asm libvpx-0.9.6/vp8/encoder/arm/armv5te/boolhuff_armv5te.asm --- libvpx-0.9.5/vp8/encoder/arm/armv5te/boolhuff_armv5te.asm 2010-10-28 13:14:14.000000000 +0000 +++ libvpx-0.9.6/vp8/encoder/arm/armv5te/boolhuff_armv5te.asm 2011-03-04 20:40:40.000000000 +0000 @@ -14,7 +14,7 @@ EXPORT |vp8_stop_encode| EXPORT |vp8_encode_value| - INCLUDE vpx_vp8_enc_asm_offsets.asm + INCLUDE asm_enc_offsets.asm ARM REQUIRE8 diff -Nru libvpx-0.9.5/vp8/encoder/arm/armv5te/vp8_packtokens_armv5.asm libvpx-0.9.6/vp8/encoder/arm/armv5te/vp8_packtokens_armv5.asm --- libvpx-0.9.5/vp8/encoder/arm/armv5te/vp8_packtokens_armv5.asm 2010-10-28 13:14:14.000000000 +0000 +++ libvpx-0.9.6/vp8/encoder/arm/armv5te/vp8_packtokens_armv5.asm 2011-03-04 20:40:40.000000000 +0000 @@ -11,7 +11,7 @@ EXPORT |vp8cx_pack_tokens_armv5| - INCLUDE vpx_vp8_enc_asm_offsets.asm + INCLUDE asm_enc_offsets.asm ARM REQUIRE8 @@ -29,10 +29,9 @@ push {r4-r11, lr} ; Add size of xcount * sizeof (TOKENEXTRA) to get stop - ; sizeof (TOKENEXTRA) is 20 - add r2, r2, r2, lsl #2 ; xcount + ; sizeof (TOKENEXTRA) is 8 sub sp, sp, #12 - add r2, r1, r2, lsl #2 ; stop = p + xcount + add r2, r1, r2, lsl #3 ; stop = p + xcount*sizeof(TOKENEXTRA) str r2, [sp, #0] str r3, [sp, #8] ; save vp8_coef_encodings ldr r2, [r0, #vp8_writer_lowvalue] @@ -41,13 +40,13 @@ b check_p_lt_stop while_p_lt_stop - ldr r6, [r1, #tokenextra_token] ; t + ldrb r6, [r1, #tokenextra_token] ; t ldr r4, [sp, #8] ; vp8_coef_encodings mov lr, #0 add r4, r4, r6, lsl #3 ; a = 
vp8_coef_encodings + t ldr r9, [r1, #tokenextra_context_tree] ; pp - ldr r7, [r1, #tokenextra_skip_eob_node] + ldrb r7, [r1, #tokenextra_skip_eob_node] ldr r6, [r4, #vp8_token_value] ; v ldr r8, [r4, #vp8_token_len] ; n @@ -142,12 +141,11 @@ subs r8, r8, #1 ; --n bne token_loop - ldr r6, [r1, #tokenextra_token] ; t + ldrb r6, [r1, #tokenextra_token] ; t ldr r7, [sp, #48] ; vp8_extra_bits ; Add t * sizeof (vp8_extra_bit_struct) to get the desired - ; element. Here vp8_extra_bit_struct == 20 - add r6, r6, r6, lsl #2 ; b = vp8_extra_bits + t - add r12, r7, r6, lsl #2 ; b = vp8_extra_bits + t + ; element. Here vp8_extra_bit_struct == 16 + add r12, r7, r6, lsl #4 ; b = vp8_extra_bits + t ldr r4, [r12, #vp8_extra_bit_struct_base_val] cmp r4, #0 @@ -155,7 +153,7 @@ ; if( b->base_val) ldr r8, [r12, #vp8_extra_bit_struct_len] ; L - ldr lr, [r1, #tokenextra_extra] ; e = p->Extra + ldrsh lr, [r1, #tokenextra_extra] ; e = p->Extra cmp r8, #0 ; if( L) beq no_extra_bits diff -Nru libvpx-0.9.5/vp8/encoder/arm/armv5te/vp8_packtokens_mbrow_armv5.asm libvpx-0.9.6/vp8/encoder/arm/armv5te/vp8_packtokens_mbrow_armv5.asm --- libvpx-0.9.5/vp8/encoder/arm/armv5te/vp8_packtokens_mbrow_armv5.asm 2010-10-28 13:14:14.000000000 +0000 +++ libvpx-0.9.6/vp8/encoder/arm/armv5te/vp8_packtokens_mbrow_armv5.asm 2011-03-04 20:40:40.000000000 +0000 @@ -11,7 +11,7 @@ EXPORT |vp8cx_pack_mb_row_tokens_armv5| - INCLUDE vpx_vp8_enc_asm_offsets.asm + INCLUDE asm_enc_offsets.asm ARM REQUIRE8 @@ -62,13 +62,13 @@ ; actuall work gets done here! 
while_p_lt_stop - ldr r6, [r1, #tokenextra_token] ; t + ldrb r6, [r1, #tokenextra_token] ; t ldr r4, [sp, #20] ; vp8_coef_encodings mov lr, #0 add r4, r4, r6, lsl #3 ; a = vp8_coef_encodings + t ldr r9, [r1, #tokenextra_context_tree] ; pp - ldr r7, [r1, #tokenextra_skip_eob_node] + ldrb r7, [r1, #tokenextra_skip_eob_node] ldr r6, [r4, #vp8_token_value] ; v ldr r8, [r4, #vp8_token_len] ; n @@ -163,12 +163,11 @@ subs r8, r8, #1 ; --n bne token_loop - ldr r6, [r1, #tokenextra_token] ; t + ldrb r6, [r1, #tokenextra_token] ; t ldr r7, [sp, #8] ; vp8_extra_bits ; Add t * sizeof (vp8_extra_bit_struct) to get the desired - ; element. Here vp8_extra_bit_struct == 20 - add r6, r6, r6, lsl #2 ; b = vp8_extra_bits + t - add r12, r7, r6, lsl #2 ; b = vp8_extra_bits + t + ; element. Here vp8_extra_bit_struct == 16 + add r12, r7, r6, lsl #4 ; b = vp8_extra_bits + t ldr r4, [r12, #vp8_extra_bit_struct_base_val] cmp r4, #0 @@ -176,7 +175,7 @@ ; if( b->base_val) ldr r8, [r12, #vp8_extra_bit_struct_len] ; L - ldr lr, [r1, #tokenextra_extra] ; e = p->Extra + ldrsh lr, [r1, #tokenextra_extra] ; e = p->Extra cmp r8, #0 ; if( L) beq no_extra_bits diff -Nru libvpx-0.9.5/vp8/encoder/arm/armv5te/vp8_packtokens_partitions_armv5.asm libvpx-0.9.6/vp8/encoder/arm/armv5te/vp8_packtokens_partitions_armv5.asm --- libvpx-0.9.5/vp8/encoder/arm/armv5te/vp8_packtokens_partitions_armv5.asm 2010-10-28 13:14:14.000000000 +0000 +++ libvpx-0.9.6/vp8/encoder/arm/armv5te/vp8_packtokens_partitions_armv5.asm 2011-03-04 20:40:40.000000000 +0000 @@ -11,7 +11,7 @@ EXPORT |vp8cx_pack_tokens_into_partitions_armv5| - INCLUDE vpx_vp8_enc_asm_offsets.asm + INCLUDE asm_enc_offsets.asm ARM REQUIRE8 @@ -65,6 +65,8 @@ numparts_loop ldr r10, [sp, #40] ; ptr ldr r5, [sp, #36] ; move mb_rows to the counting section + sub r5, r5, r11 ; move start point with each partition + ; mb_rows starts at i str r5, [sp, #12] ; Reset all of the VP8 Writer data for each partition that @@ -90,13 +92,13 @@ ; actual work gets done here! 
while_p_lt_stop - ldr r6, [r1, #tokenextra_token] ; t + ldrb r6, [r1, #tokenextra_token] ; t ldr r4, [sp, #80] ; vp8_coef_encodings mov lr, #0 add r4, r4, r6, lsl #3 ; a = vp8_coef_encodings + t ldr r9, [r1, #tokenextra_context_tree] ; pp - ldr r7, [r1, #tokenextra_skip_eob_node] + ldrb r7, [r1, #tokenextra_skip_eob_node] ldr r6, [r4, #vp8_token_value] ; v ldr r8, [r4, #vp8_token_len] ; n @@ -191,12 +193,11 @@ subs r8, r8, #1 ; --n bne token_loop - ldr r6, [r1, #tokenextra_token] ; t + ldrb r6, [r1, #tokenextra_token] ; t ldr r7, [sp, #84] ; vp8_extra_bits ; Add t * sizeof (vp8_extra_bit_struct) to get the desired - ; element. Here vp8_extra_bit_struct == 20 - add r6, r6, r6, lsl #2 ; b = vp8_extra_bits + t - add r12, r7, r6, lsl #2 ; b = vp8_extra_bits + t + ; element. Here vp8_extra_bit_struct == 16 + add r12, r7, r6, lsl #4 ; b = vp8_extra_bits + t ldr r4, [r12, #vp8_extra_bit_struct_base_val] cmp r4, #0 @@ -204,7 +205,7 @@ ; if( b->base_val) ldr r8, [r12, #vp8_extra_bit_struct_len] ; L - ldr lr, [r1, #tokenextra_extra] ; e = p->Extra + ldrsh lr, [r1, #tokenextra_extra] ; e = p->Extra cmp r8, #0 ; if( L) beq no_extra_bits diff -Nru libvpx-0.9.5/vp8/encoder/arm/armv6/vp8_sad16x16_armv6.asm libvpx-0.9.6/vp8/encoder/arm/armv6/vp8_sad16x16_armv6.asm --- libvpx-0.9.5/vp8/encoder/arm/armv6/vp8_sad16x16_armv6.asm 1970-01-01 00:00:00.000000000 +0000 +++ libvpx-0.9.6/vp8/encoder/arm/armv6/vp8_sad16x16_armv6.asm 2011-03-04 20:40:40.000000000 +0000 @@ -0,0 +1,84 @@ +; +; Copyright (c) 2011 The WebM project authors. All Rights Reserved. +; +; Use of this source code is governed by a BSD-style license +; that can be found in the LICENSE file in the root of the source +; tree. An additional intellectual property rights grant can be found +; in the file PATENTS. All contributing project authors may +; be found in the AUTHORS file in the root of the source tree. 
+; + + + EXPORT |vp8_sad16x16_armv6| + + ARM + REQUIRE8 + PRESERVE8 + + AREA ||.text||, CODE, READONLY, ALIGN=2 + +; r0 const unsigned char *src_ptr +; r1 int src_stride +; r2 const unsigned char *ref_ptr +; r3 int ref_stride +; stack max_sad (not used) +|vp8_sad16x16_armv6| PROC + stmfd sp!, {r4-r12, lr} + mov r4, #0 ; sad = 0; + mov r5, #8 ; loop count + +loop + ; 1st row + ldr r6, [r0, #0x0] ; load 4 src pixels (1A) + ldr r8, [r2, #0x0] ; load 4 ref pixels (1A) + ldr r7, [r0, #0x4] ; load 4 src pixels (1A) + ldr r9, [r2, #0x4] ; load 4 ref pixels (1A) + ldr r10, [r0, #0x8] ; load 4 src pixels (1B) + ldr r11, [r0, #0xC] ; load 4 src pixels (1B) + + usada8 r4, r8, r6, r4 ; calculate sad for 4 pixels + usad8 r8, r7, r9 ; calculate sad for 4 pixels + + ldr r12, [r2, #0x8] ; load 4 ref pixels (1B) + ldr lr, [r2, #0xC] ; load 4 ref pixels (1B) + + add r0, r0, r1 ; set src pointer to next row + add r2, r2, r3 ; set dst pointer to next row + + usada8 r4, r10, r12, r4 ; calculate sad for 4 pixels + usada8 r8, r11, lr, r8 ; calculate sad for 4 pixels + + ldr r6, [r0, #0x0] ; load 4 src pixels (2A) + ldr r7, [r0, #0x4] ; load 4 src pixels (2A) + add r4, r4, r8 ; add partial sad values + + ; 2nd row + ldr r8, [r2, #0x0] ; load 4 ref pixels (2A) + ldr r9, [r2, #0x4] ; load 4 ref pixels (2A) + ldr r10, [r0, #0x8] ; load 4 src pixels (2B) + ldr r11, [r0, #0xC] ; load 4 src pixels (2B) + + usada8 r4, r6, r8, r4 ; calculate sad for 4 pixels + usad8 r8, r7, r9 ; calculate sad for 4 pixels + + ldr r12, [r2, #0x8] ; load 4 ref pixels (2B) + ldr lr, [r2, #0xC] ; load 4 ref pixels (2B) + + add r0, r0, r1 ; set src pointer to next row + add r2, r2, r3 ; set dst pointer to next row + + usada8 r4, r10, r12, r4 ; calculate sad for 4 pixels + usada8 r8, r11, lr, r8 ; calculate sad for 4 pixels + + subs r5, r5, #1 ; decrement loop counter + add r4, r4, r8 ; add partial sad values + + bne loop + + mov r0, r4 ; return sad + ldmfd sp!, {r4-r12, pc} + + ENDP + + END + diff -Nru 
libvpx-0.9.5/vp8/encoder/arm/armv6/vp8_variance16x16_armv6.asm libvpx-0.9.6/vp8/encoder/arm/armv6/vp8_variance16x16_armv6.asm --- libvpx-0.9.5/vp8/encoder/arm/armv6/vp8_variance16x16_armv6.asm 1970-01-01 00:00:00.000000000 +0000 +++ libvpx-0.9.6/vp8/encoder/arm/armv6/vp8_variance16x16_armv6.asm 2011-03-04 20:40:40.000000000 +0000 @@ -0,0 +1,147 @@ +; +; Copyright (c) 2011 The WebM project authors. All Rights Reserved. +; +; Use of this source code is governed by a BSD-style license +; that can be found in the LICENSE file in the root of the source +; tree. An additional intellectual property rights grant can be found +; in the file PATENTS. All contributing project authors may +; be found in the AUTHORS file in the root of the source tree. +; + + + EXPORT |vp8_variance16x16_armv6| + + ARM + REQUIRE8 + PRESERVE8 + + AREA ||.text||, CODE, READONLY, ALIGN=2 + +; r0 unsigned char *src_ptr +; r1 int source_stride +; r2 unsigned char *ref_ptr +; r3 int recon_stride +; stack unsigned int *sse +|vp8_variance16x16_armv6| PROC + + stmfd sp!, {r4-r12, lr} + mov r12, #16 ; set loop counter to 16 (=block height) + mov r8, #0 ; initialize sum = 0 + mov r11, #0 ; initialize sse = 0 + +loop + ; 1st 4 pixels + ldr r4, [r0, #0x0] ; load 4 src pixels + ldr r5, [r2, #0x0] ; load 4 ref pixels + + mov lr, #0 ; constant zero + + usub8 r6, r4, r5 ; calculate difference + sel r7, r6, lr ; select bytes with positive difference + usub8 r9, r5, r4 ; calculate difference with reversed operands + sel r6, r9, lr ; select bytes with negative difference + + ; calculate partial sums + usad8 r4, r7, lr ; calculate sum of positive differences + usad8 r5, r6, lr ; calculate sum of negative differences + orr r6, r6, r7 ; differences of all 4 pixels + ; calculate total sum + adds r8, r8, r4 ; add positive differences to sum + subs r8, r8, r5 ; substract negative differences from sum + + ; calculate sse + uxtb16 r5, r6 ; byte (two pixels) to halfwords + uxtb16 r10, r6, ror #8 ; another two pixels to 
halfwords + smlad r11, r5, r5, r11 ; dual signed multiply, add and accumulate (1) + + ; 2nd 4 pixels + ldr r4, [r0, #0x4] ; load 4 src pixels + ldr r5, [r2, #0x4] ; load 4 ref pixels + smlad r11, r10, r10, r11 ; dual signed multiply, add and accumulate (2) + + usub8 r6, r4, r5 ; calculate difference + sel r7, r6, lr ; select bytes with positive difference + usub8 r9, r5, r4 ; calculate difference with reversed operands + sel r6, r9, lr ; select bytes with negative difference + + ; calculate partial sums + usad8 r4, r7, lr ; calculate sum of positive differences + usad8 r5, r6, lr ; calculate sum of negative differences + orr r6, r6, r7 ; differences of all 4 pixels + + ; calculate total sum + add r8, r8, r4 ; add positive differences to sum + sub r8, r8, r5 ; substract negative differences from sum + + ; calculate sse + uxtb16 r5, r6 ; byte (two pixels) to halfwords + uxtb16 r10, r6, ror #8 ; another two pixels to halfwords + smlad r11, r5, r5, r11 ; dual signed multiply, add and accumulate (1) + + ; 3rd 4 pixels + ldr r4, [r0, #0x8] ; load 4 src pixels + ldr r5, [r2, #0x8] ; load 4 ref pixels + smlad r11, r10, r10, r11 ; dual signed multiply, add and accumulate (2) + + usub8 r6, r4, r5 ; calculate difference + sel r7, r6, lr ; select bytes with positive difference + usub8 r9, r5, r4 ; calculate difference with reversed operands + sel r6, r9, lr ; select bytes with negative difference + + ; calculate partial sums + usad8 r4, r7, lr ; calculate sum of positive differences + usad8 r5, r6, lr ; calculate sum of negative differences + orr r6, r6, r7 ; differences of all 4 pixels + + ; calculate total sum + add r8, r8, r4 ; add positive differences to sum + sub r8, r8, r5 ; substract negative differences from sum + + ; calculate sse + uxtb16 r5, r6 ; byte (two pixels) to halfwords + uxtb16 r10, r6, ror #8 ; another two pixels to halfwords + smlad r11, r5, r5, r11 ; dual signed multiply, add and accumulate (1) + + ; 4th 4 pixels + ldr r4, [r0, #0xc] ; load 4 src pixels + 
ldr r5, [r2, #0xc] ; load 4 ref pixels + smlad r11, r10, r10, r11 ; dual signed multiply, add and accumulate (2) + + usub8 r6, r4, r5 ; calculate difference + add r0, r0, r1 ; set src_ptr to next row + sel r7, r6, lr ; select bytes with positive difference + usub8 r9, r5, r4 ; calculate difference with reversed operands + add r2, r2, r3 ; set dst_ptr to next row + sel r6, r9, lr ; select bytes with negative difference + + ; calculate partial sums + usad8 r4, r7, lr ; calculate sum of positive differences + usad8 r5, r6, lr ; calculate sum of negative differences + orr r6, r6, r7 ; differences of all 4 pixels + + ; calculate total sum + add r8, r8, r4 ; add positive differences to sum + sub r8, r8, r5 ; substract negative differences from sum + + ; calculate sse + uxtb16 r5, r6 ; byte (two pixels) to halfwords + uxtb16 r10, r6, ror #8 ; another two pixels to halfwords + smlad r11, r5, r5, r11 ; dual signed multiply, add and accumulate (1) + smlad r11, r10, r10, r11 ; dual signed multiply, add and accumulate (2) + + + subs r12, r12, #1 + + bne loop + + ; return stuff + ldr r6, [sp, #0x28] ; get address of sse + mul r0, r8, r8 ; sum * sum + str r11, [r6] ; store sse + sub r0, r11, r0, ASR #8 ; return (sse - ((sum * sum) >> 8)) + + ldmfd sp!, {r4-r12, pc} + + ENDP + + END diff -Nru libvpx-0.9.5/vp8/encoder/arm/boolhuff_arm.c libvpx-0.9.6/vp8/encoder/arm/boolhuff_arm.c --- libvpx-0.9.5/vp8/encoder/arm/boolhuff_arm.c 2010-10-28 13:14:14.000000000 +0000 +++ libvpx-0.9.6/vp8/encoder/arm/boolhuff_arm.c 2011-03-04 20:40:40.000000000 +0000 @@ -9,8 +9,8 @@ */ -#include "boolhuff.h" -#include "blockd.h" +#include "vp8/encoder/boolhuff.h" +#include "vp8/common/blockd.h" const unsigned int vp8_prob_cost[256] = { diff -Nru libvpx-0.9.5/vp8/encoder/arm/encodemb_arm.c libvpx-0.9.6/vp8/encoder/arm/encodemb_arm.c --- libvpx-0.9.5/vp8/encoder/arm/encodemb_arm.c 2010-10-28 13:14:14.000000000 +0000 +++ libvpx-0.9.6/vp8/encoder/arm/encodemb_arm.c 2011-03-04 20:40:40.000000000 +0000 @@ 
-9,13 +9,13 @@ */ -#include "encodemb.h" -#include "reconinter.h" -#include "quantize.h" -#include "invtrans.h" -#include "recon.h" -#include "reconintra.h" -#include "dct.h" +#include "vp8/encoder/encodemb.h" +#include "vp8/common/reconinter.h" +#include "vp8/encoder/quantize.h" +#include "vp8/common/invtrans.h" +#include "vp8/common/recon.h" +#include "vp8/common/reconintra.h" +#include "vp8/encoder/dct.h" #include "vpx_mem/vpx_mem.h" extern void vp8_subtract_b_neon_func(short *diff, unsigned char *src, unsigned char *pred, int stride, int pitch); diff -Nru libvpx-0.9.5/vp8/encoder/arm/neon/fastfdct4x4_neon.asm libvpx-0.9.6/vp8/encoder/arm/neon/fastfdct4x4_neon.asm --- libvpx-0.9.5/vp8/encoder/arm/neon/fastfdct4x4_neon.asm 2010-10-28 13:14:14.000000000 +0000 +++ libvpx-0.9.6/vp8/encoder/arm/neon/fastfdct4x4_neon.asm 2011-03-04 20:40:40.000000000 +0000 @@ -112,10 +112,7 @@ ENDP ;----------------- - AREA fastfdct_dat, DATA, READONLY -;Data section with name data_area is specified. DCD reserves space in memory for 48 data. -;One word each is reserved. Label filter_coeff can be used to access the data. -;Data address: filter_coeff, filter_coeff+4, filter_coeff+8 ... + _ffdct_coeff_ DCD ffdct_coeff ffdct_coeff diff -Nru libvpx-0.9.5/vp8/encoder/arm/neon/fastfdct8x4_neon.asm libvpx-0.9.6/vp8/encoder/arm/neon/fastfdct8x4_neon.asm --- libvpx-0.9.5/vp8/encoder/arm/neon/fastfdct8x4_neon.asm 2010-10-28 13:14:14.000000000 +0000 +++ libvpx-0.9.6/vp8/encoder/arm/neon/fastfdct8x4_neon.asm 2011-03-04 20:40:40.000000000 +0000 @@ -165,10 +165,7 @@ ENDP ;----------------- - AREA fastfdct8x4_dat, DATA, READONLY -;Data section with name data_area is specified. DCD reserves space in memory for 48 data. -;One word each is reserved. Label filter_coeff can be used to access the data. -;Data address: filter_coeff, filter_coeff+4, filter_coeff+8 ... 
+ _ffdct8_coeff_ DCD ffdct8_coeff ffdct8_coeff diff -Nru libvpx-0.9.5/vp8/encoder/arm/neon/shortfdct_neon.asm libvpx-0.9.6/vp8/encoder/arm/neon/shortfdct_neon.asm --- libvpx-0.9.5/vp8/encoder/arm/neon/shortfdct_neon.asm 2010-10-28 13:14:14.000000000 +0000 +++ libvpx-0.9.6/vp8/encoder/arm/neon/shortfdct_neon.asm 2011-03-04 20:40:40.000000000 +0000 @@ -122,10 +122,7 @@ ENDP ;----------------- - AREA dct4x4_dat, DATA, READONLY -;Data section with name data_area is specified. DCD reserves space in memory for 48 data. -;One word each is reserved. Label filter_coeff can be used to access the data. -;Data address: filter_coeff, filter_coeff+4, filter_coeff+8 ... + _dct_matrix_ DCD dct_matrix dct_matrix diff -Nru libvpx-0.9.5/vp8/encoder/arm/neon/vp8_subpixelvariance16x16_neon.asm libvpx-0.9.6/vp8/encoder/arm/neon/vp8_subpixelvariance16x16_neon.asm --- libvpx-0.9.5/vp8/encoder/arm/neon/vp8_subpixelvariance16x16_neon.asm 2010-10-28 13:14:14.000000000 +0000 +++ libvpx-0.9.6/vp8/encoder/arm/neon/vp8_subpixelvariance16x16_neon.asm 2011-03-04 20:40:40.000000000 +0000 @@ -9,7 +9,7 @@ ; - EXPORT |vp8_sub_pixel_variance16x16_neon| + EXPORT |vp8_sub_pixel_variance16x16_neon_func| ARM REQUIRE8 PRESERVE8 @@ -24,7 +24,7 @@ ; stack(r6) unsigned int *sse ;note: most of the code is copied from bilinear_predict16x16_neon and vp8_variance16x16_neon. -|vp8_sub_pixel_variance16x16_neon| PROC +|vp8_sub_pixel_variance16x16_neon_func| PROC push {r4-r6, lr} ldr r12, _BilinearTaps_coeff_ @@ -416,10 +416,7 @@ ENDP ;----------------- - AREA vp8e_bilinear_taps_dat, DATA, READWRITE ;read/write by default -;Data section with name data_area is specified. DCD reserves space in memory for 48 data. -;One word each is reserved. Label filter_coeff can be used to access the data. -;Data address: filter_coeff, filter_coeff+4, filter_coeff+8 ... 
+ _BilinearTaps_coeff_ DCD bilinear_taps_coeff bilinear_taps_coeff diff -Nru libvpx-0.9.5/vp8/encoder/arm/neon/vp8_subpixelvariance8x8_neon.asm libvpx-0.9.6/vp8/encoder/arm/neon/vp8_subpixelvariance8x8_neon.asm --- libvpx-0.9.5/vp8/encoder/arm/neon/vp8_subpixelvariance8x8_neon.asm 2010-10-28 13:14:14.000000000 +0000 +++ libvpx-0.9.6/vp8/encoder/arm/neon/vp8_subpixelvariance8x8_neon.asm 2011-03-04 20:40:40.000000000 +0000 @@ -215,10 +215,7 @@ ENDP ;----------------- - AREA bilinear_taps_dat, DATA, READWRITE ;read/write by default -;Data section with name data_area is specified. DCD reserves space in memory for 48 data. -;One word each is reserved. Label filter_coeff can be used to access the data. -;Data address: filter_coeff, filter_coeff+4, filter_coeff+8 ... + _BilinearTaps_coeff_ DCD bilinear_taps_coeff bilinear_taps_coeff diff -Nru libvpx-0.9.5/vp8/encoder/arm/picklpf_arm.c libvpx-0.9.6/vp8/encoder/arm/picklpf_arm.c --- libvpx-0.9.5/vp8/encoder/arm/picklpf_arm.c 2010-10-28 13:14:14.000000000 +0000 +++ libvpx-0.9.6/vp8/encoder/arm/picklpf_arm.c 2011-03-04 20:40:40.000000000 +0000 @@ -9,13 +9,13 @@ */ -#include "onyxc_int.h" -#include "onyx_int.h" -#include "quantize.h" +#include "vp8/common/onyxc_int.h" +#include "vp8/encoder/onyx_int.h" +#include "vp8/encoder/quantize.h" #include "vpx_mem/vpx_mem.h" #include "vpx_scale/yv12extend.h" #include "vpx_scale/vpxscale.h" -#include "alloccommon.h" +#include "vp8/common/alloccommon.h" extern void vp8_memcpy_neon(unsigned char *dst_ptr, unsigned char *src_ptr, int sz); diff -Nru libvpx-0.9.5/vp8/encoder/arm/quantize_arm.c libvpx-0.9.6/vp8/encoder/arm/quantize_arm.c --- libvpx-0.9.5/vp8/encoder/arm/quantize_arm.c 2010-10-28 13:14:14.000000000 +0000 +++ libvpx-0.9.6/vp8/encoder/arm/quantize_arm.c 2011-03-04 20:40:40.000000000 +0000 @@ -12,9 +12,8 @@ #include #include "vpx_mem/vpx_mem.h" -#include "quantize.h" -#include "entropy.h" -#include "predictdc.h" +#include "vp8/encoder/quantize.h" +#include "vp8/common/entropy.h" 
DECLARE_ALIGNED(16, const short, vp8_rvsplus1_default_zig_zag1d[16]) = { @@ -29,7 +28,7 @@ void vp8_fast_quantize_b_neon(BLOCK *b, BLOCKD *d) { - d->eob = vp8_fast_quantize_b_neon_func(b->coeff, b->zbin, d->qcoeff, d->dqcoeff, d->dequant, vp8_rvsplus1_default_zig_zag1d, b->round, b->quant); + d->eob = vp8_fast_quantize_b_neon_func(b->coeff, b->zbin, d->qcoeff, d->dqcoeff, d->dequant, vp8_rvsplus1_default_zig_zag1d, b->round, b->quant_fast); } /* diff -Nru libvpx-0.9.5/vp8/encoder/arm/variance_arm.c libvpx-0.9.6/vp8/encoder/arm/variance_arm.c --- libvpx-0.9.5/vp8/encoder/arm/variance_arm.c 1970-01-01 00:00:00.000000000 +0000 +++ libvpx-0.9.6/vp8/encoder/arm/variance_arm.c 2011-03-04 20:40:40.000000000 +0000 @@ -0,0 +1,105 @@ +/* + * Copyright (c) 2010 The WebM project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
+ */ + +#include "vpx_config.h" +#include "vp8/encoder/variance.h" +#include "vp8/common/filter.h" +#include "vp8/common/arm/bilinearfilter_arm.h" + +#if HAVE_ARMV6 + +unsigned int vp8_sub_pixel_variance16x16_armv6 +( + const unsigned char *src_ptr, + int src_pixels_per_line, + int xoffset, + int yoffset, + const unsigned char *dst_ptr, + int dst_pixels_per_line, + unsigned int *sse +) +{ + unsigned short first_pass[36*16]; + unsigned char second_pass[20*16]; + const short *HFilter, *VFilter; + + HFilter = vp8_bilinear_filters[xoffset]; + VFilter = vp8_bilinear_filters[yoffset]; + + vp8_filter_block2d_bil_first_pass_armv6(src_ptr, first_pass, + src_pixels_per_line, + 17, 16, HFilter); + vp8_filter_block2d_bil_second_pass_armv6(first_pass, second_pass, + 16, 16, 16, VFilter); + + return vp8_variance16x16_armv6(second_pass, 16, dst_ptr, + dst_pixels_per_line, sse); +} + +unsigned int vp8_variance_halfpixvar16x16_h_armv6( + const unsigned char *src_ptr, + int source_stride, + const unsigned char *ref_ptr, + int recon_stride, + unsigned int *sse) +{ + return vp8_sub_pixel_variance16x16_armv6(src_ptr, source_stride, 4, 0, + ref_ptr, recon_stride, sse); +} + +unsigned int vp8_variance_halfpixvar16x16_v_armv6( + const unsigned char *src_ptr, + int source_stride, + const unsigned char *ref_ptr, + int recon_stride, + unsigned int *sse) +{ + return vp8_sub_pixel_variance16x16_armv6(src_ptr, source_stride, 0, 4, + ref_ptr, recon_stride, sse); +} + +unsigned int vp8_variance_halfpixvar16x16_hv_armv6( + const unsigned char *src_ptr, + int source_stride, + const unsigned char *ref_ptr, + int recon_stride, + unsigned int *sse) +{ + return vp8_sub_pixel_variance16x16_armv6(src_ptr, source_stride, 4, 4, + ref_ptr, recon_stride, sse); +} + +#endif /* HAVE_ARMV6 */ + + +#if HAVE_ARMV7 + +unsigned int vp8_sub_pixel_variance16x16_neon +( + const unsigned char *src_ptr, + int src_pixels_per_line, + int xoffset, + int yoffset, + const unsigned char *dst_ptr, + int dst_pixels_per_line, + 
unsigned int *sse +) +{ + if (xoffset == 4 && yoffset == 0) + return vp8_variance_halfpixvar16x16_h_neon(src_ptr, src_pixels_per_line, dst_ptr, dst_pixels_per_line, sse); + else if (xoffset == 0 && yoffset == 4) + return vp8_variance_halfpixvar16x16_v_neon(src_ptr, src_pixels_per_line, dst_ptr, dst_pixels_per_line, sse); + else if (xoffset == 4 && yoffset == 4) + return vp8_variance_halfpixvar16x16_hv_neon(src_ptr, src_pixels_per_line, dst_ptr, dst_pixels_per_line, sse); + else + return vp8_sub_pixel_variance16x16_neon_func(src_ptr, src_pixels_per_line, xoffset, yoffset, dst_ptr, dst_pixels_per_line, sse); +} + +#endif diff -Nru libvpx-0.9.5/vp8/encoder/arm/variance_arm.h libvpx-0.9.6/vp8/encoder/arm/variance_arm.h --- libvpx-0.9.5/vp8/encoder/arm/variance_arm.h 2010-10-28 13:14:14.000000000 +0000 +++ libvpx-0.9.6/vp8/encoder/arm/variance_arm.h 2011-03-04 20:40:40.000000000 +0000 @@ -12,6 +12,40 @@ #ifndef VARIANCE_ARM_H #define VARIANCE_ARM_H +#if HAVE_ARMV6 + +extern prototype_sad(vp8_sad16x16_armv6); +extern prototype_variance(vp8_variance16x16_armv6); +extern prototype_subpixvariance(vp8_sub_pixel_variance16x16_armv6); +extern prototype_variance(vp8_variance_halfpixvar16x16_h_armv6); +extern prototype_variance(vp8_variance_halfpixvar16x16_v_armv6); +extern prototype_variance(vp8_variance_halfpixvar16x16_hv_armv6); + +#if !CONFIG_RUNTIME_CPU_DETECT + +#undef vp8_variance_sad16x16 +#define vp8_variance_sad16x16 vp8_sad16x16_armv6 + +#undef vp8_variance_subpixvar16x16 +#define vp8_variance_subpixvar16x16 vp8_sub_pixel_variance16x16_armv6 + +#undef vp8_variance_var16x16 +#define vp8_variance_var16x16 vp8_variance16x16_armv6 + +#undef vp8_variance_halfpixvar16x16_h +#define vp8_variance_halfpixvar16x16_h vp8_variance_halfpixvar16x16_h_armv6 + +#undef vp8_variance_halfpixvar16x16_v +#define vp8_variance_halfpixvar16x16_v vp8_variance_halfpixvar16x16_v_armv6 + +#undef vp8_variance_halfpixvar16x16_hv +#define vp8_variance_halfpixvar16x16_hv 
vp8_variance_halfpixvar16x16_hv_armv6 + +#endif /* !CONFIG_RUNTIME_CPU_DETECT */ + +#endif /* HAVE_ARMV6 */ + + #if HAVE_ARMV7 extern prototype_sad(vp8_sad4x4_neon); extern prototype_sad(vp8_sad8x8_neon); @@ -30,6 +64,7 @@ //extern prototype_subpixvariance(vp8_sub_pixel_variance8x16_c); //extern prototype_subpixvariance(vp8_sub_pixel_variance16x8_c); extern prototype_subpixvariance(vp8_sub_pixel_variance16x16_neon); +extern prototype_subpixvariance(vp8_sub_pixel_variance16x16_neon_func); extern prototype_variance(vp8_variance_halfpixvar16x16_h_neon); extern prototype_variance(vp8_variance_halfpixvar16x16_v_neon); extern prototype_variance(vp8_variance_halfpixvar16x16_hv_neon); diff -Nru libvpx-0.9.5/vp8/encoder/arm/vpx_vp8_enc_asm_offsets.c libvpx-0.9.6/vp8/encoder/arm/vpx_vp8_enc_asm_offsets.c --- libvpx-0.9.5/vp8/encoder/arm/vpx_vp8_enc_asm_offsets.c 2010-10-28 13:14:14.000000000 +0000 +++ libvpx-0.9.6/vp8/encoder/arm/vpx_vp8_enc_asm_offsets.c 1970-01-01 00:00:00.000000000 +0000 @@ -1,78 +0,0 @@ -/* - * Copyright (c) 2010 The WebM project authors. All Rights Reserved. - * - * Use of this source code is governed by a BSD-style license - * that can be found in the LICENSE file in the root of the source - * tree. An additional intellectual property rights grant can be found - * in the file PATENTS. All contributing project authors may - * be found in the AUTHORS file in the root of the source tree. 
- */ - - -#include "vpx_ports/config.h" -#include - -#include "../treewriter.h" -#include "../tokenize.h" -#include "../onyx_int.h" - -#define ct_assert(name,cond) \ - static void assert_##name(void) UNUSED;\ - static void assert_##name(void) {switch(0){case 0:case !!(cond):;}} - -#define DEFINE(sym, val) int sym = val; - -/* -#define BLANK() asm volatile("\n->" : : ) -*/ - -/* - * int main(void) - * { - */ - -DEFINE(vp8_writer_lowvalue, offsetof(vp8_writer, lowvalue)); -DEFINE(vp8_writer_range, offsetof(vp8_writer, range)); -DEFINE(vp8_writer_value, offsetof(vp8_writer, value)); -DEFINE(vp8_writer_count, offsetof(vp8_writer, count)); -DEFINE(vp8_writer_pos, offsetof(vp8_writer, pos)); -DEFINE(vp8_writer_buffer, offsetof(vp8_writer, buffer)); - -DEFINE(tokenextra_token, offsetof(TOKENEXTRA, Token)); -DEFINE(tokenextra_extra, offsetof(TOKENEXTRA, Extra)); -DEFINE(tokenextra_context_tree, offsetof(TOKENEXTRA, context_tree)); -DEFINE(tokenextra_skip_eob_node, offsetof(TOKENEXTRA, skip_eob_node)); -DEFINE(TOKENEXTRA_SZ, sizeof(TOKENEXTRA)); - -DEFINE(vp8_extra_bit_struct_sz, sizeof(vp8_extra_bit_struct)); - -DEFINE(vp8_token_value, offsetof(vp8_token, value)); -DEFINE(vp8_token_len, offsetof(vp8_token, Len)); - -DEFINE(vp8_extra_bit_struct_tree, offsetof(vp8_extra_bit_struct, tree)); -DEFINE(vp8_extra_bit_struct_prob, offsetof(vp8_extra_bit_struct, prob)); -DEFINE(vp8_extra_bit_struct_prob_bc, offsetof(vp8_extra_bit_struct, prob_bc)); -DEFINE(vp8_extra_bit_struct_len, offsetof(vp8_extra_bit_struct, Len)); -DEFINE(vp8_extra_bit_struct_base_val, offsetof(vp8_extra_bit_struct, base_val)); - -DEFINE(vp8_comp_tplist, offsetof(VP8_COMP, tplist)); -DEFINE(vp8_comp_common, offsetof(VP8_COMP, common)); -DEFINE(vp8_comp_bc2, offsetof(VP8_COMP, bc2)); - -DEFINE(tokenlist_start, offsetof(TOKENLIST, start)); -DEFINE(tokenlist_stop, offsetof(TOKENLIST, stop)); -DEFINE(TOKENLIST_SZ, sizeof(TOKENLIST)); - -DEFINE(vp8_common_mb_rows, offsetof(VP8_COMMON, mb_rows)); - -// These two 
sizes are used in vp7cx_pack_tokens. They are hard coded -// so if the size changes this will have to be adjusted. -ct_assert(TOKENEXTRA_SZ, sizeof(TOKENEXTRA) == 20) -ct_assert(vp8_extra_bit_struct_sz, sizeof(vp8_extra_bit_struct) == 20) - -//add asserts for any offset that is not supported by assembly code -//add asserts for any size that is not supported by assembly code -/* - * return 0; - * } - */ diff -Nru libvpx-0.9.5/vp8/encoder/asm_enc_offsets.c libvpx-0.9.6/vp8/encoder/asm_enc_offsets.c --- libvpx-0.9.5/vp8/encoder/asm_enc_offsets.c 1970-01-01 00:00:00.000000000 +0000 +++ libvpx-0.9.6/vp8/encoder/asm_enc_offsets.c 2011-03-04 20:40:40.000000000 +0000 @@ -0,0 +1,80 @@ +/* + * Copyright (c) 2011 The WebM project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
+ */ + + +#include "vpx_ports/config.h" +#include + +#include "treewriter.h" +#include "tokenize.h" +#include "onyx_int.h" + +#define ct_assert(name,cond) \ + static void assert_##name(void) UNUSED;\ + static void assert_##name(void) {switch(0){case 0:case !!(cond):;}} + +#define DEFINE(sym, val) int sym = val; + +/* +#define BLANK() asm volatile("\n->" : : ) +*/ + +/* + * int main(void) + * { + */ + +//pack tokens +DEFINE(vp8_writer_lowvalue, offsetof(vp8_writer, lowvalue)); +DEFINE(vp8_writer_range, offsetof(vp8_writer, range)); +DEFINE(vp8_writer_value, offsetof(vp8_writer, value)); +DEFINE(vp8_writer_count, offsetof(vp8_writer, count)); +DEFINE(vp8_writer_pos, offsetof(vp8_writer, pos)); +DEFINE(vp8_writer_buffer, offsetof(vp8_writer, buffer)); + +DEFINE(tokenextra_token, offsetof(TOKENEXTRA, Token)); +DEFINE(tokenextra_extra, offsetof(TOKENEXTRA, Extra)); +DEFINE(tokenextra_context_tree, offsetof(TOKENEXTRA, context_tree)); +DEFINE(tokenextra_skip_eob_node, offsetof(TOKENEXTRA, skip_eob_node)); +DEFINE(TOKENEXTRA_SZ, sizeof(TOKENEXTRA)); + +DEFINE(vp8_extra_bit_struct_sz, sizeof(vp8_extra_bit_struct)); + +DEFINE(vp8_token_value, offsetof(vp8_token, value)); +DEFINE(vp8_token_len, offsetof(vp8_token, Len)); + +DEFINE(vp8_extra_bit_struct_tree, offsetof(vp8_extra_bit_struct, tree)); +DEFINE(vp8_extra_bit_struct_prob, offsetof(vp8_extra_bit_struct, prob)); +DEFINE(vp8_extra_bit_struct_len, offsetof(vp8_extra_bit_struct, Len)); +DEFINE(vp8_extra_bit_struct_base_val, offsetof(vp8_extra_bit_struct, base_val)); + +DEFINE(vp8_comp_tplist, offsetof(VP8_COMP, tplist)); +DEFINE(vp8_comp_common, offsetof(VP8_COMP, common)); +DEFINE(vp8_comp_bc2, offsetof(VP8_COMP, bc2)); + +DEFINE(tokenlist_start, offsetof(TOKENLIST, start)); +DEFINE(tokenlist_stop, offsetof(TOKENLIST, stop)); +DEFINE(TOKENLIST_SZ, sizeof(TOKENLIST)); + +DEFINE(vp8_common_mb_rows, offsetof(VP8_COMMON, mb_rows)); + +// These two sizes are used in vp8cx_pack_tokens. 
They are hard coded +// so if the size changes this will have to be adjusted. +#if HAVE_ARMV5TE +ct_assert(TOKENEXTRA_SZ, sizeof(TOKENEXTRA) == 8) +ct_assert(vp8_extra_bit_struct_sz, sizeof(vp8_extra_bit_struct) == 16) +#endif + +//add asserts for any offset that is not supported by assembly code +//add asserts for any size that is not supported by assembly code +/* + * return 0; + * } + */ diff -Nru libvpx-0.9.5/vp8/encoder/bitstream.c libvpx-0.9.6/vp8/encoder/bitstream.c --- libvpx-0.9.5/vp8/encoder/bitstream.c 2010-10-28 13:14:14.000000000 +0000 +++ libvpx-0.9.6/vp8/encoder/bitstream.c 2011-03-04 20:40:40.000000000 +0000 @@ -9,15 +9,15 @@ */ -#include "header.h" +#include "vp8/common/header.h" #include "encodemv.h" -#include "entropymode.h" -#include "findnearmv.h" +#include "vp8/common/entropymode.h" +#include "vp8/common/findnearmv.h" #include "mcomp.h" -#include "systemdependent.h" +#include "vp8/common/systemdependent.h" #include #include -#include "pragmas.h" +#include "vp8/common/pragmas.h" #include "vpx_mem/vpx_mem.h" #include "bitstream.h" @@ -58,16 +58,6 @@ int count_mb_seg[4] = { 0, 0, 0, 0 }; #endif -#if CONFIG_BIG_ENDIAN -# define make_endian_16(a) \ - (((unsigned int)(a & 0xff)) << 8) | (((unsigned int)(a & 0xff00)) >> 8) -# define make_endian_32(a) \ - (((unsigned int)(a & 0xff)) << 24) | (((unsigned int)(a & 0xff00)) << 8) | \ - (((unsigned int)(a & 0xff0000)) >> 8) | (((unsigned int)(a & 0xff000000)) >> 24) -#else -# define make_endian_16(a) a -# define make_endian_32(a) a -#endif static void update_mode( vp8_writer *const w, @@ -1392,13 +1382,20 @@ // every keyframe send startcode, width, height, scale factor, clamp and color type if (oh.type == KEY_FRAME) { + int v; + // Start / synch code cx_data[0] = 0x9D; cx_data[1] = 0x01; cx_data[2] = 0x2a; - *((unsigned short *)(cx_data + 3)) = make_endian_16((pc->horiz_scale << 14) | pc->Width); - *((unsigned short *)(cx_data + 5)) = make_endian_16((pc->vert_scale << 14) | pc->Height); + v = 
(pc->horiz_scale << 14) | pc->Width; + cx_data[3] = v; + cx_data[4] = v >> 8; + + v = (pc->vert_scale << 14) | pc->Height; + cx_data[5] = v; + cx_data[6] = v >> 8; extra_bytes_packed = 7; cx_data += extra_bytes_packed ; @@ -1654,29 +1651,28 @@ { vp8_start_encode(&cpi->bc2, cx_data + bc->pos); - if (!cpi->b_multi_threaded) - pack_tokens(&cpi->bc2, cpi->tok, cpi->tok_count); - else +#if CONFIG_MULTITHREAD + if (cpi->b_multi_threaded) pack_mb_row_tokens(cpi, &cpi->bc2); + else +#endif + pack_tokens(&cpi->bc2, cpi->tok, cpi->tok_count); vp8_stop_encode(&cpi->bc2); oh.first_partition_length_in_bytes = cpi->bc.pos ; *size = cpi->bc2.pos + cpi->bc.pos + VP8_HEADER_SIZE + extra_bytes_packed; } -#if CONFIG_BIG_ENDIAN { int v = (oh.first_partition_length_in_bytes << 5) | (oh.show_frame << 4) | (oh.version << 1) | oh.type; - v = make_endian_32(v); - vpx_memcpy(dest, &v, 3); + dest[0] = v; + dest[1] = v >> 8; + dest[2] = v >> 16; } -#else - vpx_memcpy(dest, &oh, 3); -#endif } #ifdef ENTROPY_STATS diff -Nru libvpx-0.9.5/vp8/encoder/block.h libvpx-0.9.6/vp8/encoder/block.h --- libvpx-0.9.5/vp8/encoder/block.h 2010-10-28 13:14:14.000000000 +0000 +++ libvpx-0.9.6/vp8/encoder/block.h 2011-03-04 20:40:40.000000000 +0000 @@ -12,10 +12,10 @@ #ifndef __INC_BLOCK_H #define __INC_BLOCK_H -#include "onyx.h" -#include "blockd.h" -#include "entropymv.h" -#include "entropy.h" +#include "vp8/common/onyx.h" +#include "vp8/common/blockd.h" +#include "vp8/common/entropymv.h" +#include "vp8/common/entropy.h" #include "vpx_ports/mem.h" // motion search site @@ -33,6 +33,7 @@ // 16 Y blocks, 4 U blocks, 4 V blocks each with 16 entries short *quant; + short *quant_fast; short *quant_shift; short *zbin; short *zrun_zbin_boost; @@ -81,6 +82,7 @@ int errthresh; int rddiv; int rdmult; + INT64 activity_sum; int mvcosts[2][MVvals+1]; int *mvcost[2]; @@ -110,6 +112,7 @@ unsigned int token_costs[BLOCK_TYPES] [COEF_BANDS] [PREV_COEF_CONTEXTS] [vp8_coef_tokens]; int optimize; + int q_index; void 
(*vp8_short_fdct4x4)(short *input, short *output, int pitch); void (*vp8_short_fdct8x4)(short *input, short *output, int pitch); diff -Nru libvpx-0.9.5/vp8/encoder/boolhuff.c libvpx-0.9.6/vp8/encoder/boolhuff.c --- libvpx-0.9.5/vp8/encoder/boolhuff.c 2010-10-28 13:14:14.000000000 +0000 +++ libvpx-0.9.6/vp8/encoder/boolhuff.c 2011-03-04 20:40:40.000000000 +0000 @@ -10,7 +10,7 @@ #include "boolhuff.h" -#include "blockd.h" +#include "vp8/common/blockd.h" diff -Nru libvpx-0.9.5/vp8/encoder/encodeframe.c libvpx-0.9.6/vp8/encoder/encodeframe.c --- libvpx-0.9.5/vp8/encoder/encodeframe.c 2010-10-28 13:14:14.000000000 +0000 +++ libvpx-0.9.6/vp8/encoder/encodeframe.c 2011-03-04 20:40:40.000000000 +0000 @@ -12,22 +12,22 @@ #include "vpx_ports/config.h" #include "encodemb.h" #include "encodemv.h" -#include "common.h" +#include "vp8/common/common.h" #include "onyx_int.h" -#include "extend.h" -#include "entropymode.h" -#include "quant_common.h" +#include "vp8/common/extend.h" +#include "vp8/common/entropymode.h" +#include "vp8/common/quant_common.h" #include "segmentation.h" -#include "setupintrarecon.h" +#include "vp8/common/setupintrarecon.h" #include "encodeintra.h" -#include "reconinter.h" +#include "vp8/common/reconinter.h" #include "rdopt.h" #include "pickinter.h" -#include "findnearmv.h" -#include "reconintra.h" +#include "vp8/common/findnearmv.h" +#include "vp8/common/reconintra.h" #include #include -#include "subpixel.h" +#include "vp8/common/subpixel.h" #include "vpx_ports/vpx_timer.h" #if CONFIG_RUNTIME_CPU_DETECT @@ -62,7 +62,6 @@ static const int qrounding_factors[129] = { - 56, 56, 56, 56, 48, 48, 56, 56, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, @@ -78,15 +77,18 @@ 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, - 48, + 48, 48, 48, 48, 48, 48, 48, 48, + 48 }; static const int qzbin_factors[129] = { - 72, 72, 72, 72, 80, 80, 72, 72, - 80, 80, 80, 80, 80, 80, 80, 
80, - 80, 80, 80, 80, 80, 80, 80, 80, - 80, 80, 80, 80, 80, 80, 80, 80, + 84, 84, 84, 84, 84, 84, 84, 84, + 84, 84, 84, 84, 84, 84, 84, 84, + 84, 84, 84, 84, 84, 84, 84, 84, + 84, 84, 84, 84, 84, 84, 84, 84, + 84, 84, 84, 84, 84, 84, 84, 84, + 84, 84, 84, 84, 84, 84, 84, 84, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, @@ -97,14 +99,11 @@ 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, - 80, 80, 80, 80, 80, 80, 80, 80, - 80, 80, 80, 80, 80, 80, 80, 80, - 80, + 80 }; static const int qrounding_factors_y2[129] = { - 56, 56, 56, 56, 48, 48, 56, 56, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, @@ -120,12 +119,18 @@ 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, - 48, + 48, 48, 48, 48, 48, 48, 48, 48, + 48 }; static const int qzbin_factors_y2[129] = { - 72, 72, 72, 72, 80, 80, 72, 72, + 84, 84, 84, 84, 84, 84, 84, 84, + 84, 84, 84, 84, 84, 84, 84, 84, + 84, 84, 84, 84, 84, 84, 84, 84, + 84, 84, 84, 84, 84, 84, 84, 84, + 84, 84, 84, 84, 84, 84, 84, 84, + 84, 84, 84, 84, 84, 84, 84, 84, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, @@ -136,26 +141,30 @@ 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, - 80, 80, 80, 80, 80, 80, 80, 80, - 80, 80, 80, 80, 80, 80, 80, 80, - 80, 80, 80, 80, 80, 80, 80, 80, - 80, 80, 80, 80, 80, 80, 80, 80, - 80, 80, 80, 80, 80, 80, 80, 80, - 80, + 80 }; -//#define EXACT_QUANT +#define EXACT_QUANT #ifdef EXACT_QUANT -static void vp8cx_invert_quant(short *quant, short *shift, short d) +static void vp8cx_invert_quant(int improved_quant, short *quant, + short *shift, short d) { - unsigned t; - int l; - t = d; - for(l = 0; t > 1; l++) - t>>=1; - t = 1 + (1<<(16+l))/d; - *quant = (short)(t - (1<<16)); - *shift = l; + if(improved_quant) + { + unsigned t; + int 
l; + t = d; + for(l = 0; t > 1; l++) + t>>=1; + t = 1 + (1<<(16+l))/d; + *quant = (short)(t - (1<<16)); + *shift = l; + } + else + { + *quant = (1 << 16) / d; + *shift = 0; + } } void vp8cx_init_quantizer(VP8_COMP *cpi) @@ -170,7 +179,8 @@ { // dc values quant_val = vp8_dc_quant(Q, cpi->common.y1dc_delta_q); - vp8cx_invert_quant(cpi->Y1quant[Q] + 0, + cpi->Y1quant_fast[Q][0] = (1 << 16) / quant_val; + vp8cx_invert_quant(cpi->sf.improved_quant, cpi->Y1quant[Q] + 0, cpi->Y1quant_shift[Q] + 0, quant_val); cpi->Y1zbin[Q][0] = ((qzbin_factors[Q] * quant_val) + 64) >> 7; cpi->Y1round[Q][0] = (qrounding_factors[Q] * quant_val) >> 7; @@ -178,7 +188,8 @@ cpi->zrun_zbin_boost_y1[Q][0] = (quant_val * zbin_boost[0]) >> 7; quant_val = vp8_dc2quant(Q, cpi->common.y2dc_delta_q); - vp8cx_invert_quant(cpi->Y2quant[Q] + 0, + cpi->Y2quant_fast[Q][0] = (1 << 16) / quant_val; + vp8cx_invert_quant(cpi->sf.improved_quant, cpi->Y2quant[Q] + 0, cpi->Y2quant_shift[Q] + 0, quant_val); cpi->Y2zbin[Q][0] = ((qzbin_factors_y2[Q] * quant_val) + 64) >> 7; cpi->Y2round[Q][0] = (qrounding_factors_y2[Q] * quant_val) >> 7; @@ -186,7 +197,8 @@ cpi->zrun_zbin_boost_y2[Q][0] = (quant_val * zbin_boost[0]) >> 7; quant_val = vp8_dc_uv_quant(Q, cpi->common.uvdc_delta_q); - vp8cx_invert_quant(cpi->UVquant[Q] + 0, + cpi->UVquant_fast[Q][0] = (1 << 16) / quant_val; + vp8cx_invert_quant(cpi->sf.improved_quant, cpi->UVquant[Q] + 0, cpi->UVquant_shift[Q] + 0, quant_val); cpi->UVzbin[Q][0] = ((qzbin_factors[Q] * quant_val) + 64) >> 7;; cpi->UVround[Q][0] = (qrounding_factors[Q] * quant_val) >> 7; @@ -199,7 +211,8 @@ int rc = vp8_default_zig_zag1d[i]; quant_val = vp8_ac_yquant(Q); - vp8cx_invert_quant(cpi->Y1quant[Q] + rc, + cpi->Y1quant_fast[Q][rc] = (1 << 16) / quant_val; + vp8cx_invert_quant(cpi->sf.improved_quant, cpi->Y1quant[Q] + rc, cpi->Y1quant_shift[Q] + rc, quant_val); cpi->Y1zbin[Q][rc] = ((qzbin_factors[Q] * quant_val) + 64) >> 7; cpi->Y1round[Q][rc] = (qrounding_factors[Q] * quant_val) >> 7; @@ -207,7 
+220,8 @@ cpi->zrun_zbin_boost_y1[Q][i] = (quant_val * zbin_boost[i]) >> 7; quant_val = vp8_ac2quant(Q, cpi->common.y2ac_delta_q); - vp8cx_invert_quant(cpi->Y2quant[Q] + rc, + cpi->Y2quant_fast[Q][rc] = (1 << 16) / quant_val; + vp8cx_invert_quant(cpi->sf.improved_quant, cpi->Y2quant[Q] + rc, cpi->Y2quant_shift[Q] + rc, quant_val); cpi->Y2zbin[Q][rc] = ((qzbin_factors_y2[Q] * quant_val) + 64) >> 7; cpi->Y2round[Q][rc] = (qrounding_factors_y2[Q] * quant_val) >> 7; @@ -215,7 +229,8 @@ cpi->zrun_zbin_boost_y2[Q][i] = (quant_val * zbin_boost[i]) >> 7; quant_val = vp8_ac_uv_quant(Q, cpi->common.uvac_delta_q); - vp8cx_invert_quant(cpi->UVquant[Q] + rc, + cpi->UVquant_fast[Q][rc] = (1 << 16) / quant_val; + vp8cx_invert_quant(cpi->sf.improved_quant, cpi->UVquant[Q] + rc, cpi->UVquant_shift[Q] + rc, quant_val); cpi->UVzbin[Q][rc] = ((qzbin_factors[Q] * quant_val) + 64) >> 7; cpi->UVround[Q][rc] = (qrounding_factors[Q] * quant_val) >> 7; @@ -316,6 +331,7 @@ for (i = 0; i < 16; i++) { x->block[i].quant = cpi->Y1quant[QIndex]; + x->block[i].quant_fast = cpi->Y1quant_fast[QIndex]; x->block[i].quant_shift = cpi->Y1quant_shift[QIndex]; x->block[i].zbin = cpi->Y1zbin[QIndex]; x->block[i].round = cpi->Y1round[QIndex]; @@ -330,6 +346,7 @@ for (i = 16; i < 24; i++) { x->block[i].quant = cpi->UVquant[QIndex]; + x->block[i].quant_fast = cpi->UVquant_fast[QIndex]; x->block[i].quant_shift = cpi->UVquant_shift[QIndex]; x->block[i].zbin = cpi->UVzbin[QIndex]; x->block[i].round = cpi->UVround[QIndex]; @@ -340,6 +357,7 @@ // Y2 zbin_extra = (cpi->common.Y2dequant[QIndex][1] * ((cpi->zbin_over_quant / 2) + cpi->zbin_mode_boost)) >> 7; + x->block[24].quant_fast = cpi->Y2quant_fast[QIndex]; x->block[24].quant = cpi->Y2quant[QIndex]; x->block[24].quant_shift = cpi->Y2quant_shift[QIndex]; x->block[24].zbin = cpi->Y2zbin[QIndex]; @@ -347,22 +365,100 @@ x->e_mbd.block[24].dequant = cpi->common.Y2dequant[QIndex]; x->block[24].zrun_zbin_boost = cpi->zrun_zbin_boost_y2[QIndex]; x->block[24].zbin_extra 
= (short)zbin_extra; -} -void vp8cx_frame_init_quantizer(VP8_COMP *cpi) + /* save this macroblock QIndex for vp8_update_zbin_extra() */ + x->q_index = QIndex; +} +void vp8_update_zbin_extra(VP8_COMP *cpi, MACROBLOCK *x) { - // vp8cx_init_quantizer() is first called in vp8_create_compressor(). A check is added here so that vp8cx_init_quantizer() is only called - // when these values are not all zero. - if (cpi->common.y1dc_delta_q | cpi->common.y2dc_delta_q | cpi->common.uvdc_delta_q | cpi->common.y2ac_delta_q | cpi->common.uvac_delta_q) + int i; + int QIndex = x->q_index; + int zbin_extra; + + // Y + zbin_extra = (cpi->common.Y1dequant[QIndex][1] * (cpi->zbin_over_quant + cpi->zbin_mode_boost)) >> 7; + for (i = 0; i < 16; i++) { - vp8cx_init_quantizer(cpi); + x->block[i].zbin_extra = (short)zbin_extra; } + // UV + zbin_extra = (cpi->common.UVdequant[QIndex][1] * (cpi->zbin_over_quant + cpi->zbin_mode_boost)) >> 7; + for (i = 16; i < 24; i++) + { + x->block[i].zbin_extra = (short)zbin_extra; + } + + // Y2 + zbin_extra = (cpi->common.Y2dequant[QIndex][1] * ((cpi->zbin_over_quant / 2) + cpi->zbin_mode_boost)) >> 7; + x->block[24].zbin_extra = (short)zbin_extra; +} + +void vp8cx_frame_init_quantizer(VP8_COMP *cpi) +{ + // Clear Zbin mode boost for default case + cpi->zbin_mode_boost = 0; + // MB level quantizer setup vp8cx_mb_init_quantizer(cpi, &cpi->mb); } +/* activity_avg must be positive, or flat regions could get a zero weight + * (infinite lambda), which confounds analysis. + * This also avoids the need for divide by zero checks in + * vp8_activity_masking(). + */ +#define VP8_ACTIVITY_AVG_MIN (64) + +/* This is used as a reference when computing the source variance for the + * purposes of activity masking. + * Eventually this should be replaced by custom no-reference routines, + * which will be faster. 
+ */ +static const unsigned char VP8_VAR_OFFS[16]= +{ + 128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128 +}; + +unsigned int vp8_activity_masking(VP8_COMP *cpi, MACROBLOCK *x) +{ + unsigned int act; + unsigned int sse; + int sum; + unsigned int a; + unsigned int b; + /* TODO: This could also be done over smaller areas (8x8), but that would + * require extensive changes elsewhere, as lambda is assumed to be fixed + * over an entire MB in most of the code. + * Another option is to compute four 8x8 variances, and pick a single + * lambda using a non-linear combination (e.g., the smallest, or second + * smallest, etc.). + */ + VARIANCE_INVOKE(&cpi->rtcd.variance, get16x16var)(x->src.y_buffer, + x->src.y_stride, VP8_VAR_OFFS, 0, &sse, &sum); + /* This requires a full 32 bits of precision. */ + act = (sse<<8) - sum*sum; + /* Drop 4 to give us some headroom to work with. */ + act = (act + 8) >> 4; + /* If the region is flat, lower the activity some more. */ + if (act < 8<<12) + act = act < 5<<12 ? act : 5<<12; + /* TODO: For non-flat regions, edge regions should receive less masking + * than textured regions, but identifying edge regions quickly and + * reliably enough is still a subject of experimentation. + * This will be most noticable near edges with a complex shape (e.g., + * text), but the 4x4 transform size should make this less of a problem + * than it would be for an 8x8 transform. + */ + /* Apply the masking to the RD multiplier. 
*/ + a = act + 4*cpi->activity_avg; + b = 4*act + cpi->activity_avg; + x->rdmult = (unsigned int)(((INT64)x->rdmult*b + (a>>1))/a); + return act; +} + + static void encode_mb_row(VP8_COMP *cpi, @@ -374,6 +470,7 @@ int *segment_counts, int *totalrate) { + INT64 activity_sum = 0; int i; int recon_yoffset, recon_uvoffset; int mb_col; @@ -383,6 +480,16 @@ int recon_uv_stride = cm->yv12_fb[ref_fb_idx].uv_stride; int seg_map_index = (mb_row * cpi->common.mb_cols); +#if CONFIG_MULTITHREAD + const int nsync = cpi->mt_sync_range; + const int rightmost_col = cm->mb_cols - 1; + volatile const int *last_row_current_mb_col; + + if ((cpi->b_multi_threaded != 0) && (mb_row != 0)) + last_row_current_mb_col = &cpi->mt_current_mb_col[mb_row - 1]; + else + last_row_current_mb_col = &rightmost_col; +#endif // reset above block coeffs xd->above_context = cm->above_context; @@ -402,14 +509,14 @@ // Set up limit values for vertical motion vector components // to prevent them extending beyond the UMV borders x->mv_row_min = -((mb_row * 16) + (VP8BORDERINPIXELS - 16)); - x->mv_row_max = ((cm->mb_rows - 1 - mb_row) * 16) + x->mv_row_max = ((cm->mb_rows - 1 - mb_row) * 16) + (VP8BORDERINPIXELS - 16); // for each macroblock col in image for (mb_col = 0; mb_col < cm->mb_cols; mb_col++) { - // Distance of Mb to the left & right edges, specified in - // 1/8th pel units as they are always compared to values + // Distance of Mb to the left & right edges, specified in + // 1/8th pel units as they are always compared to values // that are in 1/8th pel units xd->mb_to_left_edge = -((mb_col * 16) << 3); xd->mb_to_right_edge = ((cm->mb_cols - 1 - mb_col) * 16) << 3; @@ -417,7 +524,7 @@ // Set up limit values for horizontal motion vector components // to prevent them extending beyond the UMV borders x->mv_col_min = -((mb_col * 16) + (VP8BORDERINPIXELS - 16)); - x->mv_col_max = ((cm->mb_cols - 1 - mb_col) * 16) + x->mv_col_max = ((cm->mb_cols - 1 - mb_col) * 16) + (VP8BORDERINPIXELS - 16); 
xd->dst.y_buffer = cm->yv12_fb[dst_fb_idx].y_buffer + recon_yoffset; @@ -425,6 +532,27 @@ xd->dst.v_buffer = cm->yv12_fb[dst_fb_idx].v_buffer + recon_uvoffset; xd->left_available = (mb_col != 0); + x->rddiv = cpi->RDDIV; + x->rdmult = cpi->RDMULT; + +#if CONFIG_MULTITHREAD + if ((cpi->b_multi_threaded != 0) && (mb_row != 0)) + { + if ((mb_col & (nsync - 1)) == 0) + { + while (mb_col > (*last_row_current_mb_col - nsync) + && (*last_row_current_mb_col) != (cm->mb_cols - 1)) + { + x86_pause_hint(); + thread_sleep(0); + } + } + } +#endif + + if(cpi->oxcf.tuning == VP8_TUNE_SSIM) + activity_sum += vp8_activity_masking(cpi, x); + // Is segmentation enabled // MB level adjutment to quantizer if (xd->segmentation_enabled) @@ -518,7 +646,12 @@ x->partition_info++; xd->above_context++; - cpi->current_mb_col_main = mb_col; +#if CONFIG_MULTITHREAD + if (cpi->b_multi_threaded != 0) + { + cpi->mt_current_mb_col[mb_row] = mb_col; + } +#endif } //extend the recon for intra prediction @@ -531,11 +664,15 @@ // this is to account for the border xd->mode_info_context++; x->partition_info++; -} - - - + x->activity_sum += activity_sum; +#if CONFIG_MULTITHREAD + if ((cpi->b_multi_threaded != 0) && (mb_row == cm->mb_rows - 1)) + { + sem_post(&cpi->h_event_end_encoding); /* signal frame encoding end */ + } +#endif +} void vp8_encode_frame(VP8_COMP *cpi) { @@ -544,7 +681,6 @@ VP8_COMMON *const cm = & cpi->common; MACROBLOCKD *const xd = & x->e_mbd; - int i; TOKENEXTRA *tp = cpi->tok; int segment_counts[MAX_MB_SEGMENTS]; int totalrate; @@ -627,9 +763,7 @@ } vp8_initialize_rd_consts(cpi, vp8_dc_quant(cm->base_qindex, cm->y1dc_delta_q)); - //vp8_initialize_rd_consts( cpi, vp8_dc_quant(cpi->avg_frame_qindex, cm->y1dc_delta_q) ); vp8cx_initialize_me_consts(cpi, cm->base_qindex); - //vp8cx_initialize_me_consts( cpi, cpi->avg_frame_qindex); // Copy data over into macro block data sturctures. 
@@ -647,22 +781,7 @@ vp8_setup_block_ptrs(x); - x->rddiv = cpi->RDDIV; - x->rdmult = cpi->RDMULT; - -#if 0 - // Experimental rd code - // 2 Pass - Possibly set Rdmult based on last frame distortion + this frame target bits or other metrics - // such as cpi->rate_correction_factor that indicate relative complexity. - /*if ( cpi->pass == 2 && (cpi->last_frame_distortion > 0) && (cpi->target_bits_per_mb > 0) ) - { - //x->rdmult = ((cpi->last_frame_distortion * 256)/cpi->common.MBs)/ cpi->target_bits_per_mb; - x->rdmult = (int)(cpi->RDMULT * cpi->rate_correction_factor); - } - else - x->rdmult = cpi->RDMULT; */ - //x->rdmult = (int)(cpi->RDMULT * pow( (cpi->rate_correction_factor * 2.0), 0.75 )); -#endif + x->activity_sum = 0; xd->mode_info_context->mbmi.mode = DC_PRED; xd->mode_info_context->mbmi.uv_mode = DC_PRED; @@ -681,47 +800,23 @@ struct vpx_usec_timer emr_timer; vpx_usec_timer_start(&emr_timer); - if (!cpi->b_multi_threaded) +#if CONFIG_MULTITHREAD + if (cpi->b_multi_threaded) { - // for each macroblock row in image - for (mb_row = 0; mb_row < cm->mb_rows; mb_row++) - { + int i; - vp8_zero(cm->left_context) + vp8cx_init_mbrthread_data(cpi, x, cpi->mb_row_ei, 1, cpi->encoding_thread_count); - encode_mb_row(cpi, cm, mb_row, x, xd, &tp, segment_counts, &totalrate); + for (i = 0; i < cm->mb_rows; i++) + cpi->mt_current_mb_col[i] = 0; - // adjust to the next row of mbs - x->src.y_buffer += 16 * x->src.y_stride - 16 * cm->mb_cols; - x->src.u_buffer += 8 * x->src.uv_stride - 8 * cm->mb_cols; - x->src.v_buffer += 8 * x->src.uv_stride - 8 * cm->mb_cols; + for (i = 0; i < cpi->encoding_thread_count; i++) + { + sem_post(&cpi->h_event_start_encoding[i]); } - cpi->tok_count = tp - cpi->tok; - - } - else - { -#if CONFIG_MULTITHREAD - vp8cx_init_mbrthread_data(cpi, x, cpi->mb_row_ei, 1, cpi->encoding_thread_count); - for (mb_row = 0; mb_row < cm->mb_rows; mb_row += (cpi->encoding_thread_count + 1)) { - int i; - cpi->current_mb_col_main = -1; - - for (i = 0; i < 
cpi->encoding_thread_count; i++) - { - if ((mb_row + i + 1) >= cm->mb_rows) - break; - - cpi->mb_row_ei[i].mb_row = mb_row + i + 1; - cpi->mb_row_ei[i].tp = cpi->tok + (mb_row + i + 1) * (cm->mb_cols * 16 * 24); - cpi->mb_row_ei[i].current_mb_col = -1; - //SetEvent(cpi->h_event_mbrencoding[i]); - sem_post(&cpi->h_event_mbrencoding[i]); - } - vp8_zero(cm->left_context) tp = cpi->tok + mb_row * (cm->mb_cols * 16 * 24); @@ -736,26 +831,10 @@ xd->mode_info_context += xd->mode_info_stride * cpi->encoding_thread_count; x->partition_info += xd->mode_info_stride * cpi->encoding_thread_count; - if (mb_row < cm->mb_rows - 1) - //WaitForSingleObject(cpi->h_event_main, INFINITE); - sem_wait(&cpi->h_event_main); } - /* - for( ;mb_rowmb_rows; mb_row ++) - { - vp8_zero( cm->left_context) - - tp = cpi->tok + mb_row * (cm->mb_cols * 16 * 24); - - encode_mb_row(cpi, cm, mb_row, x, xd, &tp, segment_counts, &totalrate); - // adjust to the next row of mbs - x->src.y_buffer += 16 * x->src.y_stride - 16 * cm->mb_cols; - x->src.u_buffer += 8 * x->src.uv_stride - 8 * cm->mb_cols; - x->src.v_buffer += 8 * x->src.uv_stride - 8 * cm->mb_cols; + sem_wait(&cpi->h_event_end_encoding); /* wait for other threads to finish */ - } - */ cpi->tok_count = 0; for (mb_row = 0; mb_row < cm->mb_rows; mb_row ++) @@ -765,7 +844,6 @@ if (xd->segmentation_enabled) { - int i, j; if (xd->segmentation_enabled) @@ -777,7 +855,6 @@ segment_counts[j] += cpi->mb_row_ei[i].segment_counts[j]; } } - } for (i = 0; i < cpi->encoding_thread_count; i++) @@ -785,7 +862,30 @@ totalrate += cpi->mb_row_ei[i].totalrate; } + for (i = 0; i < cpi->encoding_thread_count; i++) + { + x->activity_sum += cpi->mb_row_ei[i].mb.activity_sum; + } + + } + else #endif + { + // for each macroblock row in image + for (mb_row = 0; mb_row < cm->mb_rows; mb_row++) + { + + vp8_zero(cm->left_context) + + encode_mb_row(cpi, cm, mb_row, x, xd, &tp, segment_counts, &totalrate); + + // adjust to the next row of mbs + x->src.y_buffer += 16 * 
x->src.y_stride - 16 * cm->mb_cols; + x->src.u_buffer += 8 * x->src.uv_stride - 8 * cm->mb_cols; + x->src.v_buffer += 8 * x->src.uv_stride - 8 * cm->mb_cols; + } + + cpi->tok_count = tp - cpi->tok; } @@ -920,6 +1020,14 @@ cpi->last_frame_distortion = cpi->frame_distortion; #endif + /* Update the average activity for the next frame. + * This is feed-forward for now; it could also be saved in two-pass, or + * done during lookahead when that is eventually added. + */ + cpi->activity_avg = (unsigned int )(x->activity_sum/cpi->common.MBs); + if (cpi->activity_avg < VP8_ACTIVITY_AVG_MIN) + cpi->activity_avg = VP8_ACTIVITY_AVG_MIN; + } void vp8_setup_block_ptrs(MACROBLOCK *x) { @@ -1040,77 +1148,41 @@ int vp8cx_encode_intra_macro_block(VP8_COMP *cpi, MACROBLOCK *x, TOKENEXTRA **t) { int Error4x4, Error16x16, error_uv; - B_PREDICTION_MODE intra_bmodes[16]; int rate4x4, rate16x16, rateuv; int dist4x4, dist16x16, distuv; int rate = 0; int rate4x4_tokenonly = 0; int rate16x16_tokenonly = 0; int rateuv_tokenonly = 0; - int i; x->e_mbd.mode_info_context->mbmi.ref_frame = INTRA_FRAME; #if !(CONFIG_REALTIME_ONLY) - - if (cpi->sf.RD || cpi->compressor_speed != 2) + if (cpi->sf.RD && cpi->compressor_speed != 2) { - Error4x4 = vp8_rd_pick_intra4x4mby_modes(cpi, x, &rate4x4, &rate4x4_tokenonly, &dist4x4); - - //save the b modes for possible later use - for (i = 0; i < 16; i++) - intra_bmodes[i] = x->e_mbd.block[i].bmi.mode; - - Error16x16 = vp8_rd_pick_intra16x16mby_mode(cpi, x, &rate16x16, &rate16x16_tokenonly, &dist16x16); - error_uv = vp8_rd_pick_intra_mbuv_mode(cpi, x, &rateuv, &rateuv_tokenonly, &distuv); - - vp8_encode_intra16x16mbuv(IF_RTCD(&cpi->rtcd), x); rate += rateuv; - if (Error4x4 < Error16x16) - { - rate += rate4x4; - x->e_mbd.mode_info_context->mbmi.mode = B_PRED; - - // get back the intra block modes - for (i = 0; i < 16; i++) - x->e_mbd.block[i].bmi.mode = intra_bmodes[i]; - - vp8_encode_intra4x4mby(IF_RTCD(&cpi->rtcd), x); - cpi->prediction_error += Error4x4 ; -#if 
0 - // Experimental RD code - cpi->frame_distortion += dist4x4; -#endif - } - else - { - vp8_encode_intra16x16mby(IF_RTCD(&cpi->rtcd), x); - rate += rate16x16; - -#if 0 - // Experimental RD code - cpi->prediction_error += Error16x16; - cpi->frame_distortion += dist16x16; -#endif - } + Error16x16 = vp8_rd_pick_intra16x16mby_mode(cpi, x, &rate16x16, &rate16x16_tokenonly, &dist16x16); - sum_intra_stats(cpi, x); + Error4x4 = vp8_rd_pick_intra4x4mby_modes(cpi, x, &rate4x4, &rate4x4_tokenonly, &dist4x4, Error16x16); - vp8_tokenize_mb(cpi, &x->e_mbd, t); + rate += (Error4x4 < Error16x16) ? rate4x4 : rate16x16; } else #endif { - - int rate2, distortion2; + int rate2, best_distortion; MB_PREDICTION_MODE mode, best_mode = DC_PRED; int this_rd; Error16x16 = INT_MAX; + vp8_pick_intra_mbuv_mode(x); + for (mode = DC_PRED; mode <= TM_PRED; mode ++) { + int distortion2; + x->e_mbd.mode_info_context->mbmi.mode = mode; vp8_build_intra_predictors_mby_ptr(&x->e_mbd); distortion2 = VARIANCE_INVOKE(&cpi->rtcd.variance, get16x16prederror)(x->src.y_buffer, x->src.y_stride, x->e_mbd.predictor, 16, 0x7fffffff); @@ -1121,34 +1193,27 @@ { Error16x16 = this_rd; best_mode = mode; + best_distortion = distortion2; } } + x->e_mbd.mode_info_context->mbmi.mode = best_mode; - vp8_pick_intra4x4mby_modes(IF_RTCD(&cpi->rtcd), x, &rate2, &distortion2); - - if (distortion2 == INT_MAX) - Error4x4 = INT_MAX; - else - Error4x4 = RD_ESTIMATE(x->rdmult, x->rddiv, rate2, distortion2); - - if (Error4x4 < Error16x16) - { - x->e_mbd.mode_info_context->mbmi.mode = B_PRED; - vp8_encode_intra4x4mby(IF_RTCD(&cpi->rtcd), x); - cpi->prediction_error += Error4x4; - } - else - { - x->e_mbd.mode_info_context->mbmi.mode = best_mode; - vp8_encode_intra16x16mby(IF_RTCD(&cpi->rtcd), x); - cpi->prediction_error += Error16x16; - } + Error4x4 = vp8_pick_intra4x4mby_modes(IF_RTCD(&cpi->rtcd), x, &rate2, &best_distortion); + } - vp8_pick_intra_mbuv_mode(x); - vp8_encode_intra16x16mbuv(IF_RTCD(&cpi->rtcd), x); - sum_intra_stats(cpi, 
x); - vp8_tokenize_mb(cpi, &x->e_mbd, t); + if (Error4x4 < Error16x16) + { + x->e_mbd.mode_info_context->mbmi.mode = B_PRED; + vp8_encode_intra4x4mby(IF_RTCD(&cpi->rtcd), x); } + else + { + vp8_encode_intra16x16mby(IF_RTCD(&cpi->rtcd), x); + } + + vp8_encode_intra16x16mbuv(IF_RTCD(&cpi->rtcd), x); + sum_intra_stats(cpi, x); + vp8_tokenize_mb(cpi, &x->e_mbd, t); return rate; } @@ -1181,7 +1246,28 @@ if (cpi->sf.RD) { + int zbin_mode_boost_enabled = cpi->zbin_mode_boost_enabled; + + /* Are we using the fast quantizer for the mode selection? */ + if(cpi->sf.use_fastquant_for_pick) + { + cpi->mb.quantize_b = QUANTIZE_INVOKE(&cpi->rtcd.quantize, fastquantb); + + /* the fast quantizer does not use zbin_extra, so + * do not recalculate */ + cpi->zbin_mode_boost_enabled = 0; + } inter_error = vp8_rd_pick_inter_mode(cpi, x, recon_yoffset, recon_uvoffset, &rate, &distortion, &intra_error); + + /* switch back to the regular quantizer for the encode */ + if (cpi->sf.improved_quant) + { + cpi->mb.quantize_b = QUANTIZE_INVOKE(&cpi->rtcd.quantize, quantb); + } + + /* restore cpi->zbin_mode_boost_enabled */ + cpi->zbin_mode_boost_enabled = zbin_mode_boost_enabled; + } else #endif @@ -1198,7 +1284,7 @@ #endif // MB level adjutment to quantizer setup - if (xd->segmentation_enabled || cpi->zbin_mode_boost_enabled) + if (xd->segmentation_enabled) { // If cyclic update enabled if (cpi->cyclic_refresh_mode_enabled) @@ -1208,19 +1294,38 @@ ((xd->mode_info_context->mbmi.ref_frame != LAST_FRAME) || (xd->mode_info_context->mbmi.mode != ZEROMV))) { xd->mode_info_context->mbmi.segment_id = 0; + + /* segment_id changed, so update */ + vp8cx_mb_init_quantizer(cpi, x); } } + } + { // Experimental code. Special case for gf and arf zeromv modes. 
Increase zbin size to supress noise if (cpi->zbin_mode_boost_enabled) { - if ((xd->mode_info_context->mbmi.mode == ZEROMV) && (xd->mode_info_context->mbmi.ref_frame != LAST_FRAME)) - cpi->zbin_mode_boost = GF_ZEROMV_ZBIN_BOOST; + if ( xd->mode_info_context->mbmi.ref_frame == INTRA_FRAME ) + cpi->zbin_mode_boost = 0; else - cpi->zbin_mode_boost = 0; + { + if (xd->mode_info_context->mbmi.mode == ZEROMV) + { + if (xd->mode_info_context->mbmi.ref_frame != LAST_FRAME) + cpi->zbin_mode_boost = GF_ZEROMV_ZBIN_BOOST; + else + cpi->zbin_mode_boost = LF_ZEROMV_ZBIN_BOOST; + } + else if (xd->mode_info_context->mbmi.mode == SPLITMV) + cpi->zbin_mode_boost = 0; + else + cpi->zbin_mode_boost = MV_ZBIN_BOOST; + } } + else + cpi->zbin_mode_boost = 0; - vp8cx_mb_init_quantizer(cpi, x); + vp8_update_zbin_extra(cpi, x); } cpi->count_mb_ref_frame_usage[xd->mode_info_context->mbmi.ref_frame] ++; diff -Nru libvpx-0.9.5/vp8/encoder/encodeintra.c libvpx-0.9.6/vp8/encoder/encodeintra.c --- libvpx-0.9.5/vp8/encoder/encodeintra.c 2010-10-28 13:14:14.000000000 +0000 +++ libvpx-0.9.6/vp8/encoder/encodeintra.c 2011-03-04 20:40:40.000000000 +0000 @@ -10,15 +10,15 @@ #include "vpx_ports/config.h" -#include "idct.h" +#include "vp8/common/idct.h" #include "quantize.h" -#include "reconintra.h" -#include "reconintra4x4.h" +#include "vp8/common/reconintra.h" +#include "vp8/common/reconintra4x4.h" #include "encodemb.h" -#include "invtrans.h" -#include "recon.h" +#include "vp8/common/invtrans.h" +#include "vp8/common/recon.h" #include "dct.h" -#include "g_common.h" +#include "vp8/common/g_common.h" #include "encodeintra.h" #define intra4x4ibias_rate 128 @@ -58,21 +58,6 @@ RECON_INVOKE(&rtcd->common->recon, recon)(b->predictor, b->diff, *(b->base_dst) + b->dst, b->dst_stride); } -void vp8_encode_intra4x4block_rd(const VP8_ENCODER_RTCD *rtcd, MACROBLOCK *x, BLOCK *be, BLOCKD *b, int best_mode) -{ - vp8_predict_intra4x4(b, best_mode, b->predictor); - - ENCODEMB_INVOKE(&rtcd->encodemb, subb)(be, b, 16); - - 
x->vp8_short_fdct4x4(be->src_diff, be->coeff, 32); - - x->quantize_b(be, b); - - IDCT_INVOKE(&rtcd->common->idct, idct16)(b->dqcoeff, b->diff, 32); - - RECON_INVOKE(&rtcd->common->recon, recon)(b->predictor, b->diff, *(b->base_dst) + b->dst, b->dst_stride); -} - void vp8_encode_intra4x4mby(const VP8_ENCODER_RTCD *rtcd, MACROBLOCK *mb) { int i; @@ -105,7 +90,7 @@ #if !(CONFIG_REALTIME_ONLY) #if 1 - if (x->optimize==2 ||(x->optimize && x->rddiv > 1)) + if (x->optimize) vp8_optimize_mby(x, rtcd); #endif @@ -144,51 +129,6 @@ } } -void vp8_encode_intra16x16mbyrd(const VP8_ENCODER_RTCD *rtcd, MACROBLOCK *x) -{ - int b; - - vp8_build_intra_predictors_mby_ptr(&x->e_mbd); - - ENCODEMB_INVOKE(&rtcd->encodemb, submby)(x->src_diff, x->src.y_buffer, x->e_mbd.predictor, x->src.y_stride); - - vp8_transform_intra_mby(x); - - vp8_quantize_mby(x); - - vp8_inverse_transform_mby(IF_RTCD(&rtcd->common->idct), &x->e_mbd); - - RECON_INVOKE(&rtcd->common->recon, recon_mby) - (IF_RTCD(&rtcd->common->recon), &x->e_mbd); - - // make sure block modes are set the way we want them for context updates - for (b = 0; b < 16; b++) - { - BLOCKD *d = &x->e_mbd.block[b]; - - switch (x->e_mbd.mode_info_context->mbmi.mode) - { - - case DC_PRED: - d->bmi.mode = B_DC_PRED; - break; - case V_PRED: - d->bmi.mode = B_VE_PRED; - break; - case H_PRED: - d->bmi.mode = B_HE_PRED; - break; - case TM_PRED: - d->bmi.mode = B_TM_PRED; - break; - default: - d->bmi.mode = B_DC_PRED; - break; - - } - } -} - void vp8_encode_intra16x16mbuv(const VP8_ENCODER_RTCD *rtcd, MACROBLOCK *x) { vp8_build_intra_predictors_mbuv(&x->e_mbd); @@ -213,17 +153,3 @@ vp8_recon_intra_mbuv(IF_RTCD(&rtcd->common->recon), &x->e_mbd); } -void vp8_encode_intra16x16mbuvrd(const VP8_ENCODER_RTCD *rtcd, MACROBLOCK *x) -{ - vp8_build_intra_predictors_mbuv(&x->e_mbd); - - ENCODEMB_INVOKE(&rtcd->encodemb, submbuv)(x->src_diff, x->src.u_buffer, x->src.v_buffer, x->e_mbd.predictor, x->src.uv_stride); - - vp8_transform_mbuv(x); - - vp8_quantize_mbuv(x); 
- - vp8_inverse_transform_mbuv(IF_RTCD(&rtcd->common->idct), &x->e_mbd); - - vp8_recon_intra_mbuv(IF_RTCD(&rtcd->common->recon), &x->e_mbd); -} diff -Nru libvpx-0.9.5/vp8/encoder/encodeintra.h libvpx-0.9.6/vp8/encoder/encodeintra.h --- libvpx-0.9.5/vp8/encoder/encodeintra.h 2010-10-28 13:14:14.000000000 +0000 +++ libvpx-0.9.6/vp8/encoder/encodeintra.h 2011-03-04 20:40:40.000000000 +0000 @@ -19,7 +19,5 @@ void vp8_encode_intra4x4block(const VP8_ENCODER_RTCD *, MACROBLOCK *x, BLOCK *be, BLOCKD *b, int best_mode); void vp8_update_mode_context(int *abmode, int *lbmode, int i, int best_mode); void vp8_encode_intra4x4block_rd(const VP8_ENCODER_RTCD *, MACROBLOCK *x, BLOCK *be, BLOCKD *b, int best_mode); -void vp8_encode_intra16x16mbyrd(const VP8_ENCODER_RTCD *, MACROBLOCK *x); -void vp8_encode_intra16x16mbuvrd(const VP8_ENCODER_RTCD *, MACROBLOCK *x); #endif diff -Nru libvpx-0.9.5/vp8/encoder/encodemb.c libvpx-0.9.6/vp8/encoder/encodemb.c --- libvpx-0.9.5/vp8/encoder/encodemb.c 2010-10-28 13:14:14.000000000 +0000 +++ libvpx-0.9.6/vp8/encoder/encodemb.c 2011-03-04 20:40:40.000000000 +0000 @@ -11,12 +11,12 @@ #include "vpx_ports/config.h" #include "encodemb.h" -#include "reconinter.h" +#include "vp8/common/reconinter.h" #include "quantize.h" #include "tokenize.h" -#include "invtrans.h" -#include "recon.h" -#include "reconintra.h" +#include "vp8/common/invtrans.h" +#include "vp8/common/recon.h" +#include "vp8/common/reconintra.h" #include "dct.h" #include "vpx_mem/vpx_mem.h" @@ -243,9 +243,9 @@ }; // TODO: experiments to find optimal multiple numbers -#define Y1_RD_MULT 1 -#define UV_RD_MULT 1 -#define Y2_RD_MULT 4 +#define Y1_RD_MULT 4 +#define UV_RD_MULT 2 +#define Y2_RD_MULT 16 static const int plane_rd_mult[4]= { @@ -273,7 +273,6 @@ int x; int sz; int next; - int path; int rdmult; int rddiv; int final_eob; @@ -309,8 +308,10 @@ eob = d->eob; /* Now set up a Viterbi trellis to evaluate alternative roundings. 
*/ - /* TODO: These should vary with the block type, since the quantizer does. */ - rdmult = (mb->rdmult << 2)*err_mult; + rdmult = mb->rdmult * err_mult; + if(mb->e_mbd.mode_info_context->mbmi.ref_frame==INTRA_FRAME) + rdmult = (rdmult * 9)>>4; + rddiv = mb->rddiv; best_mask[0] = best_mask[1] = 0; /* Initialize the sentinel node of the trellis. */ @@ -517,7 +518,7 @@ has_2nd_order = (x->e_mbd.mode_info_context->mbmi.mode != B_PRED && x->e_mbd.mode_info_context->mbmi.mode != SPLITMV); - type = has_2nd_order ? 0 : 3; + type = has_2nd_order ? PLANE_TYPE_Y_NO_DC : PLANE_TYPE_Y_WITH_DC; for (b = 0; b < 16; b++) { @@ -525,23 +526,16 @@ ta + vp8_block2above[b], tl + vp8_block2left[b], rtcd); } - for (b = 16; b < 20; b++) - { - vp8_optimize_b(x, b, vp8_block2type[b], - ta + vp8_block2above[b], tl + vp8_block2left[b], rtcd); - } - - for (b = 20; b < 24; b++) + for (b = 16; b < 24; b++) { - vp8_optimize_b(x, b, vp8_block2type[b], + vp8_optimize_b(x, b, PLANE_TYPE_UV, ta + vp8_block2above[b], tl + vp8_block2left[b], rtcd); } - if (has_2nd_order) { b=24; - vp8_optimize_b(x, b, vp8_block2type[b], + vp8_optimize_b(x, b, PLANE_TYPE_Y2, ta + vp8_block2above[b], tl + vp8_block2left[b], rtcd); } } @@ -571,7 +565,7 @@ has_2nd_order = (x->e_mbd.mode_info_context->mbmi.mode != B_PRED && x->e_mbd.mode_info_context->mbmi.mode != SPLITMV); - type = has_2nd_order ? 0 : 3; + type = has_2nd_order ? 
PLANE_TYPE_Y_NO_DC : PLANE_TYPE_Y_WITH_DC; for (b = 0; b < 16; b++) { @@ -583,7 +577,7 @@ if (has_2nd_order) { b=24; - vp8_optimize_b(x, b, vp8_block2type[b], + vp8_optimize_b(x, b, PLANE_TYPE_Y2, ta + vp8_block2above[b], tl + vp8_block2left[b], rtcd); } } @@ -607,18 +601,11 @@ ta = (ENTROPY_CONTEXT *)&t_above; tl = (ENTROPY_CONTEXT *)&t_left; - for (b = 16; b < 20; b++) + for (b = 16; b < 24; b++) { - vp8_optimize_b(x, b, vp8_block2type[b], + vp8_optimize_b(x, b, PLANE_TYPE_UV, ta + vp8_block2above[b], tl + vp8_block2left[b], rtcd); } - - for (b = 20; b < 24; b++) - { - vp8_optimize_b(x, b, vp8_block2type[b], - ta + vp8_block2above[b], tl + vp8_block2left[b], rtcd); - } - } #endif @@ -633,7 +620,7 @@ vp8_quantize_mb(x); #if !(CONFIG_REALTIME_ONLY) - if (x->optimize==2 ||(x->optimize && x->rddiv > 1)) + if (x->optimize) vp8_optimize_mb(x, rtcd); #endif diff -Nru libvpx-0.9.5/vp8/encoder/encodemv.c libvpx-0.9.6/vp8/encoder/encodemv.c --- libvpx-0.9.5/vp8/encoder/encodemv.c 2010-10-28 13:14:14.000000000 +0000 +++ libvpx-0.9.6/vp8/encoder/encodemv.c 2011-03-04 20:40:40.000000000 +0000 @@ -9,10 +9,10 @@ */ -#include "common.h" +#include "vp8/common/common.h" #include "encodemv.h" -#include "entropymode.h" -#include "systemdependent.h" +#include "vp8/common/entropymode.h" +#include "vp8/common/systemdependent.h" #include @@ -128,7 +128,7 @@ while (--i > 3); - if (x & 240) + if (x & 0xFFF0) cost += vp8_cost_bit(p [MVPbits + 3], (x >> 3) & 1); } diff -Nru libvpx-0.9.5/vp8/encoder/ethreading.c libvpx-0.9.6/vp8/encoder/ethreading.c --- libvpx-0.9.5/vp8/encoder/ethreading.c 2010-10-28 13:14:14.000000000 +0000 +++ libvpx-0.9.6/vp8/encoder/ethreading.c 2011-03-04 20:40:40.000000000 +0000 @@ -8,15 +8,18 @@ * be found in the AUTHORS file in the root of the source tree. 
*/ - #include "onyx_int.h" -#include "threading.h" -#include "common.h" -#include "extend.h" +#include "vp8/common/threading.h" +#include "vp8/common/common.h" +#include "vp8/common/extend.h" +#if CONFIG_MULTITHREAD -extern int vp8cx_encode_inter_macroblock(VP8_COMP *cpi, MACROBLOCK *x, TOKENEXTRA **t, int recon_yoffset, int recon_uvoffset); -extern int vp8cx_encode_intra_macro_block(VP8_COMP *cpi, MACROBLOCK *x, TOKENEXTRA **t); +extern int vp8cx_encode_inter_macroblock(VP8_COMP *cpi, MACROBLOCK *x, + TOKENEXTRA **t, int recon_yoffset, + int recon_uvoffset); +extern int vp8cx_encode_intra_macro_block(VP8_COMP *cpi, MACROBLOCK *x, + TOKENEXTRA **t); extern void vp8cx_mb_init_quantizer(VP8_COMP *cpi, MACROBLOCK *x); extern void vp8_build_block_offsets(MACROBLOCK *x); extern void vp8_setup_block_ptrs(MACROBLOCK *x); @@ -24,12 +27,12 @@ static THREAD_FUNCTION thread_encoding_proc(void *p_data) { -#if CONFIG_MULTITHREAD int ithread = ((ENCODETHREAD_DATA *)p_data)->ithread; - VP8_COMP *cpi = (VP8_COMP *)(((ENCODETHREAD_DATA *)p_data)->ptr1); + VP8_COMP *cpi = (VP8_COMP *)(((ENCODETHREAD_DATA *)p_data)->ptr1); MB_ROW_COMP *mbri = (MB_ROW_COMP *)(((ENCODETHREAD_DATA *)p_data)->ptr2); ENTROPY_CONTEXT_PLANES mb_row_left_context; + const int nsync = cpi->mt_sync_range; //printf("Started thread %d\n", ithread); while (1) @@ -38,189 +41,213 @@ break; //if(WaitForSingleObject(cpi->h_event_mbrencoding[ithread], INFINITE) == WAIT_OBJECT_0) - if (sem_wait(&cpi->h_event_mbrencoding[ithread]) == 0) + if (sem_wait(&cpi->h_event_start_encoding[ithread]) == 0) { + VP8_COMMON *cm = &cpi->common; + int mb_row; + MACROBLOCK *x = &mbri->mb; + MACROBLOCKD *xd = &x->e_mbd; + TOKENEXTRA *tp ; + + int *segment_counts = mbri->segment_counts; + int *totalrate = &mbri->totalrate; + if (cpi->b_multi_threaded == FALSE) // we're shutting down break; - else + + for (mb_row = ithread + 1; mb_row < cm->mb_rows; mb_row += (cpi->encoding_thread_count + 1)) { - VP8_COMMON *cm = &cpi->common; - int mb_row 
= mbri->mb_row; - MACROBLOCK *x = &mbri->mb; - MACROBLOCKD *xd = &x->e_mbd; - TOKENEXTRA **tp = &mbri->tp; - int *segment_counts = mbri->segment_counts; - int *totalrate = &mbri->totalrate; - { - int i; - int recon_yoffset, recon_uvoffset; - int mb_col; - int ref_fb_idx = cm->lst_fb_idx; - int dst_fb_idx = cm->new_fb_idx; - int recon_y_stride = cm->yv12_fb[ref_fb_idx].y_stride; - int recon_uv_stride = cm->yv12_fb[ref_fb_idx].uv_stride; - volatile int *last_row_current_mb_col; + int i; + int recon_yoffset, recon_uvoffset; + int mb_col; + int ref_fb_idx = cm->lst_fb_idx; + int dst_fb_idx = cm->new_fb_idx; + int recon_y_stride = cm->yv12_fb[ref_fb_idx].y_stride; + int recon_uv_stride = cm->yv12_fb[ref_fb_idx].uv_stride; + volatile int *last_row_current_mb_col; + INT64 activity_sum = 0; - if (ithread > 0) - last_row_current_mb_col = &cpi->mb_row_ei[ithread-1].current_mb_col; - else - last_row_current_mb_col = &cpi->current_mb_col_main; + tp = cpi->tok + (mb_row * (cm->mb_cols * 16 * 24)); - // reset above block coeffs - xd->above_context = cm->above_context; - xd->left_context = &mb_row_left_context; + last_row_current_mb_col = &cpi->mt_current_mb_col[mb_row - 1]; - vp8_zero(mb_row_left_context); + // reset above block coeffs + xd->above_context = cm->above_context; + xd->left_context = &mb_row_left_context; - xd->up_available = (mb_row != 0); - recon_yoffset = (mb_row * recon_y_stride * 16); - recon_uvoffset = (mb_row * recon_uv_stride * 8); + vp8_zero(mb_row_left_context); + xd->up_available = (mb_row != 0); + recon_yoffset = (mb_row * recon_y_stride * 16); + recon_uvoffset = (mb_row * recon_uv_stride * 8); - cpi->tplist[mb_row].start = *tp; + cpi->tplist[mb_row].start = tp; - //printf("Thread mb_row = %d\n", mb_row); + //printf("Thread mb_row = %d\n", mb_row); - // for each macroblock col in image - for (mb_col = 0; mb_col < cm->mb_cols; mb_col++) - { - int seg_map_index = (mb_row * cm->mb_cols); + // for each macroblock col in image + for (mb_col = 0; mb_col < 
cm->mb_cols; mb_col++) + { + int seg_map_index = (mb_row * cm->mb_cols); - while (mb_col > (*last_row_current_mb_col - 1) && *last_row_current_mb_col != cm->mb_cols - 1) + if ((mb_col & (nsync - 1)) == 0) + { + while (mb_col > (*last_row_current_mb_col - nsync) && *last_row_current_mb_col != cm->mb_cols - 1) { x86_pause_hint(); thread_sleep(0); } + } - // Distance of Mb to the various image edges. - // These specified to 8th pel as they are always compared to values that are in 1/8th pel units - xd->mb_to_left_edge = -((mb_col * 16) << 3); - xd->mb_to_right_edge = ((cm->mb_cols - 1 - mb_col) * 16) << 3; - xd->mb_to_top_edge = -((mb_row * 16) << 3); - xd->mb_to_bottom_edge = ((cm->mb_rows - 1 - mb_row) * 16) << 3; - - // Set up limit values for motion vectors used to prevent them extending outside the UMV borders - x->mv_col_min = -((mb_col * 16) + (VP8BORDERINPIXELS - 16)); - x->mv_col_max = ((cm->mb_cols - 1 - mb_col) * 16) + (VP8BORDERINPIXELS - 16); - x->mv_row_min = -((mb_row * 16) + (VP8BORDERINPIXELS - 16)); - x->mv_row_max = ((cm->mb_rows - 1 - mb_row) * 16) + (VP8BORDERINPIXELS - 16); - - xd->dst.y_buffer = cm->yv12_fb[dst_fb_idx].y_buffer + recon_yoffset; - xd->dst.u_buffer = cm->yv12_fb[dst_fb_idx].u_buffer + recon_uvoffset; - xd->dst.v_buffer = cm->yv12_fb[dst_fb_idx].v_buffer + recon_uvoffset; - xd->left_available = (mb_col != 0); - - // Is segmentation enabled - // MB level adjutment to quantizer - if (xd->segmentation_enabled) - { - // Code to set segment id in xd->mbmi.segment_id for current MB (with range checking) - if (cpi->segmentation_map[seg_map_index+mb_col] <= 3) - xd->mode_info_context->mbmi.segment_id = cpi->segmentation_map[seg_map_index+mb_col]; - else - xd->mode_info_context->mbmi.segment_id = 0; - - vp8cx_mb_init_quantizer(cpi, x); - } + // Distance of Mb to the various image edges. 
+ // These specified to 8th pel as they are always compared to values that are in 1/8th pel units + xd->mb_to_left_edge = -((mb_col * 16) << 3); + xd->mb_to_right_edge = ((cm->mb_cols - 1 - mb_col) * 16) << 3; + xd->mb_to_top_edge = -((mb_row * 16) << 3); + xd->mb_to_bottom_edge = ((cm->mb_rows - 1 - mb_row) * 16) << 3; + + // Set up limit values for motion vectors used to prevent them extending outside the UMV borders + x->mv_col_min = -((mb_col * 16) + (VP8BORDERINPIXELS - 16)); + x->mv_col_max = ((cm->mb_cols - 1 - mb_col) * 16) + (VP8BORDERINPIXELS - 16); + x->mv_row_min = -((mb_row * 16) + (VP8BORDERINPIXELS - 16)); + x->mv_row_max = ((cm->mb_rows - 1 - mb_row) * 16) + (VP8BORDERINPIXELS - 16); + + xd->dst.y_buffer = cm->yv12_fb[dst_fb_idx].y_buffer + recon_yoffset; + xd->dst.u_buffer = cm->yv12_fb[dst_fb_idx].u_buffer + recon_uvoffset; + xd->dst.v_buffer = cm->yv12_fb[dst_fb_idx].v_buffer + recon_uvoffset; + xd->left_available = (mb_col != 0); + + x->rddiv = cpi->RDDIV; + x->rdmult = cpi->RDMULT; + + if (cpi->oxcf.tuning == VP8_TUNE_SSIM) + activity_sum += vp8_activity_masking(cpi, x); + + // Is segmentation enabled + // MB level adjutment to quantizer + if (xd->segmentation_enabled) + { + // Code to set segment id in xd->mbmi.segment_id for current MB (with range checking) + if (cpi->segmentation_map[seg_map_index + mb_col] <= 3) + xd->mode_info_context->mbmi.segment_id = cpi->segmentation_map[seg_map_index + mb_col]; else - xd->mode_info_context->mbmi.segment_id = 0; // Set to Segment 0 by default + xd->mode_info_context->mbmi.segment_id = 0; + vp8cx_mb_init_quantizer(cpi, x); + } + else + xd->mode_info_context->mbmi.segment_id = 0; // Set to Segment 0 by default - if (cm->frame_type == KEY_FRAME) - { - *totalrate += vp8cx_encode_intra_macro_block(cpi, x, tp); + x->active_ptr = cpi->active_map + seg_map_index + mb_col; + + if (cm->frame_type == KEY_FRAME) + { + *totalrate += vp8cx_encode_intra_macro_block(cpi, x, &tp); #ifdef MODE_STATS - 
y_modes[xd->mbmi.mode] ++; + y_modes[xd->mbmi.mode] ++; #endif - } - else - { - *totalrate += vp8cx_encode_inter_macroblock(cpi, x, tp, recon_yoffset, recon_uvoffset); + } + else + { + *totalrate += vp8cx_encode_inter_macroblock(cpi, x, &tp, recon_yoffset, recon_uvoffset); #ifdef MODE_STATS - inter_y_modes[xd->mbmi.mode] ++; + inter_y_modes[xd->mbmi.mode] ++; - if (xd->mbmi.mode == SPLITMV) - { - int b; + if (xd->mbmi.mode == SPLITMV) + { + int b; - for (b = 0; b < xd->mbmi.partition_count; b++) - { - inter_b_modes[x->partition->bmi[b].mode] ++; - } + for (b = 0; b < xd->mbmi.partition_count; b++) + { + inter_b_modes[x->partition->bmi[b].mode] ++; } - -#endif - - // Count of last ref frame 0,0 useage - if ((xd->mode_info_context->mbmi.mode == ZEROMV) && (xd->mode_info_context->mbmi.ref_frame == LAST_FRAME)) - cpi->inter_zz_count ++; - } - cpi->tplist[mb_row].stop = *tp; - - x->gf_active_ptr++; // Increment pointer into gf useage flags structure for next mb - - for (i = 0; i < 16; i++) - vpx_memcpy(&xd->mode_info_context->bmi[i], &xd->block[i].bmi, sizeof(xd->block[i].bmi)); +#endif - // adjust to the next column of macroblocks - x->src.y_buffer += 16; - x->src.u_buffer += 8; - x->src.v_buffer += 8; + // Count of last ref frame 0,0 useage + if ((xd->mode_info_context->mbmi.mode == ZEROMV) && (xd->mode_info_context->mbmi.ref_frame == LAST_FRAME)) + cpi->inter_zz_count++; + + // Special case code for cyclic refresh + // If cyclic update enabled then copy xd->mbmi.segment_id; (which may have been updated based on mode + // during vp8cx_encode_inter_macroblock()) back into the global sgmentation map + if (cpi->cyclic_refresh_mode_enabled && xd->segmentation_enabled) + { + const MB_MODE_INFO * mbmi = &xd->mode_info_context->mbmi; + cpi->segmentation_map[seg_map_index + mb_col] = mbmi->segment_id; - recon_yoffset += 16; - recon_uvoffset += 8; + // If the block has been refreshed mark it as clean (the magnitude of the -ve influences how long it will be before we consider 
another refresh): + // Else if it was coded (last frame 0,0) and has not already been refreshed then mark it as a candidate for cleanup next time (marked 0) + // else mark it as dirty (1). + if (mbmi->segment_id) + cpi->cyclic_refresh_map[seg_map_index + mb_col] = -1; + else if ((mbmi->mode == ZEROMV) && (mbmi->ref_frame == LAST_FRAME)) + { + if (cpi->cyclic_refresh_map[seg_map_index + mb_col] == 1) + cpi->cyclic_refresh_map[seg_map_index + mb_col] = 0; + } + else + cpi->cyclic_refresh_map[seg_map_index + mb_col] = 1; - // Keep track of segment useage - segment_counts[xd->mode_info_context->mbmi.segment_id] ++; + } + } + cpi->tplist[mb_row].stop = tp; - // skip to next mb - xd->mode_info_context++; - x->partition_info++; + x->gf_active_ptr++; // Increment pointer into gf useage flags structure for next mb - xd->above_context++; + for (i = 0; i < 16; i++) + vpx_memcpy(&xd->mode_info_context->bmi[i], &xd->block[i].bmi, sizeof(xd->block[i].bmi)); - cpi->mb_row_ei[ithread].current_mb_col = mb_col; + // adjust to the next column of macroblocks + x->src.y_buffer += 16; + x->src.u_buffer += 8; + x->src.v_buffer += 8; - } + recon_yoffset += 16; + recon_uvoffset += 8; - //extend the recon for intra prediction - vp8_extend_mb_row( - &cm->yv12_fb[dst_fb_idx], - xd->dst.y_buffer + 16, - xd->dst.u_buffer + 8, - xd->dst.v_buffer + 8); + // Keep track of segment useage + segment_counts[xd->mode_info_context->mbmi.segment_id]++; - // this is to account for the border + // skip to next mb xd->mode_info_context++; x->partition_info++; + xd->above_context++; - x->src.y_buffer += 16 * x->src.y_stride * (cpi->encoding_thread_count + 1) - 16 * cm->mb_cols; - x->src.u_buffer += 8 * x->src.uv_stride * (cpi->encoding_thread_count + 1) - 8 * cm->mb_cols; - x->src.v_buffer += 8 * x->src.uv_stride * (cpi->encoding_thread_count + 1) - 8 * cm->mb_cols; + cpi->mt_current_mb_col[mb_row] = mb_col; + } - xd->mode_info_context += xd->mode_info_stride * cpi->encoding_thread_count; - x->partition_info 
+= xd->mode_info_stride * cpi->encoding_thread_count; + //extend the recon for intra prediction + vp8_extend_mb_row( + &cm->yv12_fb[dst_fb_idx], + xd->dst.y_buffer + 16, + xd->dst.u_buffer + 8, + xd->dst.v_buffer + 8); + + // this is to account for the border + xd->mode_info_context++; + x->partition_info++; + x->activity_sum += activity_sum; + + x->src.y_buffer += 16 * x->src.y_stride * (cpi->encoding_thread_count + 1) - 16 * cm->mb_cols; + x->src.u_buffer += 8 * x->src.uv_stride * (cpi->encoding_thread_count + 1) - 8 * cm->mb_cols; + x->src.v_buffer += 8 * x->src.uv_stride * (cpi->encoding_thread_count + 1) - 8 * cm->mb_cols; - if (ithread == (cpi->encoding_thread_count - 1) || mb_row == cm->mb_rows - 1) - { - //SetEvent(cpi->h_event_main); - sem_post(&cpi->h_event_main); - } + xd->mode_info_context += xd->mode_info_stride * cpi->encoding_thread_count; + x->partition_info += xd->mode_info_stride * cpi->encoding_thread_count; + if (mb_row == cm->mb_rows - 1) + { + //SetEvent(cpi->h_event_main); + sem_post(&cpi->h_event_end_encoding); /* signal frame encoding end */ } - } } } -#else - (void) p_data; -#endif - //printf("exit thread %d\n", ithread); return 0; } @@ -240,8 +267,6 @@ z->sadperbit16 = x->sadperbit16; z->sadperbit4 = x->sadperbit4; z->errthresh = x->errthresh; - z->rddiv = x->rddiv; - z->rdmult = x->rdmult; /* z->mv_col_min = x->mv_col_min; @@ -255,6 +280,7 @@ z->vp8_short_fdct8x4 = x->vp8_short_fdct8x4; z->short_walsh4x4 = x->short_walsh4x4; z->quantize_b = x->quantize_b; + z->optimize = x->optimize; /* z->mvc = x->mvc; @@ -282,6 +308,7 @@ for (i = 0; i < 25; i++) { z->block[i].quant = x->block[i].quant; + z->block[i].quant_fast = x->block[i].quant_fast; z->block[i].quant_shift = x->block[i].quant_shift; z->block[i].zbin = x->block[i].zbin; z->block[i].zrun_zbin_boost = x->block[i].zrun_zbin_boost; @@ -334,7 +361,6 @@ } } - void vp8cx_init_mbrthread_data(VP8_COMP *cpi, MACROBLOCK *x, MB_ROW_COMP *mbr_ei, @@ -385,15 +411,13 @@ mb->src.u_buffer += 8 * 
x->src.uv_stride * (i + 1); mb->src.v_buffer += 8 * x->src.uv_stride * (i + 1); - vp8_build_block_offsets(mb); vp8_setup_block_dptrs(mbd); vp8_setup_block_ptrs(mb); - mb->rddiv = cpi->RDDIV; - mb->rdmult = cpi->RDMULT; + mb->activity_sum = 0; mbd->left_context = &cm->left_context; mb->mvc = cm->fc.mvc; @@ -403,17 +427,12 @@ } } - void vp8cx_create_encoder_threads(VP8_COMP *cpi) { cpi->b_multi_threaded = 0; cpi->processor_core_count = 32; //vp8_get_proc_core_count(); - CHECK_MEM_ERROR(cpi->tplist, vpx_malloc(sizeof(TOKENLIST) * cpi->common.mb_rows)); - -#if CONFIG_MULTITHREAD - if (cpi->processor_core_count > 1 && cpi->oxcf.multi_threaded > 1) { int ithread; @@ -423,14 +442,15 @@ else cpi->encoding_thread_count = cpi->oxcf.multi_threaded - 1; - CHECK_MEM_ERROR(cpi->h_encoding_thread, vpx_malloc(sizeof(pthread_t) * cpi->encoding_thread_count)); - CHECK_MEM_ERROR(cpi->h_event_mbrencoding, vpx_malloc(sizeof(sem_t) * cpi->encoding_thread_count)); + CHECK_MEM_ERROR(cpi->h_event_start_encoding, vpx_malloc(sizeof(sem_t) * cpi->encoding_thread_count)); CHECK_MEM_ERROR(cpi->mb_row_ei, vpx_memalign(32, sizeof(MB_ROW_COMP) * cpi->encoding_thread_count)); vpx_memset(cpi->mb_row_ei, 0, sizeof(MB_ROW_COMP) * cpi->encoding_thread_count); CHECK_MEM_ERROR(cpi->en_thread_data, vpx_malloc(sizeof(ENCODETHREAD_DATA) * cpi->encoding_thread_count)); + CHECK_MEM_ERROR(cpi->mt_current_mb_col, vpx_malloc(sizeof(*cpi->mt_current_mb_col) * cpi->common.mb_rows)); + //cpi->h_event_main = CreateEvent(NULL, FALSE, FALSE, NULL); - sem_init(&cpi->h_event_main, 0, 0); + sem_init(&cpi->h_event_end_encoding, 0, 0); cpi->b_multi_threaded = 1; @@ -438,11 +458,13 @@ for (ithread = 0; ithread < cpi->encoding_thread_count; ithread++) { + ENCODETHREAD_DATA * ethd = &cpi->en_thread_data[ithread]; + //cpi->h_event_mbrencoding[ithread] = CreateEvent(NULL, FALSE, FALSE, NULL); - sem_init(&cpi->h_event_mbrencoding[ithread], 0, 0); - cpi->en_thread_data[ithread].ithread = ithread; - 
cpi->en_thread_data[ithread].ptr1 = (void *)cpi; - cpi->en_thread_data[ithread].ptr2 = (void *)&cpi->mb_row_ei[ithread]; + sem_init(&cpi->h_event_start_encoding[ithread], 0, 0); + ethd->ithread = ithread; + ethd->ptr1 = (void *)cpi; + ethd->ptr2 = (void *)&cpi->mb_row_ei[ithread]; //printf(" call begin thread %d \n", ithread); @@ -454,19 +476,15 @@ // 0, // NULL); - pthread_create(&cpi->h_encoding_thread[ithread], 0, thread_encoding_proc, (&cpi->en_thread_data[ithread])); - + pthread_create(&cpi->h_encoding_thread[ithread], 0, thread_encoding_proc, ethd); } } -#endif } void vp8cx_remove_encoder_threads(VP8_COMP *cpi) { -#if CONFIG_MULTITHREAD - if (cpi->b_multi_threaded) { //shutdown other threads @@ -477,20 +495,21 @@ for (i = 0; i < cpi->encoding_thread_count; i++) { //SetEvent(cpi->h_event_mbrencoding[i]); - sem_post(&cpi->h_event_mbrencoding[i]); + sem_post(&cpi->h_event_start_encoding[i]); pthread_join(cpi->h_encoding_thread[i], 0); - } - for (i = 0; i < cpi->encoding_thread_count; i++) - sem_destroy(&cpi->h_event_mbrencoding[i]); + sem_destroy(&cpi->h_event_start_encoding[i]); + } } + + sem_destroy(&cpi->h_event_end_encoding); + //free thread related resources - vpx_free(cpi->h_event_mbrencoding); + vpx_free(cpi->h_event_start_encoding); vpx_free(cpi->h_encoding_thread); vpx_free(cpi->mb_row_ei); vpx_free(cpi->en_thread_data); + vpx_free(cpi->mt_current_mb_col); } - -#endif - vpx_free(cpi->tplist); } +#endif diff -Nru libvpx-0.9.5/vp8/encoder/firstpass.c libvpx-0.9.6/vp8/encoder/firstpass.c --- libvpx-0.9.5/vp8/encoder/firstpass.c 2010-10-28 13:14:14.000000000 +0000 +++ libvpx-0.9.6/vp8/encoder/firstpass.c 2011-03-04 20:40:40.000000000 +0000 @@ -8,25 +8,24 @@ * be found in the AUTHORS file in the root of the source tree. 
*/ - #include "math.h" #include "limits.h" #include "block.h" #include "onyx_int.h" #include "variance.h" #include "encodeintra.h" -#include "setupintrarecon.h" +#include "vp8/common/setupintrarecon.h" #include "mcomp.h" #include "vpx_scale/vpxscale.h" #include "encodemb.h" -#include "extend.h" -#include "systemdependent.h" +#include "vp8/common/extend.h" +#include "vp8/common/systemdependent.h" #include "vpx_scale/yv12extend.h" #include "vpx_mem/vpx_mem.h" -#include "swapyv12buffer.h" +#include "vp8/common/swapyv12buffer.h" #include #include "rdopt.h" -#include "quant_common.h" +#include "vp8/common/quant_common.h" #include "encodemv.h" //#define OUTPUT_FPF 1 @@ -53,8 +52,11 @@ #define IIFACTOR 1.4 #define IIKFACTOR1 1.40 #define IIKFACTOR2 1.5 -#define RMAX 14.0 -#define GF_RMAX 48.0 // 128.0 +#define RMAX 14.0 +#define GF_RMAX 48.0 + +#define KF_MB_INTRA_MIN 300 +#define GF_MB_INTRA_MIN 200 #define DOUBLE_DIVIDE_CHECK(X) ((X)<0?(X)-.000001:(X)+.000001) @@ -65,6 +67,18 @@ static int hscale_lookup[7] = {0, 0, 1, 1, 2, 2, 3}; +const int cq_level[QINDEX_RANGE] = +{ + 0,0,1,1,2,3,3,4,4,5,6,6,7,8,8,9, + 9,10,11,11,12,13,13,14,15,15,16,17,17,18,19,20, + 20,21,22,22,23,24,24,25,26,27,27,28,29,30,30,31, + 32,33,33,34,35,36,36,37,38,39,39,40,41,42,42,43, + 44,45,46,46,47,48,49,50,50,51,52,53,54,55,55,56, + 57,58,59,60,60,61,62,63,64,65,66,67,67,68,69,70, + 71,72,73,74,75,75,76,77,78,79,80,81,82,83,84,85, + 86,86,87,88,89,90,91,92,93,94,95,96,97,98,99,100 +}; + void vp8_find_next_key_frame(VP8_COMP *cpi, FIRSTPASS_STATS *this_frame); int vp8_input_stats(VP8_COMP *cpi, FIRSTPASS_STATS *fps); @@ -163,40 +177,68 @@ return modified_err; } +static const double weight_table[256] = { +0.020000, 0.020000, 0.020000, 0.020000, 0.020000, 0.020000, 0.020000, 0.020000, +0.020000, 0.020000, 0.020000, 0.020000, 0.020000, 0.020000, 0.020000, 0.020000, +0.020000, 0.020000, 0.020000, 0.020000, 0.020000, 0.020000, 0.020000, 0.020000, +0.020000, 0.020000, 0.020000, 0.020000, 0.020000, 
0.020000, 0.020000, 0.020000, +0.020000, 0.031250, 0.062500, 0.093750, 0.125000, 0.156250, 0.187500, 0.218750, +0.250000, 0.281250, 0.312500, 0.343750, 0.375000, 0.406250, 0.437500, 0.468750, +0.500000, 0.531250, 0.562500, 0.593750, 0.625000, 0.656250, 0.687500, 0.718750, +0.750000, 0.781250, 0.812500, 0.843750, 0.875000, 0.906250, 0.937500, 0.968750, +1.000000, 1.000000, 1.000000, 1.000000, 1.000000, 1.000000, 1.000000, 1.000000, +1.000000, 1.000000, 1.000000, 1.000000, 1.000000, 1.000000, 1.000000, 1.000000, +1.000000, 1.000000, 1.000000, 1.000000, 1.000000, 1.000000, 1.000000, 1.000000, +1.000000, 1.000000, 1.000000, 1.000000, 1.000000, 1.000000, 1.000000, 1.000000, +1.000000, 1.000000, 1.000000, 1.000000, 1.000000, 1.000000, 1.000000, 1.000000, +1.000000, 1.000000, 1.000000, 1.000000, 1.000000, 1.000000, 1.000000, 1.000000, +1.000000, 1.000000, 1.000000, 1.000000, 1.000000, 1.000000, 1.000000, 1.000000, +1.000000, 1.000000, 1.000000, 1.000000, 1.000000, 1.000000, 1.000000, 1.000000, +1.000000, 1.000000, 1.000000, 1.000000, 1.000000, 1.000000, 1.000000, 1.000000, +1.000000, 1.000000, 1.000000, 1.000000, 1.000000, 1.000000, 1.000000, 1.000000, +1.000000, 1.000000, 1.000000, 1.000000, 1.000000, 1.000000, 1.000000, 1.000000, +1.000000, 1.000000, 1.000000, 1.000000, 1.000000, 1.000000, 1.000000, 1.000000, +1.000000, 1.000000, 1.000000, 1.000000, 1.000000, 1.000000, 1.000000, 1.000000, +1.000000, 1.000000, 1.000000, 1.000000, 1.000000, 1.000000, 1.000000, 1.000000, +1.000000, 1.000000, 1.000000, 1.000000, 1.000000, 1.000000, 1.000000, 1.000000, +1.000000, 1.000000, 1.000000, 1.000000, 1.000000, 1.000000, 1.000000, 1.000000, +1.000000, 1.000000, 1.000000, 1.000000, 1.000000, 1.000000, 1.000000, 1.000000, +1.000000, 1.000000, 1.000000, 1.000000, 1.000000, 1.000000, 1.000000, 1.000000, +1.000000, 1.000000, 1.000000, 1.000000, 1.000000, 1.000000, 1.000000, 1.000000, +1.000000, 1.000000, 1.000000, 1.000000, 1.000000, 1.000000, 1.000000, 1.000000, +1.000000, 1.000000, 
1.000000, 1.000000, 1.000000, 1.000000, 1.000000, 1.000000, +1.000000, 1.000000, 1.000000, 1.000000, 1.000000, 1.000000, 1.000000, 1.000000, +1.000000, 1.000000, 1.000000, 1.000000, 1.000000, 1.000000, 1.000000, 1.000000, +1.000000, 1.000000, 1.000000, 1.000000, 1.000000, 1.000000, 1.000000, 1.000000 +}; + double vp8_simple_weight(YV12_BUFFER_CONFIG *source) { int i, j; unsigned char *src = source->y_buffer; - unsigned char value; double sum_weights = 0.0; - double Weight; // Loop throught the Y plane raw examining levels and creating a weight for the image - for (i = 0; i < source->y_height; i++) + i = source->y_height; + do { - for (j = 0; j < source->y_width; j++) + j = source->y_width; + do { - value = src[j]; - - if (value >= 64) - Weight = 1.0; - else if (value > 32) - Weight = (value - 32.0f) / 32.0f; - else - Weight = 0.02; - - sum_weights += Weight; - } - + sum_weights += weight_table[ *src]; + src++; + }while(--j); + src -= source->y_width; src += source->y_stride; - } + }while(--i); sum_weights /= (source->y_height * source->y_width); return sum_weights; } + // This function returns the current per frame maximum bitrate target int frame_max_bits(VP8_COMP *cpi) { @@ -247,7 +289,6 @@ * macroblock. 
*/ size_t stats_sz; - FIRSTPASS_STATS stats; stats_sz = sizeof(FIRSTPASS_STATS) + mb_count; stats_sz = (stats_sz + 7) & ~7; @@ -374,8 +415,6 @@ } void vp8_fpmm_reset_pos(VP8_COMP *cpi, unsigned char *target_pos) { - int Offset; - cpi->fp_motion_map_stats = target_pos; } @@ -428,7 +467,6 @@ vp8_output_stats(cpi, cpi->output_pkt_list, cpi->total_stats); } - void vp8_zz_motion_search( VP8_COMP *cpi, MACROBLOCK * x, YV12_BUFFER_CONFIG * recon_buffer, int * best_motion_err, int recon_yoffset ) { MACROBLOCKD * const xd = & x->e_mbd; @@ -448,7 +486,6 @@ VARIANCE_INVOKE(IF_RTCD(&cpi->rtcd.variance), mse16x16) ( src_ptr, src_stride, ref_ptr, ref_stride, (unsigned int *)(best_motion_err)); } - void vp8_first_pass_motion_search(VP8_COMP *cpi, MACROBLOCK *x, MV *ref_mv, MV *best_mv, YV12_BUFFER_CONFIG *recon_buffer, int *best_motion_err, int recon_yoffset ) { MACROBLOCKD *const xd = & x->e_mbd; @@ -472,7 +509,7 @@ xd->pre.y_buffer = recon_buffer->y_buffer + recon_yoffset; // Initial step/diamond search centred on best mv - tmp_err = cpi->diamond_search_sad(x, b, d, ref_mv, &tmp_mv, step_param, x->errorperbit, &num00, &v_fn_ptr, x->mvsadcost, x->mvcost); + tmp_err = cpi->diamond_search_sad(x, b, d, ref_mv, &tmp_mv, step_param, x->errorperbit, &num00, &v_fn_ptr, x->mvsadcost, x->mvcost, ref_mv); if ( tmp_err < INT_MAX-new_mv_mode_penalty ) tmp_err += new_mv_mode_penalty; @@ -495,7 +532,7 @@ num00--; else { - tmp_err = cpi->diamond_search_sad(x, b, d, ref_mv, &tmp_mv, step_param + n, x->errorperbit, &num00, &v_fn_ptr, x->mvsadcost, x->mvcost); + tmp_err = cpi->diamond_search_sad(x, b, d, ref_mv, &tmp_mv, step_param + n, x->errorperbit, &num00, &v_fn_ptr, x->mvsadcost, x->mvcost, ref_mv); if ( tmp_err < INT_MAX-new_mv_mode_penalty ) tmp_err += new_mv_mode_penalty; @@ -523,8 +560,8 @@ YV12_BUFFER_CONFIG *gld_yv12 = &cm->yv12_fb[cm->gld_fb_idx]; int recon_y_stride = lst_yv12->y_stride; int recon_uv_stride = lst_yv12->uv_stride; - int intra_error = 0; - int coded_error = 0; + long 
long intra_error = 0; + long long coded_error = 0; int sum_mvr = 0, sum_mvc = 0; int sum_mvr_abs = 0, sum_mvc_abs = 0; @@ -536,7 +573,6 @@ int sum_in_vectors = 0; - MV best_ref_mv = {0, 0}; MV zero_ref_mv = {0, 0}; unsigned char *fp_motion_map_ptr = cpi->fp_motion_map; @@ -574,13 +610,20 @@ // for each macroblock row in image for (mb_row = 0; mb_row < cm->mb_rows; mb_row++) { - MV best_ref_mv = {0, 0}; + int_mv best_ref_mv; + + best_ref_mv.as_int = 0; // reset above block coeffs xd->up_available = (mb_row != 0); recon_yoffset = (mb_row * recon_y_stride * 16); recon_uvoffset = (mb_row * recon_uv_stride * 8); + // Set up limit values for motion vectors to prevent them extending outside the UMV borders + x->mv_row_min = -((mb_row * 16) + (VP8BORDERINPIXELS - 16)); + x->mv_row_max = ((cm->mb_rows - 1 - mb_row) * 16) + (VP8BORDERINPIXELS - 16); + + // for each macroblock col in image for (mb_col = 0; mb_col < cm->mb_cols; mb_col++) { @@ -605,7 +648,7 @@ this_error += intrapenalty; // Cumulative intra error total - intra_error += this_error; + intra_error += (long long)this_error; // Indicate default assumption of intra in the motion map *fp_motion_map_ptr = 0; @@ -613,8 +656,6 @@ // Set up limit values for motion vectors to prevent them extending outside the UMV borders x->mv_col_min = -((mb_col * 16) + (VP8BORDERINPIXELS - 16)); x->mv_col_max = ((cm->mb_cols - 1 - mb_col) * 16) + (VP8BORDERINPIXELS - 16); - x->mv_row_min = -((mb_row * 16) + (VP8BORDERINPIXELS - 16)); - x->mv_row_max = ((cm->mb_rows - 1 - mb_row) * 16) + (VP8BORDERINPIXELS - 16); // Other than for the first frame do a motion search if (cm->current_video_frame > 0) @@ -635,12 +676,12 @@ // Test last reference frame using the previous best mv as the // starting point (best reference) for the search - vp8_first_pass_motion_search(cpi, x, &best_ref_mv, + vp8_first_pass_motion_search(cpi, x, &best_ref_mv.as_mv, &d->bmi.mv.as_mv, lst_yv12, &motion_error, recon_yoffset); // If the current best reference mv is 
not centred on 0,0 then do a 0,0 based search as well - if ((best_ref_mv.col != 0) || (best_ref_mv.row != 0)) + if (best_ref_mv.as_int) { tmp_err = INT_MAX; vp8_first_pass_motion_search(cpi, x, &zero_ref_mv, &tmp_mv, @@ -652,7 +693,6 @@ d->bmi.mv.as_mv.row = tmp_mv.row; d->bmi.mv.as_mv.col = tmp_mv.col; } - } // Experimental search in a second reference frame ((0,0) based only) @@ -681,6 +721,9 @@ xd->pre.v_buffer = lst_yv12->v_buffer + recon_uvoffset; } + /* Intra assumed best */ + best_ref_mv.as_int = 0; + if (motion_error <= this_error) { d->bmi.mv.as_mv.row <<= 3; @@ -696,13 +739,10 @@ sum_mvcs += d->bmi.mv.as_mv.col * d->bmi.mv.as_mv.col; intercount++; - best_ref_mv.row = d->bmi.mv.as_mv.row; - best_ref_mv.col = d->bmi.mv.as_mv.col; - //best_ref_mv.row = 0; - //best_ref_mv.col = 0; + best_ref_mv.as_int = d->bmi.mv.as_int; // Was the vector non-zero - if (d->bmi.mv.as_mv.row || d->bmi.mv.as_mv.col) + if (d->bmi.mv.as_int) { mvcount++; @@ -758,15 +798,9 @@ *fp_motion_map_ptr = 1; } } - else - { - // Intra was best - best_ref_mv.row = 0; - best_ref_mv.col = 0; - } } - coded_error += this_error; + coded_error += (long long)this_error; // adjust to the next column of macroblocks x->src.y_buffer += 16; @@ -801,6 +835,7 @@ fps.coded_error = coded_error >> 8; weight = vp8_simple_weight(cpi->Source); + if (weight < 0.1) weight = 0.1; @@ -905,7 +940,7 @@ double pow_lowq = 0.40; if (section_target_bandwitdh <= 0) - return MAXQ; + return cpi->maxq_max_limit; // Highest value allowed target_norm_bits_per_mb = (section_target_bandwitdh < (1 << 20)) ? (512 * section_target_bandwitdh) / num_mbs : 512 * (section_target_bandwitdh / num_mbs); @@ -941,10 +976,12 @@ // Correction factor used for Q values >= 20 corr_high = pow(err_per_mb / BASE_ERRPERMB, pow_highq); - corr_high = (corr_high < 0.05) ? 0.05 : (corr_high > 5.0) ? 5.0 : corr_high; + corr_high = (corr_high < 0.05) + ? 0.05 : (corr_high > 5.0) ? 
5.0 : corr_high; - // Try and pick a Q that should be high enough to encode the content at the given rate. - for (Q = 0; Q < MAXQ; Q++) + // Try and pick a max Q that will be high enough to encode the + // content at the given rate. + for (Q = cpi->maxq_min_limit; Q < cpi->maxq_max_limit; Q++) { int bits_per_mb_at_this_q; @@ -963,6 +1000,28 @@ break; } + // Restriction on active max q for constrained quality mode. + if ( (cpi->oxcf.end_usage == USAGE_CONSTRAINED_QUALITY) && + (Q < cpi->cq_target_quality) ) + //(Q < cpi->oxcf.cq_level;) ) + { + Q = cpi->cq_target_quality; + //Q = cpi->oxcf.cq_level; + } + + // Adjust maxq_min_limit and maxq_max_limit limits based on + // averaga q observed in clip for non kf/gf.arf frames + // Give average a chance to settle though. + if ( (cpi->ni_frames > + ((unsigned int)cpi->total_stats->count >> 8)) && + (cpi->ni_frames > 150) ) + { + cpi->maxq_max_limit = ((cpi->ni_av_qi + 32) < cpi->worst_quality) + ? (cpi->ni_av_qi + 32) : cpi->worst_quality; + cpi->maxq_min_limit = ((cpi->ni_av_qi - 32) > cpi->best_quality) + ? (cpi->ni_av_qi - 32) : cpi->best_quality; + } + return Q; } static int estimate_q(VP8_COMP *cpi, double section_err, int section_target_bandwitdh, int Height, int Width) @@ -1111,6 +1170,79 @@ return Q; } + +// For cq mode estimate a cq level that matches the observed +// complexity and data rate. +static int estimate_cq(VP8_COMP *cpi, double section_err, + int section_target_bandwitdh, int Height, int Width) +{ + int Q; + int num_mbs = ((Height * Width) / (16 * 16)); + int target_norm_bits_per_mb; + + double err_per_mb = section_err / num_mbs; + double correction_factor; + double corr_high; + double speed_correction = 1.0; + double pow_highq = 0.90; + double pow_lowq = 0.40; + double clip_iiratio; + double clip_iifactor; + + target_norm_bits_per_mb = (section_target_bandwitdh < (1 << 20)) + ? 
(512 * section_target_bandwitdh) / num_mbs + : 512 * (section_target_bandwitdh / num_mbs); + + // Corrections for higher compression speed settings + // (reduced compression expected) + if ((cpi->compressor_speed == 3) || (cpi->compressor_speed == 1)) + { + if (cpi->oxcf.cpu_used <= 5) + speed_correction = 1.04 + (cpi->oxcf.cpu_used * 0.04); + else + speed_correction = 1.25; + } + // II ratio correction factor for clip as a whole + clip_iiratio = cpi->total_stats->intra_error / + DOUBLE_DIVIDE_CHECK(cpi->total_stats->coded_error); + clip_iifactor = 1.0 - ((clip_iiratio - 10.0) * 0.025); + if (clip_iifactor < 0.80) + clip_iifactor = 0.80; + + // Correction factor used for Q values >= 20 + corr_high = pow(err_per_mb / BASE_ERRPERMB, pow_highq); + corr_high = (corr_high < 0.05) ? 0.05 : (corr_high > 5.0) ? 5.0 : corr_high; + + // Try and pick a Q that can encode the content at the given rate. + for (Q = 0; Q < MAXQ; Q++) + { + int bits_per_mb_at_this_q; + + if (Q < 50) + { + correction_factor = + pow( err_per_mb / BASE_ERRPERMB, (pow_lowq + Q * 0.01)); + + correction_factor = (correction_factor < 0.05) ? 0.05 + : (correction_factor > 5.0) ? 
5.0 + : correction_factor; + } + else + correction_factor = corr_high; + + bits_per_mb_at_this_q = + (int)( .5 + correction_factor * + speed_correction * + clip_iifactor * + (double)vp8_bits_per_mb[INTER_FRAME][Q] / 1.0); + + if (bits_per_mb_at_this_q <= target_norm_bits_per_mb) + break; + } + + return cq_level[Q]; +} + extern void vp8_new_frame_rate(VP8_COMP *cpi, double framerate); void vp8_init_second_pass(VP8_COMP *cpi) @@ -1145,6 +1277,14 @@ cpi->output_frame_rate = cpi->oxcf.frame_rate; cpi->bits_left = (long long)(cpi->total_stats->duration * cpi->oxcf.target_bandwidth / 10000000.0) ; cpi->bits_left -= (long long)(cpi->total_stats->duration * two_pass_min_rate / 10000000.0); + cpi->clip_bits_total = cpi->bits_left; + + // Calculate a minimum intra value to be used in determining the IIratio + // scores used in the second pass. We have this minimum to make sure + // that clips that are static but "low complexity" in the intra domain + // are still boosted appropriately for KF/GF/ARF + cpi->kf_intra_err_min = KF_MB_INTRA_MIN * cpi->common.MBs; + cpi->gf_intra_err_min = GF_MB_INTRA_MIN * cpi->common.MBs; vp8_avg_stats(cpi->total_stats); @@ -1173,17 +1313,25 @@ { start_pos = cpi->stats_in; // Note starting "file" position - cpi->modified_total_error_left = 0.0; + cpi->modified_error_total = 0.0; + cpi->modified_error_used = 0.0; while (vp8_input_stats(cpi, &this_frame) != EOF) { - cpi->modified_total_error_left += calculate_modified_err(cpi, &this_frame); + cpi->modified_error_total += calculate_modified_err(cpi, &this_frame); } + cpi->modified_error_left = cpi->modified_error_total; reset_fpf_position(cpi, start_pos); // Reset file position } + // Calculate the clip target modified bits per error + // The observed bpe starts as the same number. 
+ cpi->clip_bpe = cpi->bits_left / + DOUBLE_DIVIDE_CHECK(cpi->modified_error_total); + cpi->observed_bpe = cpi->clip_bpe; + cpi->fp_motion_map_stats = (unsigned char *)cpi->stats_in; } @@ -1191,6 +1339,43 @@ { } +// This function gives and estimate of how badly we believe +// the predicition quality is decaying from frame to frame. +double gf_prediction_decay_rate(VP8_COMP *cpi, FIRSTPASS_STATS *next_frame) +{ + double prediction_decay_rate; + double motion_decay; + double motion_pct = next_frame->pcnt_motion; + + + // Initial basis is the % mbs inter coded + prediction_decay_rate = next_frame->pcnt_inter; + + // High % motion -> somewhat higher decay rate + motion_decay = (1.0 - (motion_pct / 20.0)); + if (motion_decay < prediction_decay_rate) + prediction_decay_rate = motion_decay; + + // Adjustment to decay rate based on speed of motion + { + double this_mv_rabs; + double this_mv_cabs; + double distance_factor; + + this_mv_rabs = fabs(next_frame->mvr_abs * motion_pct); + this_mv_cabs = fabs(next_frame->mvc_abs * motion_pct); + + distance_factor = sqrt((this_mv_rabs * this_mv_rabs) + + (this_mv_cabs * this_mv_cabs)) / 250.0; + distance_factor = ((distance_factor > 1.0) + ? 0.0 : (1.0 - distance_factor)); + if (distance_factor < prediction_decay_rate) + prediction_decay_rate = distance_factor; + } + + return prediction_decay_rate; +} + // Analyse and define a gf/arf group . 
static void define_gf_group(VP8_COMP *cpi, FIRSTPASS_STATS *this_frame) { @@ -1212,17 +1397,20 @@ double decay_accumulator = 1.0; double boost_factor = IIFACTOR; - double loop_decay_rate = 1.00; // Starting decay rate + double loop_decay_rate = 1.00; // Starting decay rate double this_frame_mv_in_out = 0.0; double mv_in_out_accumulator = 0.0; double abs_mv_in_out_accumulator = 0.0; double mod_err_per_mb_accumulator = 0.0; - int max_bits = frame_max_bits(cpi); // Max for a single frame + int max_bits = frame_max_bits(cpi); // Max for a single frame unsigned char *fpmm_pos; + unsigned int allow_alt_ref = + cpi->oxcf.play_alternate && cpi->oxcf.lag_in_frames; + cpi->gf_group_bits = 0; cpi->gf_decay_rate = 0; @@ -1237,47 +1425,57 @@ // Preload the stats for the next frame. mod_frame_err = calculate_modified_err(cpi, this_frame); - // Note the error of the frame at the start of the group (this will be the GF frame error if we code a normal gf + // Note the error of the frame at the start of the group (this will be + // the GF frame error if we code a normal gf gf_first_frame_err = mod_frame_err; - // Special treatment if the current frame is a key frame (which is also a gf). - // If it is then its error score (and hence bit allocation) need to be subtracted out - // from the calculation for the GF group + // Special treatment if the current frame is a key frame (which is also + // a gf). 
If it is then its error score (and hence bit allocation) need + // to be subtracted out from the calculation for the GF group if (cpi->common.frame_type == KEY_FRAME) gf_group_err -= gf_first_frame_err; - // Scan forward to try and work out how many frames the next gf group should contain and - // what level of boost is appropriate for the GF or ARF that will be coded with the group + // Scan forward to try and work out how many frames the next gf group + // should contain and what level of boost is appropriate for the GF + // or ARF that will be coded with the group i = 0; - while (((i < cpi->max_gf_interval) || ((cpi->frames_to_key - i) < MIN_GF_INTERVAL)) && (i < cpi->frames_to_key)) + while (((i < cpi->static_scene_max_gf_interval) || + ((cpi->frames_to_key - i) < MIN_GF_INTERVAL)) && + (i < cpi->frames_to_key)) { double r; double this_frame_mvr_ratio; double this_frame_mvc_ratio; double motion_decay; - double motion_pct = next_frame.pcnt_motion; + //double motion_pct = next_frame.pcnt_motion; + double motion_pct; - i++; // Increment the loop counter + i++; // Increment the loop counter // Accumulate error score of frames in this gf group mod_frame_err = calculate_modified_err(cpi, this_frame); gf_group_err += mod_frame_err; - mod_err_per_mb_accumulator += mod_frame_err / DOUBLE_DIVIDE_CHECK((double)cpi->common.MBs); + mod_err_per_mb_accumulator += + mod_frame_err / DOUBLE_DIVIDE_CHECK((double)cpi->common.MBs); if (EOF == vp8_input_stats(cpi, &next_frame)) break; // Accumulate motion stats. 
+ motion_pct = next_frame.pcnt_motion; mv_accumulator_rabs += fabs(next_frame.mvr_abs * motion_pct); mv_accumulator_cabs += fabs(next_frame.mvc_abs * motion_pct); //Accumulate Motion In/Out of frame stats - this_frame_mv_in_out = next_frame.mv_in_out_count * next_frame.pcnt_motion; - mv_in_out_accumulator += next_frame.mv_in_out_count * next_frame.pcnt_motion; - abs_mv_in_out_accumulator += fabs(next_frame.mv_in_out_count * next_frame.pcnt_motion); + this_frame_mv_in_out = + next_frame.mv_in_out_count * motion_pct; + mv_in_out_accumulator += + next_frame.mv_in_out_count * motion_pct; + abs_mv_in_out_accumulator += + fabs(next_frame.mv_in_out_count * motion_pct); // If there is a significant amount of motion if (motion_pct > 0.05) @@ -1306,65 +1504,98 @@ } // Underlying boost factor is based on inter intra error ratio - r = (boost_factor * (next_frame.intra_error / DOUBLE_DIVIDE_CHECK(next_frame.coded_error))); - - // Increase boost for frames where new data coming into frame (eg zoom out) - // Slightly reduce boost if there is a net balance of motion out of the frame (zoom in) + r = ( boost_factor * + ( next_frame.intra_error / + DOUBLE_DIVIDE_CHECK(next_frame.coded_error))); + + if (next_frame.intra_error > cpi->gf_intra_err_min) + r = (IIKFACTOR2 * next_frame.intra_error / + DOUBLE_DIVIDE_CHECK(next_frame.coded_error)); + else + r = (IIKFACTOR2 * cpi->gf_intra_err_min / + DOUBLE_DIVIDE_CHECK(next_frame.coded_error)); + + // Increase boost for frames where new data coming into frame + // (eg zoom out). Slightly reduce boost if there is a net balance + // of motion out of the frame (zoom in). 
// The range for this_frame_mv_in_out is -1.0 to +1.0 if (this_frame_mv_in_out > 0.0) r += r * (this_frame_mv_in_out * 2.0); + // In extreme case boost is halved else - r += r * (this_frame_mv_in_out / 2.0); // In extreme case boost is halved + r += r * (this_frame_mv_in_out / 2.0); if (r > GF_RMAX) r = GF_RMAX; - // Adjust loop decay rate - //if ( next_frame.pcnt_inter < loop_decay_rate ) - loop_decay_rate = next_frame.pcnt_inter; - - // High % motion -> somewhat higher decay rate - motion_decay = (1.0 - (motion_pct / 20.0)); - if (motion_decay < loop_decay_rate) - loop_decay_rate = motion_decay; - - // Adjustment to decay rate based on speed of motion - { - double this_mv_rabs; - double this_mv_cabs; - double distance_factor; - - this_mv_rabs = fabs(next_frame.mvr_abs * motion_pct); - this_mv_cabs = fabs(next_frame.mvc_abs * motion_pct); - - distance_factor = sqrt((this_mv_rabs * this_mv_rabs) + - (this_mv_cabs * this_mv_cabs)) / 250.0; - distance_factor = ((distance_factor > 1.0) - ? 0.0 : (1.0 - distance_factor)); - if (distance_factor < loop_decay_rate) - loop_decay_rate = distance_factor; - } + loop_decay_rate = gf_prediction_decay_rate(cpi, &next_frame); // Cumulative effect of decay decay_accumulator = decay_accumulator * loop_decay_rate; decay_accumulator = decay_accumulator < 0.1 ? 0.1 : decay_accumulator; - //decay_accumulator = ( loop_decay_rate < decay_accumulator ) ? loop_decay_rate : decay_accumulator; boost_score += (decay_accumulator * r); + // Break clause to detect very still sections after motion + // For example a staic image after a fade or other transition + // instead of a clean key frame. + if ( (i > MIN_GF_INTERVAL) && + (loop_decay_rate >= 0.999) && + (decay_accumulator < 0.9) ) + { + int j; + FIRSTPASS_STATS * position = cpi->stats_in; + FIRSTPASS_STATS tmp_next_frame; + double decay_rate; + + // Look ahead a few frames to see if static condition + // persists... 
+ for ( j = 0; j < 4; j++ ) + { + if (EOF == vp8_input_stats(cpi, &tmp_next_frame)) + break; + + decay_rate = gf_prediction_decay_rate(cpi, &tmp_next_frame); + if ( decay_rate < 0.999 ) + break; + } + reset_fpf_position(cpi, position); // Reset file position + + // Force GF not alt ref + if ( j == 4 ) + { + if (0) + { + FILE *f = fopen("fadegf.stt", "a"); + fprintf(f, " %8d %8d %10.4f %10.4f %10.4f\n", + cpi->common.current_video_frame+i, i, + loop_decay_rate, decay_accumulator, + boost_score ); + fclose(f); + } + + allow_alt_ref = FALSE; + + boost_score = old_boost_score; + break; + } + } + // Break out conditions. - if ( /* i>4 || */ + if ( /* i>4 || */ + // Break at cpi->max_gf_interval unless almost totally static + (i >= cpi->max_gf_interval && (decay_accumulator < 0.995)) || ( - (i > MIN_GF_INTERVAL) && // Dont break out with a very short interval - ((cpi->frames_to_key - i) >= MIN_GF_INTERVAL) && // Dont break out very close to a key frame + // Dont break out with a very short interval + (i > MIN_GF_INTERVAL) && + // Dont break out very close to a key frame + ((cpi->frames_to_key - i) >= MIN_GF_INTERVAL) && ((boost_score > 20.0) || (next_frame.pcnt_inter < 0.75)) && ((mv_ratio_accumulator > 100.0) || (abs_mv_in_out_accumulator > 3.0) || (mv_in_out_accumulator < -2.0) || - ((boost_score - old_boost_score) < 2.0) - ) - ) - ) + ((boost_score - old_boost_score) < 2.0)) + ) ) { boost_score = old_boost_score; break; @@ -1375,7 +1606,8 @@ old_boost_score = boost_score; } - cpi->gf_decay_rate = (i > 0) ? (int)(100.0 * (1.0 - decay_accumulator)) / i : 0; + cpi->gf_decay_rate = + (i > 0) ? 
(int)(100.0 * (1.0 - decay_accumulator)) / i : 0; // When using CBR apply additional buffer related upper limits if (cpi->oxcf.end_usage == USAGE_STREAM_FROM_SERVER) @@ -1385,7 +1617,8 @@ // For cbr apply buffer related limits if (cpi->drop_frames_allowed) { - int df_buffer_level = cpi->oxcf.drop_frames_water_mark * (cpi->oxcf.optimal_buffer_level / 100); + int df_buffer_level = cpi->oxcf.drop_frames_water_mark * + (cpi->oxcf.optimal_buffer_level / 100); if (cpi->buffer_level > df_buffer_level) max_boost = ((double)((cpi->buffer_level - df_buffer_level) * 2 / 3) * 16.0) / DOUBLE_DIVIDE_CHECK((double)cpi->av_per_frame_bandwidth); @@ -1408,10 +1641,10 @@ cpi->gfu_boost = (int)(boost_score * 100.0) >> 4; // Should we use the alternate refernce frame - if (cpi->oxcf.play_alternate && - cpi->oxcf.lag_in_frames && + if (allow_alt_ref && (i >= MIN_GF_INTERVAL) && - (i <= (cpi->frames_to_key - MIN_GF_INTERVAL)) && // dont use ARF very near next kf + // dont use ARF very near next kf + (i <= (cpi->frames_to_key - MIN_GF_INTERVAL)) && (((next_frame.pcnt_inter > 0.75) && ((mv_in_out_accumulator / (double)i > -0.2) || (mv_in_out_accumulator > -2.0)) && //(cpi->gfu_boost>150) && @@ -1439,7 +1672,7 @@ // Boost for arf frame Boost = (cpi->gfu_boost * 3 * GFQ_ADJUSTMENT) / (2 * 100); - Boost += (cpi->baseline_gf_interval * 50); + Boost += (i * 50); allocation_chunks = (i * 100) + Boost; // Normalize Altboost and allocations chunck down to prevent overflow @@ -1585,6 +1818,9 @@ // Reset the file position reset_fpf_position(cpi, start_pos); + // Update the record of error used so far (only done once per gf group) + cpi->modified_error_used += gf_group_err; + // Assign bits to the arf or gf. 
{ int Boost; @@ -1738,17 +1974,9 @@ vp8_avg_stats(§ionstats); - if (sectionstats.pcnt_motion < .17) - cpi->section_is_low_motion = 1; - else - cpi->section_is_low_motion = 0; - - if (sectionstats.mvc_abs + sectionstats.mvr_abs > 45) - cpi->section_is_fast_motion = 1; - else - cpi->section_is_fast_motion = 0; - - cpi->section_intra_rating = sectionstats.intra_error / DOUBLE_DIVIDE_CHECK(sectionstats.coded_error); + cpi->section_intra_rating = + sectionstats.intra_error / + DOUBLE_DIVIDE_CHECK(sectionstats.coded_error); Ratio = sectionstats.intra_error / DOUBLE_DIVIDE_CHECK(sectionstats.coded_error); //if( (Ratio > 11) ) //&& (sectionstats.pcnt_second_ref < .20) ) @@ -1892,6 +2120,16 @@ // Is this a GF / ARF (Note that a KF is always also a GF) if (cpi->frames_till_gf_update_due == 0) { + // Update monitor of the bits per error observed so far. + // Done once per gf group based on what has gone before + // so do nothing if this is the first frame. + if (cpi->common.current_video_frame > 0) + { + cpi->observed_bpe = + (double)(cpi->clip_bits_total - cpi->bits_left) / + cpi->modified_error_used; + } + // Define next gf group and assign bits to it vpx_memcpy(&this_frame_copy, &this_frame, sizeof(this_frame)); define_gf_group(cpi, &this_frame_copy); @@ -1965,22 +2203,56 @@ if (cpi->common.current_video_frame == 0) { - // guess at 2nd pass q cpi->est_max_qcorrection_factor = 1.0; - tmp_q = estimate_max_q(cpi, (cpi->total_coded_error_left / frames_left), (int)(cpi->bits_left / frames_left), cpi->common.Height, cpi->common.Width); - if (tmp_q < cpi->worst_quality) - { - cpi->active_worst_quality = tmp_q; - cpi->ni_av_qi = tmp_q; - } - else - { - cpi->active_worst_quality = cpi->worst_quality; - cpi->ni_av_qi = cpi->worst_quality; - } - } - else + // Experimental code to try and set a cq_level in constrained + // quality mode. 
+ if ( cpi->oxcf.end_usage == USAGE_CONSTRAINED_QUALITY ) + { + int est_cq; + + est_cq = + estimate_cq( cpi, + (cpi->total_coded_error_left / frames_left), + (int)(cpi->bits_left / frames_left), + cpi->common.Height, cpi->common.Width); + + cpi->cq_target_quality = cpi->oxcf.cq_level; + if ( est_cq > cpi->cq_target_quality ) + cpi->cq_target_quality = est_cq; + } + + // guess at maxq needed in 2nd pass + cpi->maxq_max_limit = cpi->worst_quality; + cpi->maxq_min_limit = cpi->best_quality; + tmp_q = estimate_max_q( cpi, + (cpi->total_coded_error_left / frames_left), + (int)(cpi->bits_left / frames_left), + cpi->common.Height, + cpi->common.Width); + + // Limit the maxq value returned subsequently. + // This increases the risk of overspend or underspend if the initial + // estimate for the clip is bad, but helps prevent excessive + // variation in Q, especially near the end of a clip + // where for example a small overspend may cause Q to crash + cpi->maxq_max_limit = ((tmp_q + 32) < cpi->worst_quality) + ? (tmp_q + 32) : cpi->worst_quality; + cpi->maxq_min_limit = ((tmp_q - 32) > cpi->best_quality) + ? (tmp_q - 32) : cpi->best_quality; + + cpi->active_worst_quality = tmp_q; + cpi->ni_av_qi = tmp_q; + } + + // The last few frames of a clip almost always have to few or too many + // bits and for the sake of over exact rate control we dont want to make + // radical adjustments to the allowed quantizer range just to use up a + // few surplus bits or get beneath the target rate. 
+ else if ( (cpi->common.current_video_frame < + (((unsigned int)cpi->total_stats->count * 255)>>8)) && + ((cpi->common.current_video_frame + cpi->baseline_gf_interval) < + (unsigned int)cpi->total_stats->count) ) { if (frames_left < 1) frames_left = 1; @@ -1994,13 +2266,6 @@ cpi->active_worst_quality --; cpi->active_worst_quality = ((cpi->active_worst_quality * 3) + tmp_q + 2) / 4; - - // Clamp to user set limits - if (cpi->active_worst_quality > cpi->worst_quality) - cpi->active_worst_quality = cpi->worst_quality; - else if (cpi->active_worst_quality < cpi->best_quality) - cpi->active_worst_quality = cpi->best_quality; - } cpi->frames_to_key --; @@ -2122,6 +2387,9 @@ cpi->common.frame_type = KEY_FRAME; + // is this a forced key frame by interval + cpi->this_key_frame_forced = cpi->next_key_frame_forced; + // Clear the alt ref active flag as this can never be active on a key frame cpi->source_alt_ref_active = FALSE; @@ -2178,13 +2446,40 @@ if (cpi->oxcf.auto_key && cpi->frames_to_key > (int)cpi->key_frame_frequency ) { + FIRSTPASS_STATS *current_pos = cpi->stats_in; + FIRSTPASS_STATS tmp_frame; + cpi->frames_to_key /= 2; - // Estimate corrected kf group error - kf_group_err /= 2.0; - kf_group_intra_err /= 2.0; - kf_group_coded_err /= 2.0; + // Copy first frame details + vpx_memcpy(&tmp_frame, &first_frame, sizeof(first_frame)); + + // Reset to the start of the group + reset_fpf_position(cpi, start_position); + + kf_group_err = 0; + kf_group_intra_err = 0; + kf_group_coded_err = 0; + + // Rescan to get the correct error data for the forced kf group + for( i = 0; i < cpi->frames_to_key; i++ ) + { + // Accumulate kf group errors + kf_group_err += calculate_modified_err(cpi, &tmp_frame); + kf_group_intra_err += tmp_frame.intra_error; + kf_group_coded_err += tmp_frame.coded_error; + + // Load a the next frame's stats + vp8_input_stats(cpi, &tmp_frame); + } + + // Reset to the start of the group + reset_fpf_position(cpi, current_pos); + + cpi->next_key_frame_forced = 
TRUE; } + else + cpi->next_key_frame_forced = FALSE; // Special case for the last frame of the file if (cpi->stats_in >= cpi->stats_in_end) @@ -2199,7 +2494,7 @@ } // Calculate the number of bits that should be assigned to the kf group. - if ((cpi->bits_left > 0) && ((int)cpi->modified_total_error_left > 0)) + if ((cpi->bits_left > 0) && (cpi->modified_error_left > 0.0)) { // Max for a single normal frame (not key frame) int max_bits = frame_max_bits(cpi); @@ -2211,7 +2506,7 @@ // complexity of the section cpi->kf_group_bits = (long long)( cpi->bits_left * ( kf_group_err / - cpi->modified_total_error_left )); + cpi->modified_error_left )); // Clip based on maximum per frame rate defined by the user. max_grp_bits = (long long)max_bits * (long long)cpi->frames_to_key; @@ -2278,12 +2573,17 @@ { double r; double motion_decay; - double motion_pct = next_frame.pcnt_motion; + double motion_pct; if (EOF == vp8_input_stats(cpi, &next_frame)) break; - r = (IIKFACTOR2 * next_frame.intra_error / DOUBLE_DIVIDE_CHECK(next_frame.coded_error)) ; + if (next_frame.intra_error > cpi->kf_intra_err_min) + r = (IIKFACTOR2 * next_frame.intra_error / + DOUBLE_DIVIDE_CHECK(next_frame.coded_error)); + else + r = (IIKFACTOR2 * cpi->kf_intra_err_min / + DOUBLE_DIVIDE_CHECK(next_frame.coded_error)); if (r > RMAX) r = RMAX; @@ -2293,6 +2593,7 @@ loop_decay_rate = next_frame.pcnt_inter; // High % motion -> somewhat higher decay rate + motion_pct = next_frame.pcnt_motion; motion_decay = (1.0 - (motion_pct / 20.0)); if (motion_decay < loop_decay_rate) loop_decay_rate = motion_decay; @@ -2344,17 +2645,7 @@ vp8_avg_stats(§ionstats); - if (sectionstats.pcnt_motion < .17) - cpi->section_is_low_motion = 1; - else - cpi->section_is_low_motion = 0; - - if (sectionstats.mvc_abs + sectionstats.mvr_abs > 45) - cpi->section_is_fast_motion = 1; - else - cpi->section_is_fast_motion = 0; - - cpi->section_intra_rating = sectionstats.intra_error / DOUBLE_DIVIDE_CHECK(sectionstats.coded_error); + 
cpi->section_intra_rating = sectionstats.intra_error / DOUBLE_DIVIDE_CHECK(sectionstats.coded_error); Ratio = sectionstats.intra_error / DOUBLE_DIVIDE_CHECK(sectionstats.coded_error); // if( (Ratio > 11) ) //&& (sectionstats.pcnt_second_ref < .20) ) @@ -2434,7 +2725,7 @@ kf_boost = (int)((double)kf_boost * 100.0) >> 4; // Scale 16 to 100 // Adjustment to boost based on recent average q - kf_boost = kf_boost * vp8_kf_boost_qadjustment[cpi->ni_av_qi] / 100; + //kf_boost = kf_boost * vp8_kf_boost_qadjustment[cpi->ni_av_qi] / 100; if (kf_boost < 250) // Min KF boost kf_boost = 250; @@ -2474,7 +2765,7 @@ double alt_kf_grp_bits = ((double)cpi->bits_left * (kf_mod_err * (double)cpi->frames_to_key) / - DOUBLE_DIVIDE_CHECK(cpi->modified_total_error_left)); + DOUBLE_DIVIDE_CHECK(cpi->modified_error_left)); alt_kf_bits = (int)((double)kf_boost * (alt_kf_grp_bits / (double)allocation_chunks)); @@ -2492,7 +2783,7 @@ alt_kf_bits = (int)((double)cpi->bits_left * (kf_mod_err / - DOUBLE_DIVIDE_CHECK(cpi->modified_total_error_left))); + DOUBLE_DIVIDE_CHECK(cpi->modified_error_left))); if (alt_kf_bits > cpi->kf_bits) { @@ -2512,7 +2803,7 @@ // Adjust the count of total modified error left. 
// The count of bits left is adjusted elsewhere based on real coded frame sizes - cpi->modified_total_error_left -= kf_group_err; + cpi->modified_error_left -= kf_group_err; if (cpi->oxcf.allow_spatial_resampling) { diff -Nru libvpx-0.9.5/vp8/encoder/generic/csystemdependent.c libvpx-0.9.6/vp8/encoder/generic/csystemdependent.c --- libvpx-0.9.5/vp8/encoder/generic/csystemdependent.c 2010-10-28 13:14:14.000000000 +0000 +++ libvpx-0.9.6/vp8/encoder/generic/csystemdependent.c 2011-03-04 20:40:40.000000000 +0000 @@ -10,8 +10,8 @@ #include "vpx_ports/config.h" -#include "variance.h" -#include "onyx_int.h" +#include "vp8/encoder/variance.h" +#include "vp8/encoder/onyx_int.h" void vp8_arch_x86_encoder_init(VP8_COMP *cpi); @@ -40,6 +40,12 @@ cpi->rtcd.variance.sad8x8x3 = vp8_sad8x8x3_c; cpi->rtcd.variance.sad4x4x3 = vp8_sad4x4x3_c; + cpi->rtcd.variance.sad16x16x8 = vp8_sad16x16x8_c; + cpi->rtcd.variance.sad16x8x8 = vp8_sad16x8x8_c; + cpi->rtcd.variance.sad8x16x8 = vp8_sad8x16x8_c; + cpi->rtcd.variance.sad8x8x8 = vp8_sad8x8x8_c; + cpi->rtcd.variance.sad4x4x8 = vp8_sad4x4x8_c; + cpi->rtcd.variance.sad16x16x4d = vp8_sad16x16x4d_c; cpi->rtcd.variance.sad16x8x4d = vp8_sad16x8x4d_c; cpi->rtcd.variance.sad8x16x4d = vp8_sad8x16x4d_c; @@ -85,9 +91,13 @@ cpi->rtcd.quantize.quantb = vp8_regular_quantize_b; cpi->rtcd.quantize.fastquantb = vp8_fast_quantize_b_c; - +#if !(CONFIG_REALTIME_ONLY) cpi->rtcd.search.full_search = vp8_full_search_sad; +#endif cpi->rtcd.search.diamond_search = vp8_diamond_search_sad; +#if !(CONFIG_REALTIME_ONLY) + cpi->rtcd.temporal.apply = vp8_temporal_filter_apply_c; +#endif #endif // Pure C: diff -Nru libvpx-0.9.5/vp8/encoder/mcomp.c libvpx-0.9.6/vp8/encoder/mcomp.c --- libvpx-0.9.5/vp8/encoder/mcomp.c 2010-10-28 13:14:14.000000000 +0000 +++ libvpx-0.9.6/vp8/encoder/mcomp.c 2011-03-04 20:40:40.000000000 +0000 @@ -408,6 +408,7 @@ diag = vfp->svf_halfpix_hv(y - 1, d->pre_stride, z, b->src_stride, &sse); break; case 3: + default: this_mv.col += 4; this_mv.row 
+= 4; diag = vfp->svf_halfpix_hv(y, d->pre_stride, z, b->src_stride, &sse); @@ -778,15 +779,17 @@ int *num00, const vp8_variance_fn_ptr_t *vfp, int *mvsadcost[2], - int *mvcost[2] + int *mvcost[2], + MV *center_mv ) { MV hex[6] = { { -1, -2}, {1, -2}, {2, 0}, {1, 2}, { -1, 2}, { -2, 0} } ; - MV neighbors[8] = { { -1, -1}, { -1, 0}, { -1, 1}, {0, -1}, {0, 1}, {1, -1}, {1, 0}, {1, 1} } ; + MV neighbors[8] = { { -1, -1}, {0, -1}, {1, -1}, { -1, 0}, {1, 0}, { -1, 1}, {0, 1}, {1, 1} } ; int i, j; unsigned char *src = (*(b->base_src) + b->src); int src_stride = b->src_stride; - int rr = ref_mv->row, rc = ref_mv->col, br = rr >> 3, bc = rc >> 3, tr, tc; + int rr = center_mv->row, rc = center_mv->col; + int br = ref_mv->row >> 3, bc = ref_mv->col >> 3, tr, tc; unsigned int besterr, thiserr = 0x7fffffff; int k = -1, tk; @@ -891,7 +894,7 @@ best_mv->row = br; best_mv->col = bc; - return vfp->vf(src, src_stride, PRE(br, bc), d->pre_stride, &thiserr) + MVC(br, bc) ; + return vfp->vf(src, src_stride, PRE(br, bc), d->pre_stride, &thiserr) + vp8_mv_err_cost(best_mv, center_mv, mvcost, error_per_bit) ; } #undef MVC #undef PRE @@ -913,7 +916,8 @@ int *num00, vp8_variance_fn_ptr_t *fn_ptr, int *mvsadcost[2], - int *mvcost[2] + int *mvcost[2], + MV *center_mv ) { int i, j, step; @@ -940,6 +944,8 @@ unsigned char *check_here; int thissad; + *num00 = 0; + // Work out the start point for the search in_what = (unsigned char *)(*(d->base_pre) + d->pre + (ref_row * (d->pre_stride)) + ref_col); best_address = in_what; @@ -949,7 +955,7 @@ (ref_row > x->mv_row_min) && (ref_row < x->mv_row_max)) { // Check the starting position - bestsad = fn_ptr->sdf(what, what_stride, in_what, in_what_stride, 0x7fffffff) + vp8_mv_err_cost(ref_mv, ref_mv, mvsadcost, error_per_bit); + bestsad = fn_ptr->sdf(what, what_stride, in_what, in_what_stride, 0x7fffffff) + vp8_mv_err_cost(ref_mv, center_mv, mvsadcost, error_per_bit); } // search_param determines the length of the initial step and hence the number of 
iterations @@ -961,8 +967,6 @@ best_mv->row = ref_row; best_mv->col = ref_col; - *num00 = 0; - for (step = 0; step < tot_steps ; step++) { for (j = 0 ; j < x->searches_per_step ; j++) @@ -982,7 +986,7 @@ { this_mv.row = this_row_offset << 3; this_mv.col = this_col_offset << 3; - thissad += vp8_mv_err_cost(&this_mv, ref_mv, mvsadcost, error_per_bit); + thissad += vp8_mv_err_cost(&this_mv, center_mv, mvsadcost, error_per_bit); if (thissad < bestsad) { @@ -1013,7 +1017,7 @@ return INT_MAX; return fn_ptr->vf(what, what_stride, best_address, in_what_stride, (unsigned int *)(&thissad)) - + vp8_mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit); + + vp8_mv_err_cost(&this_mv, center_mv, mvcost, error_per_bit); } int vp8_diamond_search_sadx4 @@ -1028,7 +1032,8 @@ int *num00, vp8_variance_fn_ptr_t *fn_ptr, int *mvsadcost[2], - int *mvcost[2] + int *mvcost[2], + MV *center_mv ) { int i, j, step; @@ -1055,6 +1060,8 @@ unsigned char *check_here; unsigned int thissad; + *num00 = 0; + // Work out the start point for the search in_what = (unsigned char *)(*(d->base_pre) + d->pre + (ref_row * (d->pre_stride)) + ref_col); best_address = in_what; @@ -1064,7 +1071,7 @@ (ref_row > x->mv_row_min) && (ref_row < x->mv_row_max)) { // Check the starting position - bestsad = fn_ptr->sdf(what, what_stride, in_what, in_what_stride, 0x7fffffff) + vp8_mv_err_cost(ref_mv, ref_mv, mvsadcost, error_per_bit); + bestsad = fn_ptr->sdf(what, what_stride, in_what, in_what_stride, 0x7fffffff) + vp8_mv_err_cost(ref_mv, center_mv, mvsadcost, error_per_bit); } // search_param determines the length of the initial step and hence the number of iterations @@ -1076,8 +1083,6 @@ best_mv->row = ref_row; best_mv->col = ref_col; - *num00 = 0; - for (step = 0; step < tot_steps ; step++) { int all_in = 1, t; @@ -1108,7 +1113,7 @@ { this_mv.row = (best_mv->row + ss[i].mv.row) << 3; this_mv.col = (best_mv->col + ss[i].mv.col) << 3; - sad_array[t] += vp8_mv_err_cost(&this_mv, ref_mv, mvsadcost, error_per_bit); + 
sad_array[t] += vp8_mv_err_cost(&this_mv, center_mv, mvsadcost, error_per_bit); if (sad_array[t] < bestsad) { @@ -1137,7 +1142,7 @@ { this_mv.row = this_row_offset << 3; this_mv.col = this_col_offset << 3; - thissad += vp8_mv_err_cost(&this_mv, ref_mv, mvsadcost, error_per_bit); + thissad += vp8_mv_err_cost(&this_mv, center_mv, mvsadcost, error_per_bit); if (thissad < bestsad) { @@ -1168,12 +1173,12 @@ return INT_MAX; return fn_ptr->vf(what, what_stride, best_address, in_what_stride, (unsigned int *)(&thissad)) - + vp8_mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit); + + vp8_mv_err_cost(&this_mv, center_mv, mvcost, error_per_bit); } #if !(CONFIG_REALTIME_ONLY) -int vp8_full_search_sad(MACROBLOCK *x, BLOCK *b, BLOCKD *d, MV *ref_mv, int error_per_bit, int distance, vp8_variance_fn_ptr_t *fn_ptr, int *mvcost[2], int *mvsadcost[2]) +int vp8_full_search_sad(MACROBLOCK *x, BLOCK *b, BLOCKD *d, MV *ref_mv, int error_per_bit, int distance, vp8_variance_fn_ptr_t *fn_ptr, int *mvcost[2], int *mvsadcost[2], MV *center_mv) { unsigned char *what = (*(b->base_src) + b->src); int what_stride = b->src_stride; @@ -1211,7 +1216,7 @@ // Baseline value at the centre //bestsad = fn_ptr->sf( what,what_stride,bestaddress,in_what_stride) + (int)sqrt(vp8_mv_err_cost(ref_mv,ref_mv, mvcost,error_per_bit*14)); - bestsad = fn_ptr->sdf(what, what_stride, bestaddress, in_what_stride, 0x7fffffff) + vp8_mv_err_cost(ref_mv, ref_mv, mvsadcost, error_per_bit); + bestsad = fn_ptr->sdf(what, what_stride, bestaddress, in_what_stride, 0x7fffffff) + vp8_mv_err_cost(ref_mv, center_mv, mvsadcost, error_per_bit); } // Apply further limits to prevent us looking using vectors that stretch beyiond the UMV border @@ -1239,7 +1244,7 @@ this_mv.col = c << 3; //thissad += (int)sqrt(vp8_mv_err_cost(&this_mv,ref_mv, mvcost,error_per_bit*14)); //thissad += error_per_bit * mv_bits_sadcost[mv_bits(&this_mv, ref_mv, mvcost)]; - thissad += vp8_mv_err_cost(&this_mv, ref_mv, mvsadcost, error_per_bit); 
//mv_bits(error_per_bit, &this_mv, ref_mv, mvsadcost); + thissad += vp8_mv_err_cost(&this_mv, center_mv, mvsadcost, error_per_bit); //mv_bits(error_per_bit, &this_mv, ref_mv, mvsadcost); if (thissad < bestsad) { @@ -1258,12 +1263,12 @@ if (bestsad < INT_MAX) return fn_ptr->vf(what, what_stride, bestaddress, in_what_stride, (unsigned int *)(&thissad)) - + vp8_mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit); + + vp8_mv_err_cost(&this_mv, center_mv, mvcost, error_per_bit); else return INT_MAX; } -int vp8_full_search_sadx3(MACROBLOCK *x, BLOCK *b, BLOCKD *d, MV *ref_mv, int error_per_bit, int distance, vp8_variance_fn_ptr_t *fn_ptr, int *mvcost[2], int *mvsadcost[2]) +int vp8_full_search_sadx3(MACROBLOCK *x, BLOCK *b, BLOCKD *d, MV *ref_mv, int error_per_bit, int distance, vp8_variance_fn_ptr_t *fn_ptr, int *mvcost[2], int *mvsadcost[2], MV *center_mv) { unsigned char *what = (*(b->base_src) + b->src); int what_stride = b->src_stride; @@ -1301,7 +1306,7 @@ (ref_row > x->mv_row_min) && (ref_row < x->mv_row_max)) { // Baseline value at the centre - bestsad = fn_ptr->sdf(what, what_stride, bestaddress, in_what_stride, 0x7fffffff) + vp8_mv_err_cost(ref_mv, ref_mv, mvsadcost, error_per_bit); + bestsad = fn_ptr->sdf(what, what_stride, bestaddress, in_what_stride, 0x7fffffff) + vp8_mv_err_cost(ref_mv, center_mv, mvsadcost, error_per_bit); } // Apply further limits to prevent us looking using vectors that stretch beyiond the UMV border @@ -1323,7 +1328,7 @@ check_here = r * mv_stride + in_what + col_min; c = col_min; - while ((c + 3) < col_max) + while ((c + 2) < col_max) { int i; @@ -1336,7 +1341,7 @@ if (thissad < bestsad) { this_mv.col = c << 3; - thissad += vp8_mv_err_cost(&this_mv, ref_mv, mvsadcost, error_per_bit); + thissad += vp8_mv_err_cost(&this_mv, center_mv, mvsadcost, error_per_bit); if (thissad < bestsad) { @@ -1359,7 +1364,7 @@ if (thissad < bestsad) { this_mv.col = c << 3; - thissad += vp8_mv_err_cost(&this_mv, ref_mv, mvsadcost, error_per_bit); + thissad 
+= vp8_mv_err_cost(&this_mv, center_mv, mvsadcost, error_per_bit); if (thissad < bestsad) { @@ -1381,12 +1386,163 @@ if (bestsad < INT_MAX) return fn_ptr->vf(what, what_stride, bestaddress, in_what_stride, (unsigned int *)(&thissad)) - + vp8_mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit); + + vp8_mv_err_cost(&this_mv, center_mv, mvcost, error_per_bit); else return INT_MAX; } -#endif +int vp8_full_search_sadx8(MACROBLOCK *x, BLOCK *b, BLOCKD *d, MV *ref_mv, int error_per_bit, int distance, vp8_variance_fn_ptr_t *fn_ptr, int *mvcost[2], int *mvsadcost[2], MV *center_mv) +{ + unsigned char *what = (*(b->base_src) + b->src); + int what_stride = b->src_stride; + unsigned char *in_what; + int in_what_stride = d->pre_stride; + int mv_stride = d->pre_stride; + unsigned char *bestaddress; + MV *best_mv = &d->bmi.mv.as_mv; + MV this_mv; + int bestsad = INT_MAX; + int r, c; + + unsigned char *check_here; + unsigned int thissad; + + int ref_row = ref_mv->row >> 3; + int ref_col = ref_mv->col >> 3; + + int row_min = ref_row - distance; + int row_max = ref_row + distance; + int col_min = ref_col - distance; + int col_max = ref_col + distance; + + unsigned short sad_array8[8]; + unsigned int sad_array[3]; + + // Work out the mid point for the search + in_what = *(d->base_pre) + d->pre; + bestaddress = in_what + (ref_row * d->pre_stride) + ref_col; + + best_mv->row = ref_row; + best_mv->col = ref_col; + + // We need to check that the starting point for the search (as indicated by ref_mv) is within the buffer limits + if ((ref_col > x->mv_col_min) && (ref_col < x->mv_col_max) && + (ref_row > x->mv_row_min) && (ref_row < x->mv_row_max)) + { + // Baseline value at the centre + bestsad = fn_ptr->sdf(what, what_stride, bestaddress, in_what_stride, 0x7fffffff) + vp8_mv_err_cost(ref_mv, center_mv, mvsadcost, error_per_bit); + } + + // Apply further limits to prevent us looking using vectors that stretch beyiond the UMV border + if (col_min < x->mv_col_min) + col_min = 
x->mv_col_min; + + if (col_max > x->mv_col_max) + col_max = x->mv_col_max; + + if (row_min < x->mv_row_min) + row_min = x->mv_row_min; + + if (row_max > x->mv_row_max) + row_max = x->mv_row_max; + + for (r = row_min; r < row_max ; r++) + { + this_mv.row = r << 3; + check_here = r * mv_stride + in_what + col_min; + c = col_min; + + while ((c + 7) < col_max) + { + int i; + + fn_ptr->sdx8f(what, what_stride, check_here , in_what_stride, sad_array8); + + for (i = 0; i < 8; i++) + { + thissad = (unsigned int)sad_array8[i]; + + if (thissad < bestsad) + { + this_mv.col = c << 3; + thissad += vp8_mv_err_cost(&this_mv, center_mv, mvsadcost, error_per_bit); + + if (thissad < bestsad) + { + bestsad = thissad; + best_mv->row = r; + best_mv->col = c; + bestaddress = check_here; + } + } + + check_here++; + c++; + } + } + + while ((c + 2) < col_max) + { + int i; + + fn_ptr->sdx3f(what, what_stride, check_here , in_what_stride, sad_array); + + for (i = 0; i < 3; i++) + { + thissad = sad_array[i]; + + if (thissad < bestsad) + { + this_mv.col = c << 3; + thissad += vp8_mv_err_cost(&this_mv, center_mv, mvsadcost, error_per_bit); + + if (thissad < bestsad) + { + bestsad = thissad; + best_mv->row = r; + best_mv->col = c; + bestaddress = check_here; + } + } + + check_here++; + c++; + } + } + + while (c < col_max) + { + thissad = fn_ptr->sdf(what, what_stride, check_here , in_what_stride, bestsad); + + if (thissad < bestsad) + { + this_mv.col = c << 3; + thissad += vp8_mv_err_cost(&this_mv, center_mv, mvsadcost, error_per_bit); + + if (thissad < bestsad) + { + bestsad = thissad; + best_mv->row = r; + best_mv->col = c; + bestaddress = check_here; + } + } + + check_here ++; + c ++; + } + } + + this_mv.row = best_mv->row << 3; + this_mv.col = best_mv->col << 3; + + if (bestsad < INT_MAX) + return fn_ptr->vf(what, what_stride, bestaddress, in_what_stride, (unsigned int *)(&thissad)) + + vp8_mv_err_cost(&this_mv, center_mv, mvcost, error_per_bit); + else + return INT_MAX; +} +#endif /* 
!(CONFIG_REALTIME_ONLY) */ #ifdef ENTROPY_STATS void print_mode_context(void) diff -Nru libvpx-0.9.5/vp8/encoder/mcomp.h libvpx-0.9.6/vp8/encoder/mcomp.h --- libvpx-0.9.5/vp8/encoder/mcomp.h 2010-10-28 13:14:14.000000000 +0000 +++ libvpx-0.9.6/vp8/encoder/mcomp.h 2011-03-04 20:40:40.000000000 +0000 @@ -25,7 +25,6 @@ #define MAX_FULL_PEL_VAL ((1 << (MAX_MVSEARCH_STEPS+3)) - 8) // Max full pel mv specified in 1/8 pel units #define MAX_FIRST_STEP (1 << (MAX_MVSEARCH_STEPS-1)) // Maximum size of the first step in full pel units - extern void print_mode_context(void); extern int vp8_mv_bit_cost(MV *mv, MV *ref, int *mvcost[2], int Weight); extern void vp8_init_dsmotion_compensation(MACROBLOCK *x, int stride); @@ -44,8 +43,8 @@ int *num00, const vp8_variance_fn_ptr_t *vf, int *mvsadcost[2], - int *mvcost[2] - + int *mvcost[2], + MV *center_mv ); typedef int (fractional_mv_step_fp) @@ -67,7 +66,8 @@ int distance, \ vp8_variance_fn_ptr_t *fn_ptr, \ int *mvcost[2], \ - int *mvsadcost[2] \ + int *mvsadcost[2], \ + MV *center_mv \ ) #define prototype_diamond_search_sad(sym)\ @@ -83,7 +83,8 @@ int *num00, \ vp8_variance_fn_ptr_t *fn_ptr, \ int *mvsadcost[2], \ - int *mvcost[2] \ + int *mvcost[2], \ + MV *center_mv \ ) #if ARCH_X86 || ARCH_X86_64 @@ -93,6 +94,7 @@ typedef prototype_full_search_sad(*vp8_full_search_fn_t); extern prototype_full_search_sad(vp8_full_search_sad); extern prototype_full_search_sad(vp8_full_search_sadx3); +extern prototype_full_search_sad(vp8_full_search_sadx8); typedef prototype_diamond_search_sad(*vp8_diamond_search_fn_t); extern prototype_diamond_search_sad(vp8_diamond_search_sad); diff -Nru libvpx-0.9.5/vp8/encoder/modecosts.c libvpx-0.9.6/vp8/encoder/modecosts.c --- libvpx-0.9.5/vp8/encoder/modecosts.c 2010-10-28 13:14:14.000000000 +0000 +++ libvpx-0.9.6/vp8/encoder/modecosts.c 2011-03-04 20:40:40.000000000 +0000 @@ -9,10 +9,10 @@ */ -#include "blockd.h" +#include "vp8/common/blockd.h" #include "onyx_int.h" #include "treewriter.h" -#include 
"entropymode.h" +#include "vp8/common/entropymode.h" void vp8_init_mode_costs(VP8_COMP *c) diff -Nru libvpx-0.9.5/vp8/encoder/onyx_if.c libvpx-0.9.6/vp8/encoder/onyx_if.c --- libvpx-0.9.5/vp8/encoder/onyx_if.c 2010-10-28 13:14:14.000000000 +0000 +++ libvpx-0.9.6/vp8/encoder/onyx_if.c 2011-03-04 20:40:40.000000000 +0000 @@ -9,27 +9,26 @@ */ -#include "onyxc_int.h" +#include "vp8/common/onyxc_int.h" #include "onyx_int.h" -#include "systemdependent.h" +#include "vp8/common/systemdependent.h" #include "quantize.h" -#include "alloccommon.h" +#include "vp8/common/alloccommon.h" #include "mcomp.h" #include "firstpass.h" #include "psnr.h" #include "vpx_scale/vpxscale.h" -#include "extend.h" +#include "vp8/common/extend.h" #include "ratectrl.h" -#include "quant_common.h" +#include "vp8/common/quant_common.h" #include "segmentation.h" -#include "g_common.h" +#include "vp8/common/g_common.h" #include "vpx_scale/yv12extend.h" -#include "postproc.h" +#include "vp8/common/postproc.h" #include "vpx_mem/vpx_mem.h" -#include "swapyv12buffer.h" -#include "threading.h" +#include "vp8/common/swapyv12buffer.h" +#include "vp8/common/threading.h" #include "vpx_ports/vpx_timer.h" -#include "vpxerrors.h" #include "temporal_filter.h" #if ARCH_ARM #include "vpx_ports/arm.h" @@ -73,6 +72,7 @@ int vp8_calc_ss_err(YV12_BUFFER_CONFIG *source, YV12_BUFFER_CONFIG *dest, const vp8_variance_rtcd_vtable_t *rtcd); int vp8_calc_low_ss_err(YV12_BUFFER_CONFIG *source, YV12_BUFFER_CONFIG *dest, const vp8_variance_rtcd_vtable_t *rtcd); +extern void vp8_temporal_filter_prepare_c(VP8_COMP *cpi); static void set_default_lf_deltas(VP8_COMP *cpi); @@ -154,37 +154,26 @@ // Tables relating active max Q to active min Q static const int kf_low_motion_minq[QINDEX_RANGE] = { - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 1, 1, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3, 4, 4, 4, 4, - 5, 5, 5, 6, 6, 6, 7, 7, 7, 8, 8, 8, 9, 9, 
10,10, - 11,11,12,12,13,13,14,14,15,15,16,16,17,17,18,18, - 19,19,20,20,21,21,22,22,23,23,24,24,25,25,26,26, - 27,27,28,28,29,29,30,30,31,32,33,34,35,36,37,38, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 0,0,0,0,1,1,1,1,1,1,1,1,2,2,2,2, + 3,3,3,3,3,3,4,4,4,5,5,5,5,5,6,6, + 6,6,7,7,8,8,8,8,9,9,10,10,10,10,11,11, + 11,11,12,12,13,13,13,13,14,14,15,15,15,15,16,16, + 16,16,17,17,18,18,18,18,19,20,20,21,21,22,23,23 }; static const int kf_high_motion_minq[QINDEX_RANGE] = { - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, - 2, 2, 2, 2, 3, 3, 3, 3, 4, 4, 4, 4, 5, 5, 5, 5, - 6, 6, 6, 6, 7, 7, 7, 7, 8, 8, 8, 9, 9, 9, 10,10, - 11,11,12,12,13,13,14,14,15,15,16,16,17,17,18,18, - 19,19,20,20,21,21,22,22,23,23,24,24,25,25,26,26, - 27,27,28,28,29,29,30,30,31,31,32,32,33,33,34,34, - 35,35,36,36,37,38,39,40,41,42,43,44,45,46,47,48, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 1,1,1,1,1,1,1,1,2,2,2,2,3,3,3,3, + 3,3,3,3,4,4,4,4,5,5,5,5,5,5,6,6, + 6,6,7,7,8,8,8,8,9,9,10,10,10,10,11,11, + 11,11,12,12,13,13,13,13,14,14,15,15,15,15,16,16, + 16,16,17,17,18,18,18,18,19,19,20,20,20,20,21,21, + 21,21,22,22,23,23,24,25,25,26,26,27,28,28,29,30 }; -/*static const int kf_minq[QINDEX_RANGE] = -{ - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 2, 2, 2, 2, 3, 3, 3, 3, 4, 4, 4, 4, 5, 5, 6, 6, - 7, 7, 8, 8, 9, 9, 10,10,11,11,12,12,13,13,14,14, - 15,15,16,16,17,17,18,18,19,19,20,20,21,21,22,22, - 23,23,24,24,25,25,26,26,27,27,28,28,29,29,30,30, - 31,31,32,32,33,33,34,34,35,35,36,36,37,37,38,38 -};*/ static const int gf_low_motion_minq[QINDEX_RANGE] = { 0,0,0,0,1,1,1,1,1,1,1,1,2,2,2,2, @@ -205,7 +194,7 @@ 22,22,23,23,24,24,25,25,26,26,27,27,28,28,29,29, 30,30,31,31,32,32,33,33,34,34,35,35,36,36,37,37, 38,39,39,40,40,41,41,42,42,43,43,44,45,46,47,48, - 
49,50,51,52,53,54,55,56,57,58,59,60,61,62,63,64, + 49,50,51,52,53,54,55,56,57,58,59,60,61,62,63,64 }; static const int gf_high_motion_minq[QINDEX_RANGE] = { @@ -216,29 +205,18 @@ 25,25,26,26,27,27,28,28,29,29,30,30,31,31,32,32, 33,33,34,34,35,35,36,36,37,37,38,38,39,39,40,40, 41,41,42,42,43,44,45,46,47,48,49,50,51,52,53,54, - 55,56,57,58,59,60,62,64,66,68,70,72,74,76,78,80, + 55,56,57,58,59,60,62,64,66,68,70,72,74,76,78,80 }; -/*static const int gf_arf_minq[QINDEX_RANGE] = -{ - 0,0,0,0,1,1,1,1,1,1,2,2,3,3,3,4, - 4,4,5,5,5,6,6,6,7,7,7,8,8,8,9,9, - 9,10,10,10,11,11,11,12,12,12,13,13,13,14,14,14, - 15,15,16,16,17,17,18,18,19,19,20,20,21,21,22,22, - 23,23,24,24,25,25,26,26,27,27,28,28,29,29,30,30, - 31,31,32,32,33,33,34,34,35,35,36,36,37,37,38,39, - 39,40,40,41,41,42,42,43,43,44,45,46,47,48,49,50, - 51,52,53,54,55,56,57,58,59,60,61,62,63,64,65,66 -};*/ static const int inter_minq[QINDEX_RANGE] = { - 0,0,0,0,1,1,2,3,3,4,4,5,6,6,7,7, - 8,8,9,9,10,11,11,12,12,13,13,14,14,15,15,16, - 16,17,17,17,18,18,19,19,20,20,21,21,22,22,22,23, - 23,24,24,24,25,25,26,27,28,28,29,30,31,32,33,34, - 35,35,36,37,38,39,39,40,41,42,43,43,44,45,46,47, - 47,48,49,49,51,52,53,54,54,55,56,56,57,57,58,58, - 59,59,60,61,61,62,62,63,64,64,65,66,67,67,68,69, - 69,70,71,71,72,73,74,75,76,76,77,78,79,80,81,81, + 0,0,1,1,2,3,3,4,4,5,6,6,7,8,8,9, + 9,10,11,11,12,13,13,14,15,15,16,17,17,18,19,20, + 20,21,22,22,23,24,24,25,26,27,27,28,29,30,30,31, + 32,33,33,34,35,36,36,37,38,39,39,40,41,42,42,43, + 44,45,46,46,47,48,49,50,50,51,52,53,54,55,55,56, + 57,58,59,60,60,61,62,63,64,65,66,67,67,68,69,70, + 71,72,73,74,75,75,76,77,78,79,80,81,82,83,84,85, + 86,86,87,88,89,90,91,92,93,94,95,96,97,98,99,100 }; void vp8_initialize() @@ -283,23 +261,31 @@ void vp8_dealloc_compressor_data(VP8_COMP *cpi) { + vpx_free(cpi->tplist); + cpi->tplist = NULL; - // Delete sementation map - if (cpi->segmentation_map != 0) - vpx_free(cpi->segmentation_map); + // Delete last frame MV storage buffers + vpx_free(cpi->lfmv); + 
cpi->lfmv = 0; - cpi->segmentation_map = 0; + vpx_free(cpi->lf_ref_frame_sign_bias); + cpi->lf_ref_frame_sign_bias = 0; + + vpx_free(cpi->lf_ref_frame); + cpi->lf_ref_frame = 0; - if (cpi->active_map != 0) - vpx_free(cpi->active_map); + // Delete sementation map + vpx_free(cpi->segmentation_map); + cpi->segmentation_map = 0; + vpx_free(cpi->active_map); cpi->active_map = 0; +#if !(CONFIG_REALTIME_ONLY) // Delete first pass motion map - if (cpi->fp_motion_map != 0) - vpx_free(cpi->fp_motion_map); - + vpx_free(cpi->fp_motion_map); cpi->fp_motion_map = 0; +#endif vp8_de_alloc_frame_buffers(&cpi->common); @@ -320,19 +306,20 @@ vpx_free(cpi->tok); cpi->tok = 0; - // Structure used to minitor GF useage - if (cpi->gf_active_flags != 0) - vpx_free(cpi->gf_active_flags); - + // Structure used to monitor GF usage + vpx_free(cpi->gf_active_flags); cpi->gf_active_flags = 0; - if(cpi->mb.pip) - vpx_free(cpi->mb.pip); - + vpx_free(cpi->mb.pip); cpi->mb.pip = 0; +#if !(CONFIG_REALTIME_ONLY) vpx_free(cpi->total_stats); + cpi->total_stats = 0; + vpx_free(cpi->this_frame_stats); + cpi->this_frame_stats = 0; +#endif } static void enable_segmentation(VP8_PTR ptr) @@ -435,7 +422,6 @@ set_segment_data(ptr, &feature_data[0][0], SEGMENT_DELTADATA); // Delete sementation map - if (seg_map != 0) vpx_free(seg_map); seg_map = 0; @@ -529,7 +515,6 @@ set_segment_data((VP8_PTR)cpi, &feature_data[0][0], SEGMENT_DELTADATA); // Delete sementation map - if (seg_map != 0) vpx_free(seg_map); seg_map = 0; @@ -563,6 +548,7 @@ int Speed = cpi->Speed; int i; VP8_COMMON *cm = &cpi->common; + int last_improved_quant = sf->improved_quant; // Initialise default mode frequency sampling variables for (i = 0; i < MAX_MODES; i ++) @@ -574,7 +560,7 @@ cpi->mbs_tested_so_far = 0; - // best quality + // best quality defaults sf->RD = 1; sf->search_method = NSTEP; sf->improved_quant = 1; @@ -589,9 +575,12 @@ sf->max_fs_radius = 32; sf->iterative_sub_pixel = 1; sf->optimize_coefficients = 1; + 
sf->use_fastquant_for_pick = 0; + sf->no_skip_block4x4_search = 1; sf->first_step = 0; sf->max_step_search_steps = MAX_MVSEARCH_STEPS; + sf->improved_mv_pred = 1; cpi->do_full[0] = 0; cpi->do_full[1] = 0; @@ -634,34 +623,6 @@ sf->first_step = 0; sf->max_step_search_steps = MAX_MVSEARCH_STEPS; - - if (!(cpi->ref_frame_flags & VP8_LAST_FLAG)) - { - sf->thresh_mult[THR_NEWMV ] = INT_MAX; - sf->thresh_mult[THR_NEARESTMV] = INT_MAX; - sf->thresh_mult[THR_ZEROMV ] = INT_MAX; - sf->thresh_mult[THR_NEARMV ] = INT_MAX; - sf->thresh_mult[THR_SPLITMV ] = INT_MAX; - } - - if (!(cpi->ref_frame_flags & VP8_GOLD_FLAG)) - { - sf->thresh_mult[THR_NEARESTG ] = INT_MAX; - sf->thresh_mult[THR_ZEROG ] = INT_MAX; - sf->thresh_mult[THR_NEARG ] = INT_MAX; - sf->thresh_mult[THR_NEWG ] = INT_MAX; - sf->thresh_mult[THR_SPLITG ] = INT_MAX; - } - - if (!(cpi->ref_frame_flags & VP8_ALT_FLAG)) - { - sf->thresh_mult[THR_NEARESTA ] = INT_MAX; - sf->thresh_mult[THR_ZEROA ] = INT_MAX; - sf->thresh_mult[THR_NEARA ] = INT_MAX; - sf->thresh_mult[THR_NEWA ] = INT_MAX; - sf->thresh_mult[THR_SPLITA ] = INT_MAX; - } - break; case 1: case 3: @@ -682,6 +643,32 @@ sf->thresh_mult[THR_NEARG ] = 1000; sf->thresh_mult[THR_NEARA ] = 1000; +#if 1 + sf->thresh_mult[THR_ZEROMV ] = 0; + sf->thresh_mult[THR_ZEROG ] = 0; + sf->thresh_mult[THR_ZEROA ] = 0; + sf->thresh_mult[THR_NEARESTMV] = 0; + sf->thresh_mult[THR_NEARESTG ] = 0; + sf->thresh_mult[THR_NEARESTA ] = 0; + sf->thresh_mult[THR_NEARMV ] = 0; + sf->thresh_mult[THR_NEARG ] = 0; + sf->thresh_mult[THR_NEARA ] = 0; + +// sf->thresh_mult[THR_DC ] = 0; + +// sf->thresh_mult[THR_V_PRED ] = 1000; +// sf->thresh_mult[THR_H_PRED ] = 1000; +// sf->thresh_mult[THR_B_PRED ] = 2000; +// sf->thresh_mult[THR_TM ] = 1000; + + sf->thresh_mult[THR_NEWMV ] = 1000; + sf->thresh_mult[THR_NEWG ] = 1000; + sf->thresh_mult[THR_NEWA ] = 1000; + + sf->thresh_mult[THR_SPLITMV ] = 1700; + sf->thresh_mult[THR_SPLITG ] = 4500; + sf->thresh_mult[THR_SPLITA ] = 4500; +#else 
sf->thresh_mult[THR_NEWMV ] = 1500; sf->thresh_mult[THR_NEWG ] = 1500; sf->thresh_mult[THR_NEWA ] = 1500; @@ -689,45 +676,26 @@ sf->thresh_mult[THR_SPLITMV ] = 5000; sf->thresh_mult[THR_SPLITG ] = 10000; sf->thresh_mult[THR_SPLITA ] = 10000; - +#endif sf->full_freq[0] = 15; sf->full_freq[1] = 31; - sf->first_step = 0; - sf->max_step_search_steps = MAX_MVSEARCH_STEPS; - - if (!(cpi->ref_frame_flags & VP8_LAST_FLAG)) + if (Speed > 0) { - sf->thresh_mult[THR_NEWMV ] = INT_MAX; - sf->thresh_mult[THR_NEARESTMV] = INT_MAX; - sf->thresh_mult[THR_ZEROMV ] = INT_MAX; - sf->thresh_mult[THR_NEARMV ] = INT_MAX; - sf->thresh_mult[THR_SPLITMV ] = INT_MAX; - } + /* Disable coefficient optimization above speed 0 */ + sf->optimize_coefficients = 0; + sf->use_fastquant_for_pick = 1; + sf->no_skip_block4x4_search = 0; - if (!(cpi->ref_frame_flags & VP8_GOLD_FLAG)) - { - sf->thresh_mult[THR_NEARESTG ] = INT_MAX; - sf->thresh_mult[THR_ZEROG ] = INT_MAX; - sf->thresh_mult[THR_NEARG ] = INT_MAX; - sf->thresh_mult[THR_NEWG ] = INT_MAX; - sf->thresh_mult[THR_SPLITG ] = INT_MAX; - } - - if (!(cpi->ref_frame_flags & VP8_ALT_FLAG)) - { - sf->thresh_mult[THR_NEARESTA ] = INT_MAX; - sf->thresh_mult[THR_ZEROA ] = INT_MAX; - sf->thresh_mult[THR_NEARA ] = INT_MAX; - sf->thresh_mult[THR_NEWA ] = INT_MAX; - sf->thresh_mult[THR_SPLITA ] = INT_MAX; + sf->first_step = 1; + + cpi->mode_check_freq[THR_SPLITG] = 2; + cpi->mode_check_freq[THR_SPLITA] = 2; + cpi->mode_check_freq[THR_SPLITMV] = 0; } - if (Speed > 0) + if (Speed > 1) { - // Disable coefficient optimization above speed 0 - sf->optimize_coefficients = 0; - cpi->mode_check_freq[THR_SPLITG] = 4; cpi->mode_check_freq[THR_SPLITA] = 4; cpi->mode_check_freq[THR_SPLITMV] = 2; @@ -760,15 +728,9 @@ sf->thresh_mult[THR_NEWA ] = 2000; sf->thresh_mult[THR_SPLITA ] = 20000; } - - sf->improved_quant = 0; - sf->improved_dct = 0; - - sf->first_step = 1; - sf->max_step_search_steps = MAX_MVSEARCH_STEPS; } - if (Speed > 1) + if (Speed > 2) { 
cpi->mode_check_freq[THR_SPLITG] = 15; cpi->mode_check_freq[THR_SPLITA] = 15; @@ -803,43 +765,23 @@ sf->thresh_mult[THR_SPLITA ] = 50000; } - // Only do recode loop on key frames and golden frames + sf->improved_quant = 0; + sf->improved_dct = 0; + + // Only do recode loop on key frames, golden frames and + // alt ref frames sf->recode_loop = 2; sf->full_freq[0] = 31; sf->full_freq[1] = 63; - } - if (Speed > 2) + if (Speed > 3) { - sf->auto_filter = 0; // Faster selection of loop filter - cpi->mode_check_freq[THR_V_PRED] = 2; - cpi->mode_check_freq[THR_H_PRED] = 2; - cpi->mode_check_freq[THR_B_PRED] = 2; - - if (cpi->ref_frame_flags & VP8_GOLD_FLAG) - { - cpi->mode_check_freq[THR_NEARG] = 2; - cpi->mode_check_freq[THR_NEWG] = 4; - } - - if (cpi->ref_frame_flags & VP8_ALT_FLAG) - { - cpi->mode_check_freq[THR_NEARA] = 2; - cpi->mode_check_freq[THR_NEWA] = 4; - } - sf->thresh_mult[THR_SPLITA ] = INT_MAX; sf->thresh_mult[THR_SPLITG ] = INT_MAX; sf->thresh_mult[THR_SPLITMV ] = INT_MAX; - sf->full_freq[0] = 63; - sf->full_freq[1] = 127; - } - - if (Speed > 3) - { cpi->mode_check_freq[THR_V_PRED] = 0; cpi->mode_check_freq[THR_H_PRED] = 0; cpi->mode_check_freq[THR_B_PRED] = 0; @@ -851,13 +793,16 @@ sf->auto_filter = 1; sf->recode_loop = 0; // recode loop off sf->RD = 0; // Turn rd off - sf->full_freq[0] = INT_MAX; - sf->full_freq[1] = INT_MAX; + + sf->full_freq[0] = 63; + sf->full_freq[1] = 127; } if (Speed > 4) { sf->auto_filter = 0; // Faster selection of loop filter + sf->full_freq[0] = INT_MAX; + sf->full_freq[1] = INT_MAX; cpi->mode_check_freq[THR_V_PRED] = 2; cpi->mode_check_freq[THR_H_PRED] = 2; @@ -923,33 +868,6 @@ sf->full_freq[1] = 31; sf->search_method = NSTEP; - if (!(cpi->ref_frame_flags & VP8_LAST_FLAG)) - { - sf->thresh_mult[THR_NEWMV ] = INT_MAX; - sf->thresh_mult[THR_NEARESTMV] = INT_MAX; - sf->thresh_mult[THR_ZEROMV ] = INT_MAX; - sf->thresh_mult[THR_NEARMV ] = INT_MAX; - sf->thresh_mult[THR_SPLITMV ] = INT_MAX; - } - - if (!(cpi->ref_frame_flags & 
VP8_GOLD_FLAG)) - { - sf->thresh_mult[THR_NEARESTG ] = INT_MAX; - sf->thresh_mult[THR_ZEROG ] = INT_MAX; - sf->thresh_mult[THR_NEARG ] = INT_MAX; - sf->thresh_mult[THR_NEWG ] = INT_MAX; - sf->thresh_mult[THR_SPLITG ] = INT_MAX; - } - - if (!(cpi->ref_frame_flags & VP8_ALT_FLAG)) - { - sf->thresh_mult[THR_NEARESTA ] = INT_MAX; - sf->thresh_mult[THR_ZEROA ] = INT_MAX; - sf->thresh_mult[THR_NEARA ] = INT_MAX; - sf->thresh_mult[THR_NEWA ] = INT_MAX; - sf->thresh_mult[THR_SPLITA ] = INT_MAX; - } - if (Speed > 0) { cpi->mode_check_freq[THR_SPLITG] = 4; @@ -1078,6 +996,7 @@ #else sf->search_method = DIAMOND; #endif + sf->iterative_sub_pixel = 0; cpi->mode_check_freq[THR_V_PRED] = 4; cpi->mode_check_freq[THR_H_PRED] = 4; @@ -1129,7 +1048,6 @@ int total_skip; int min = 2000; - sf->iterative_sub_pixel = 0; if (cpi->oxcf.encode_breakout > 2000) min = cpi->oxcf.encode_breakout; @@ -1185,6 +1103,7 @@ sf->thresh_mult[THR_V_PRED] = INT_MAX; sf->thresh_mult[THR_H_PRED] = INT_MAX; + sf->improved_mv_pred = 0; } if (Speed > 8) @@ -1230,7 +1149,45 @@ vpx_memset(cpi->error_bins, 0, sizeof(cpi->error_bins)); - }; + }; /* switch */ + + /* disable frame modes if flags not set */ + if (!(cpi->ref_frame_flags & VP8_LAST_FLAG)) + { + sf->thresh_mult[THR_NEWMV ] = INT_MAX; + sf->thresh_mult[THR_NEARESTMV] = INT_MAX; + sf->thresh_mult[THR_ZEROMV ] = INT_MAX; + sf->thresh_mult[THR_NEARMV ] = INT_MAX; + sf->thresh_mult[THR_SPLITMV ] = INT_MAX; + } + + if (!(cpi->ref_frame_flags & VP8_GOLD_FLAG)) + { + sf->thresh_mult[THR_NEARESTG ] = INT_MAX; + sf->thresh_mult[THR_ZEROG ] = INT_MAX; + sf->thresh_mult[THR_NEARG ] = INT_MAX; + sf->thresh_mult[THR_NEWG ] = INT_MAX; + sf->thresh_mult[THR_SPLITG ] = INT_MAX; + } + + if (!(cpi->ref_frame_flags & VP8_ALT_FLAG)) + { + sf->thresh_mult[THR_NEARESTA ] = INT_MAX; + sf->thresh_mult[THR_ZEROA ] = INT_MAX; + sf->thresh_mult[THR_NEARA ] = INT_MAX; + sf->thresh_mult[THR_NEWA ] = INT_MAX; + sf->thresh_mult[THR_SPLITA ] = INT_MAX; + } + + + // Slow quant, dct and 
trellis not worthwhile for first pass + // so make sure they are always turned off. + if ( cpi->pass == 1 ) + { + sf->improved_quant = 0; + sf->optimize_coefficients = 0; + sf->improved_dct = 0; + } if (cpi->sf.search_method == NSTEP) { @@ -1262,6 +1219,8 @@ { cpi->mb.quantize_b = QUANTIZE_INVOKE(&cpi->rtcd.quantize, fastquantb); } + if (cpi->sf.improved_quant != last_improved_quant) + vp8cx_init_quantizer(cpi); #if CONFIG_RUNTIME_CPU_DETECT cpi->mb.e_mbd.rtcd = &cpi->common.rtcd; @@ -1299,6 +1258,8 @@ static void alloc_raw_frame_buffers(VP8_COMP *cpi) { int i, buffers; + /* allocate source_buffer to be multiples of 16 */ + int width = (cpi->oxcf.Width + 15) & ~15; buffers = cpi->oxcf.lag_in_frames; @@ -1310,7 +1271,7 @@ for (i = 0; i < buffers; i++) if (vp8_yv12_alloc_frame_buffer(&cpi->src_buffer[i].source_buffer, - cpi->oxcf.Width, cpi->oxcf.Height, + width, cpi->oxcf.Height, 16)) vpx_internal_error(&cpi->common.error, VPX_CODEC_MEM_ERROR, "Failed to allocate lag buffer"); @@ -1318,7 +1279,7 @@ #if VP8_TEMPORAL_ALT_REF if (vp8_yv12_alloc_frame_buffer(&cpi->alt_ref_buffer.source_buffer, - cpi->oxcf.Width, cpi->oxcf.Height, 16)) + width, cpi->oxcf.Height, 16)) vpx_internal_error(&cpi->common.error, VPX_CODEC_MEM_ERROR, "Failed to allocate altref buffer"); @@ -1329,11 +1290,13 @@ static int vp8_alloc_partition_data(VP8_COMP *cpi) { + vpx_free(cpi->mb.pip); + cpi->mb.pip = vpx_calloc((cpi->common.mb_cols + 1) * (cpi->common.mb_rows + 1), sizeof(PARTITION_INFO)); if(!cpi->mb.pip) - return ALLOC_FAILURE; + return 1; cpi->mb.pi = cpi->mb.pip + cpi->common.mode_info_stride + 1; @@ -1373,7 +1336,6 @@ "Failed to allocate scaled source buffer"); - if (cpi->tok != 0) vpx_free(cpi->tok); { @@ -1389,18 +1351,40 @@ // Structures used to minitor GF usage - if (cpi->gf_active_flags != 0) vpx_free(cpi->gf_active_flags); CHECK_MEM_ERROR(cpi->gf_active_flags, vpx_calloc(1, cm->mb_rows * cm->mb_cols)); cpi->gf_active_count = cm->mb_rows * cm->mb_cols; +#if !(CONFIG_REALTIME_ONLY) + 
vpx_free(cpi->total_stats); + cpi->total_stats = vpx_calloc(1, vp8_firstpass_stats_sz(cpi->common.MBs)); + + vpx_free(cpi->this_frame_stats); + cpi->this_frame_stats = vpx_calloc(1, vp8_firstpass_stats_sz(cpi->common.MBs)); + if(!cpi->total_stats || !cpi->this_frame_stats) vpx_internal_error(&cpi->common.error, VPX_CODEC_MEM_ERROR, "Failed to allocate firstpass stats"); +#endif + +#if CONFIG_MULTITHREAD + if (width < 640) + cpi->mt_sync_range = 1; + else if (width <= 1280) + cpi->mt_sync_range = 4; + else if (width <= 2560) + cpi->mt_sync_range = 8; + else + cpi->mt_sync_range = 16; +#endif + + vpx_free(cpi->tplist); + + CHECK_MEM_ERROR(cpi->tplist, vpx_malloc(sizeof(TOKENLIST) * cpi->common.mb_rows)); } @@ -1437,21 +1421,28 @@ cpi->per_frame_bandwidth = (int)(cpi->oxcf.target_bandwidth / cpi->output_frame_rate); cpi->av_per_frame_bandwidth = (int)(cpi->oxcf.target_bandwidth / cpi->output_frame_rate); cpi->min_frame_bandwidth = (int)(cpi->av_per_frame_bandwidth * cpi->oxcf.two_pass_vbrmin_section / 100); - cpi->max_gf_interval = (int)(cpi->output_frame_rate / 2) + 2; - //cpi->max_gf_interval = (int)(cpi->output_frame_rate * 2 / 3) + 1; - //cpi->max_gf_interval = 24; + // Set Maximum gf/arf interval + cpi->max_gf_interval = ((int)(cpi->output_frame_rate / 2.0) + 2); - if (cpi->max_gf_interval < 12) + if(cpi->max_gf_interval < 12) cpi->max_gf_interval = 12; + // Extended interval for genuinely static scenes + cpi->static_scene_max_gf_interval = cpi->key_frame_frequency >> 1; - // Special conditions when altr ref frame enabled in lagged compress mode + // Special conditions when altr ref frame enabled in lagged compress mode if (cpi->oxcf.play_alternate && cpi->oxcf.lag_in_frames) { if (cpi->max_gf_interval > cpi->oxcf.lag_in_frames - 1) cpi->max_gf_interval = cpi->oxcf.lag_in_frames - 1; + + if (cpi->static_scene_max_gf_interval > cpi->oxcf.lag_in_frames - 1) + cpi->static_scene_max_gf_interval = cpi->oxcf.lag_in_frames - 1; } + + if ( cpi->max_gf_interval > 
cpi->static_scene_max_gf_interval ) + cpi->max_gf_interval = cpi->static_scene_max_gf_interval; } @@ -1491,6 +1482,7 @@ cpi->auto_worst_q = 0; cpi->oxcf.best_allowed_q = MINQ; cpi->oxcf.worst_allowed_q = MAXQ; + cpi->oxcf.cq_level = MINQ; cpi->oxcf.end_usage = USAGE_STREAM_FROM_SERVER; cpi->oxcf.starting_buffer_level = 4000; @@ -1591,6 +1583,7 @@ cpi->oxcf.worst_allowed_q = q_trans[oxcf->worst_allowed_q]; cpi->oxcf.best_allowed_q = q_trans[oxcf->best_allowed_q]; + cpi->oxcf.cq_level = q_trans[cpi->oxcf.cq_level]; if (oxcf->fixed_q >= 0) { @@ -1680,6 +1673,8 @@ cpi->avg_frame_qindex = cpi->oxcf.worst_allowed_q; cpi->best_quality = cpi->oxcf.best_allowed_q; cpi->active_best_quality = cpi->oxcf.best_allowed_q; + cpi->cq_target_quality = cpi->oxcf.cq_level; + cpi->buffered_mode = (cpi->oxcf.optimal_buffer_level > 0) ? TRUE : FALSE; cpi->rolling_target_bits = cpi->av_per_frame_bandwidth; @@ -1876,6 +1871,7 @@ cpi->oxcf.worst_allowed_q = q_trans[oxcf->worst_allowed_q]; cpi->oxcf.best_allowed_q = q_trans[oxcf->best_allowed_q]; + cpi->oxcf.cq_level = q_trans[cpi->oxcf.cq_level]; if (oxcf->fixed_q >= 0) { @@ -1968,6 +1964,8 @@ cpi->active_best_quality = cpi->oxcf.best_allowed_q; cpi->buffered_mode = (cpi->oxcf.optimal_buffer_level > 0) ? TRUE : FALSE; + cpi->cq_target_quality = cpi->oxcf.cq_level; + cpi->rolling_target_bits = cpi->av_per_frame_bandwidth; cpi->rolling_actual_bits = cpi->av_per_frame_bandwidth; cpi->long_rolling_target_bits = cpi->av_per_frame_bandwidth; @@ -2109,7 +2107,6 @@ cpi->common.error.setjmp = 1; - CHECK_MEM_ERROR(cpi->rdtok, vpx_calloc(256 * 3 / 2, sizeof(TOKENEXTRA))); CHECK_MEM_ERROR(cpi->mb.ss, vpx_calloc(sizeof(search_site), (MAX_MVSEARCH_STEPS * 8) + 1)); vp8_create_common(&cpi->common); @@ -2145,7 +2142,10 @@ cpi->alt_is_last = 0 ; cpi->gold_is_alt = 0 ; - + // allocate memory for storing last frame's MVs for MV prediction. 
+ CHECK_MEM_ERROR(cpi->lfmv, vpx_calloc((cpi->common.mb_rows+2) * (cpi->common.mb_cols+2), sizeof(int_mv))); + CHECK_MEM_ERROR(cpi->lf_ref_frame_sign_bias, vpx_calloc((cpi->common.mb_rows+2) * (cpi->common.mb_cols+2), sizeof(int))); + CHECK_MEM_ERROR(cpi->lf_ref_frame, vpx_calloc((cpi->common.mb_rows+2) * (cpi->common.mb_cols+2), sizeof(int))); // Create the encoder segmentation map and set all entries to 0 CHECK_MEM_ERROR(cpi->segmentation_map, vpx_calloc(cpi->common.mb_rows * cpi->common.mb_cols, 1)); @@ -2153,9 +2153,11 @@ vpx_memset(cpi->active_map , 1, (cpi->common.mb_rows * cpi->common.mb_cols)); cpi->active_map_enabled = 0; +#if !(CONFIG_REALTIME_ONLY) // Create the first pass motion map structure and set to 0 // Allocate space for maximum of 15 buffers CHECK_MEM_ERROR(cpi->fp_motion_map, vpx_calloc(15*cpi->common.MBs, 1)); +#endif #if 0 // Experimental code for lagged and one pass @@ -2201,9 +2203,13 @@ init_context_counters(); #endif + /*Initialize the feed-forward activity masking.*/ + cpi->activity_avg = 90<<12; cpi->frames_since_key = 8; // Give a sensible default for the first frame. 
cpi->key_frame_frequency = cpi->oxcf.key_freq; + cpi->this_key_frame_forced = FALSE; + cpi->next_key_frame_forced = FALSE; cpi->source_alt_ref_pending = FALSE; cpi->source_alt_ref_active = FALSE; @@ -2332,7 +2338,9 @@ init_mv_ref_counts(); #endif +#if CONFIG_MULTITHREAD vp8cx_create_encoder_threads(cpi); +#endif cpi->fn_ptr[BLOCK_16X16].sdf = VARIANCE_INVOKE(&cpi->rtcd.variance, sad16x16); cpi->fn_ptr[BLOCK_16X16].vf = VARIANCE_INVOKE(&cpi->rtcd.variance, var16x16); @@ -2341,6 +2349,7 @@ cpi->fn_ptr[BLOCK_16X16].svf_halfpix_v = VARIANCE_INVOKE(&cpi->rtcd.variance, halfpixvar16x16_v); cpi->fn_ptr[BLOCK_16X16].svf_halfpix_hv = VARIANCE_INVOKE(&cpi->rtcd.variance, halfpixvar16x16_hv); cpi->fn_ptr[BLOCK_16X16].sdx3f = VARIANCE_INVOKE(&cpi->rtcd.variance, sad16x16x3); + cpi->fn_ptr[BLOCK_16X16].sdx8f = VARIANCE_INVOKE(&cpi->rtcd.variance, sad16x16x8); cpi->fn_ptr[BLOCK_16X16].sdx4df = VARIANCE_INVOKE(&cpi->rtcd.variance, sad16x16x4d); cpi->fn_ptr[BLOCK_16X8].sdf = VARIANCE_INVOKE(&cpi->rtcd.variance, sad16x8); @@ -2350,6 +2359,7 @@ cpi->fn_ptr[BLOCK_16X8].svf_halfpix_v = NULL; cpi->fn_ptr[BLOCK_16X8].svf_halfpix_hv = NULL; cpi->fn_ptr[BLOCK_16X8].sdx3f = VARIANCE_INVOKE(&cpi->rtcd.variance, sad16x8x3); + cpi->fn_ptr[BLOCK_16X8].sdx8f = VARIANCE_INVOKE(&cpi->rtcd.variance, sad16x8x8); cpi->fn_ptr[BLOCK_16X8].sdx4df = VARIANCE_INVOKE(&cpi->rtcd.variance, sad16x8x4d); cpi->fn_ptr[BLOCK_8X16].sdf = VARIANCE_INVOKE(&cpi->rtcd.variance, sad8x16); @@ -2359,6 +2369,7 @@ cpi->fn_ptr[BLOCK_8X16].svf_halfpix_v = NULL; cpi->fn_ptr[BLOCK_8X16].svf_halfpix_hv = NULL; cpi->fn_ptr[BLOCK_8X16].sdx3f = VARIANCE_INVOKE(&cpi->rtcd.variance, sad8x16x3); + cpi->fn_ptr[BLOCK_8X16].sdx8f = VARIANCE_INVOKE(&cpi->rtcd.variance, sad8x16x8); cpi->fn_ptr[BLOCK_8X16].sdx4df = VARIANCE_INVOKE(&cpi->rtcd.variance, sad8x16x4d); cpi->fn_ptr[BLOCK_8X8].sdf = VARIANCE_INVOKE(&cpi->rtcd.variance, sad8x8); @@ -2368,6 +2379,7 @@ cpi->fn_ptr[BLOCK_8X8].svf_halfpix_v = NULL; 
cpi->fn_ptr[BLOCK_8X8].svf_halfpix_hv = NULL; cpi->fn_ptr[BLOCK_8X8].sdx3f = VARIANCE_INVOKE(&cpi->rtcd.variance, sad8x8x3); + cpi->fn_ptr[BLOCK_8X8].sdx8f = VARIANCE_INVOKE(&cpi->rtcd.variance, sad8x8x8); cpi->fn_ptr[BLOCK_8X8].sdx4df = VARIANCE_INVOKE(&cpi->rtcd.variance, sad8x8x4d); cpi->fn_ptr[BLOCK_4X4].sdf = VARIANCE_INVOKE(&cpi->rtcd.variance, sad4x4); @@ -2377,6 +2389,7 @@ cpi->fn_ptr[BLOCK_4X4].svf_halfpix_v = NULL; cpi->fn_ptr[BLOCK_4X4].svf_halfpix_hv = NULL; cpi->fn_ptr[BLOCK_4X4].sdx3f = VARIANCE_INVOKE(&cpi->rtcd.variance, sad4x4x3); + cpi->fn_ptr[BLOCK_4X4].sdx8f = VARIANCE_INVOKE(&cpi->rtcd.variance, sad4x4x8); cpi->fn_ptr[BLOCK_4X4].sdx4df = VARIANCE_INVOKE(&cpi->rtcd.variance, sad4x4x4d); #if !(CONFIG_REALTIME_ONLY) @@ -2602,12 +2615,13 @@ } +#if CONFIG_MULTITHREAD vp8cx_remove_encoder_threads(cpi); +#endif vp8_dealloc_compressor_data(cpi); vpx_free(cpi->mb.ss); vpx_free(cpi->tok); - vpx_free(cpi->rdtok); vpx_free(cpi->cyclic_refresh_map); vp8_remove_common(&cpi->common); @@ -3022,29 +3036,46 @@ } } - // Note target_size in bits * 256 per MB - cpi->target_bits_per_mb = (cpi->this_frame_target * 256) / cpi->common.MBs; - return 1; } + static void set_quantizer(VP8_COMP *cpi, int Q) { VP8_COMMON *cm = &cpi->common; MACROBLOCKD *mbd = &cpi->mb.e_mbd; - + int update = 0; + int new_delta_q; cm->base_qindex = Q; + /* if any of the delta_q values are changing update flag has to be set */ + /* currently only y2dc_delta_q may change */ + cm->y1dc_delta_q = 0; - cm->y2dc_delta_q = 0; cm->y2ac_delta_q = 0; cm->uvdc_delta_q = 0; cm->uvac_delta_q = 0; + if (Q < 4) + { + new_delta_q = 4-Q; + } + else + new_delta_q = 0; + + update |= cm->y2dc_delta_q != new_delta_q; + cm->y2dc_delta_q = new_delta_q; + + // Set Segment specific quatizers mbd->segment_feature_data[MB_LVL_ALT_Q][0] = cpi->segment_feature_data[MB_LVL_ALT_Q][0]; mbd->segment_feature_data[MB_LVL_ALT_Q][1] = cpi->segment_feature_data[MB_LVL_ALT_Q][1]; mbd->segment_feature_data[MB_LVL_ALT_Q][2] = 
cpi->segment_feature_data[MB_LVL_ALT_Q][2]; mbd->segment_feature_data[MB_LVL_ALT_Q][3] = cpi->segment_feature_data[MB_LVL_ALT_Q][3]; + + /* quantizer has to be reinitialized for any delta_q changes */ + if(update) + vp8cx_init_quantizer(cpi); + } static void update_alt_ref_frame_and_stats(VP8_COMP *cpi) @@ -3074,8 +3105,8 @@ // Update data structure that monitors level of reference to last GF vpx_memset(cpi->gf_active_flags, 1, (cm->mb_rows * cm->mb_cols)); cpi->gf_active_count = cm->mb_rows * cm->mb_cols; - // this frame refreshes means next frames don't unless specified by user + // this frame refreshes means next frames don't unless specified by user cpi->common.frames_since_golden = 0; // Clear the alternate reference update pending flag. @@ -3093,8 +3124,11 @@ // Update the Golden frame reconstruction buffer if signalled and the GF usage counts. if (cm->refresh_golden_frame) { - // Update the golden frame buffer - vp8_yv12_copy_frame_ptr(cm->frame_to_show, &cm->yv12_fb[cm->gld_fb_idx]); + if (cm->frame_type != KEY_FRAME) + { + // Update the golden frame buffer + vp8_yv12_copy_frame_ptr(cm->frame_to_show, &cm->yv12_fb[cm->gld_fb_idx]); + } // Select an interval before next GF if (!cpi->auto_gold) @@ -3427,6 +3461,54 @@ #endif // return of 0 means drop frame +// Function to test for conditions that indeicate we should loop +// back and recode a frame. +static BOOL recode_loop_test( VP8_COMP *cpi, + int high_limit, int low_limit, + int q, int maxq, int minq ) +{ + BOOL force_recode = FALSE; + VP8_COMMON *cm = &cpi->common; + + // Is frame recode allowed at all + // Yes if either recode mode 1 is selected or mode two is selcted + // and the frame is a key frame. 
golden frame or alt_ref_frame + if ( (cpi->sf.recode_loop == 1) || + ( (cpi->sf.recode_loop == 2) && + ( (cm->frame_type == KEY_FRAME) || + cm->refresh_golden_frame || + cm->refresh_alt_ref_frame ) ) ) + { + // General over and under shoot tests + if ( ((cpi->projected_frame_size > high_limit) && (q < maxq)) || + ((cpi->projected_frame_size < low_limit) && (q > minq)) ) + { + force_recode = TRUE; + } + // Special Constrained quality tests + else if (cpi->oxcf.end_usage == USAGE_CONSTRAINED_QUALITY) + { + // Undershoot and below auto cq level + if ( (q > cpi->cq_target_quality) && + (cpi->projected_frame_size < + ((cpi->this_frame_target * 7) >> 3))) + { + force_recode = TRUE; + } + // Severe undershoot and between auto and user cq level + else if ( (q > cpi->oxcf.cq_level) && + (cpi->projected_frame_size < cpi->min_frame_bandwidth) && + (cpi->active_best_quality > cpi->oxcf.cq_level)) + { + force_recode = TRUE; + cpi->active_best_quality = cpi->oxcf.cq_level; + } + } + } + + return force_recode; +} + static void encode_frame_to_data_rate ( VP8_COMP *cpi, @@ -3466,6 +3548,17 @@ // Test code for segmentation of gf/arf (0,0) //segmentation_test_function((VP8_PTR) cpi); +#if CONFIG_REALTIME_ONLY + if(cpi->oxcf.auto_key && cm->frame_type != KEY_FRAME) + { + if(cpi->force_next_frame_intra) + { + cm->frame_type = KEY_FRAME; /* delayed intra frame */ + } + } + cpi->force_next_frame_intra = 0; +#endif + // For an alt ref frame in 2 pass we skip the call to the second pass function that sets the target bandwidth #if !(CONFIG_REALTIME_ONLY) @@ -3489,8 +3582,18 @@ cpi->zbin_over_quant = 0; cpi->zbin_mode_boost = 0; - // Enable mode based tweaking of the zbin + // Enable or disable mode based tweaking of the zbin + // For 2 Pass Only used where GF/ARF prediction quality + // is above a threshold + cpi->zbin_mode_boost = 0; cpi->zbin_mode_boost_enabled = TRUE; + if (cpi->pass == 2) + { + if ( cpi->gfu_boost <= 400 ) + { + cpi->zbin_mode_boost_enabled = FALSE; + } + } // Current 
default encoder behaviour for the altref sign bias if (cpi->source_alt_ref_active) @@ -3678,51 +3781,85 @@ } // Set an active best quality and if necessary active worst quality - if (cpi->pass == 2 || (cm->current_video_frame > 150)) + // There is some odd behaviour for one pass here that needs attention. + if ( (cpi->pass == 2) || (cpi->ni_frames > 150)) { - int Q; - int i; - int bpm_target; - //int tmp; - vp8_clear_system_state(); Q = cpi->active_worst_quality; - if ((cm->frame_type == KEY_FRAME) || cm->refresh_golden_frame || cpi->common.refresh_alt_ref_frame) + if ( cm->frame_type == KEY_FRAME ) { - if (cm->frame_type != KEY_FRAME) + if ( cpi->pass == 2 ) + { + if (cpi->gfu_boost > 600) + cpi->active_best_quality = kf_low_motion_minq[Q]; + else + cpi->active_best_quality = kf_high_motion_minq[Q]; + + // Special case for key frames forced because we have reached + // the maximum key frame interval. Here force the Q to a range + // based on the ambient Q to reduce the risk of popping + if ( cpi->this_key_frame_forced ) + { + if ( cpi->active_best_quality > cpi->avg_frame_qindex * 7/8) + cpi->active_best_quality = cpi->avg_frame_qindex * 7/8; + else if ( cpi->active_best_quality < cpi->avg_frame_qindex >> 2 ) + cpi->active_best_quality = cpi->avg_frame_qindex >> 2; + } + } + // One pass more conservative + else + cpi->active_best_quality = kf_high_motion_minq[Q]; + } + + else if (cm->refresh_golden_frame || cpi->common.refresh_alt_ref_frame) + { + // Use the lower of cpi->active_worst_quality and recent + // average Q as basis for GF/ARF Q limit unless last frame was + // a key frame. 
+ if ( (cpi->frames_since_key > 1) && + (cpi->avg_frame_qindex < cpi->active_worst_quality) ) { - if (cpi->avg_frame_qindex < cpi->active_worst_quality) - Q = cpi->avg_frame_qindex; + Q = cpi->avg_frame_qindex; - if ( cpi->gfu_boost > 1000 ) + if ( (cpi->oxcf.end_usage == USAGE_CONSTRAINED_QUALITY) && + (Q < cpi->oxcf.cq_level) ) + { + Q = cpi->oxcf.cq_level; + } + } + + if ( cpi->pass == 2 ) + { + if ( cpi->gfu_boost > 1000 ) cpi->active_best_quality = gf_low_motion_minq[Q]; else if ( cpi->gfu_boost < 400 ) cpi->active_best_quality = gf_high_motion_minq[Q]; else cpi->active_best_quality = gf_mid_motion_minq[Q]; - - /*cpi->active_best_quality = gf_arf_minq[Q]; - tmp = (cpi->gfu_boost > 1000) ? 600 : cpi->gfu_boost - 400; - //tmp = (cpi->gfu_boost > 1000) ? 600 : - //(cpi->gfu_boost < 400) ? 0 : cpi->gfu_boost - 400; - tmp = 128 - (tmp >> 4); - cpi->active_best_quality = (cpi->active_best_quality * tmp)>>7;*/ - - } - // KEY FRAMES - else - { - if (cpi->gfu_boost > 600) - cpi->active_best_quality = kf_low_motion_minq[Q]; - else - cpi->active_best_quality = kf_high_motion_minq[Q]; - } + } + // One pass more conservative + else + cpi->active_best_quality = gf_high_motion_minq[Q]; } else { cpi->active_best_quality = inter_minq[Q]; + + // For the constant/constrained quality mode we dont want + // the quality to rise above the cq level. + if ((cpi->oxcf.end_usage == USAGE_CONSTRAINED_QUALITY) && + (cpi->active_best_quality < cpi->cq_target_quality) ) + { + // If we are strongly undershooting the target rate in the last + // frames then use the user passed in cq value not the auto + // cq value. 
+ if ( cpi->rolling_actual_bits < cpi->min_frame_bandwidth ) + cpi->active_best_quality = cpi->oxcf.cq_level; + else + cpi->active_best_quality = cpi->cq_target_quality; + } } // If CBR and the buffer is as full then it is reasonable to allow higher quality on the frames @@ -3740,7 +3877,6 @@ cpi->active_best_quality -= min_qadjustment; } - } } @@ -3771,17 +3907,16 @@ vp8_compute_frame_size_bounds(cpi, &frame_under_shoot_limit, &frame_over_shoot_limit); - // Limit Q range for the adaptive loop (Values not clipped to range 20-60 as in VP8). + // Limit Q range for the adaptive loop. bottom_index = cpi->active_best_quality; top_index = cpi->active_worst_quality; + q_low = cpi->active_best_quality; + q_high = cpi->active_worst_quality; vp8_save_coding_context(cpi); loop_count = 0; - q_low = cpi->best_quality; - q_high = cpi->worst_quality; - scale_and_extend_source(cpi->un_scaled_source, cpi); #if !(CONFIG_REALTIME_ONLY) && CONFIG_POSTPROC @@ -3817,7 +3952,6 @@ if (cm->frame_type == KEY_FRAME) { vp8_de_noise(cpi->Source, cpi->Source, l , 1, 0, RTCD(postproc)); - cpi->ppi.frame = 0; } else { @@ -3829,10 +3963,6 @@ { src += cpi->Source->y_stride * (cpi->Source->y_height - 1); } - - //temp_filter(&cpi->ppi,src,src, - // cm->last_frame.y_width * cm->last_frame.y_height, - // cpi->oxcf.noise_sensitivity); } } @@ -3939,6 +4069,14 @@ // (assuming that we didn't)! 
if (cpi->pass != 2 && cpi->oxcf.auto_key && cm->frame_type != KEY_FRAME) { + +#if CONFIG_REALTIME_ONLY + { + /* we don't do re-encoding in realtime mode + * if key frame is decided than we force it on next frame */ + cpi->force_next_frame_intra = decide_key_frame(cpi); + } +#else if (decide_key_frame(cpi)) { vp8_calc_auto_iframe_target_size(cpi); @@ -3963,15 +4101,13 @@ Q = vp8_regulate_q(cpi, cpi->this_frame_target); - q_low = cpi->best_quality; - q_high = cpi->worst_quality; - vp8_compute_frame_size_bounds(cpi, &frame_under_shoot_limit, &frame_over_shoot_limit); - // Limit Q range for the adaptive loop (Values not clipped to range 20-60 as in VP8). + // Limit Q range for the adaptive loop. bottom_index = cpi->active_best_quality; top_index = cpi->active_worst_quality; - + q_low = cpi->active_best_quality; + q_high = cpi->active_worst_quality; loop_count++; Loop = TRUE; @@ -3979,6 +4115,7 @@ resize_key_frame(cpi); continue; } +#endif } vp8_clear_system_state(); @@ -4009,26 +4146,60 @@ active_worst_qchanged = FALSE; #if !(CONFIG_REALTIME_ONLY) + // Special case handling for forced key frames + if ( (cm->frame_type == KEY_FRAME) && cpi->this_key_frame_forced ) + { + int last_q = Q; + int kf_err = vp8_calc_ss_err(cpi->Source, + &cm->yv12_fb[cm->new_fb_idx], + IF_RTCD(&cpi->rtcd.variance)); + + // The key frame is not good enough + if ( kf_err > ((cpi->ambient_err * 7) >> 3) ) + { + // Lower q_high + q_high = (Q > q_low) ? (Q - 1) : q_low; + + // Adjust Q + Q = (q_high + q_low) >> 1; + } + // The key frame is much better than the previous frame + else if ( kf_err < (cpi->ambient_err >> 1) ) + { + // Raise q_low + q_low = (Q < q_high) ? (Q + 1) : q_high; + + // Adjust Q + Q = (q_high + q_low + 1) >> 1; + } + + // Clamp Q to upper and lower limits: + if (Q > q_high) + Q = q_high; + else if (Q < q_low) + Q = q_low; + + Loop = ((Q != last_q)) ? TRUE : FALSE; + } // Is the projected frame size out of range and are we allowed to attempt to recode. 
- if (((cpi->sf.recode_loop == 1) || - ((cpi->sf.recode_loop == 2) && (cm->refresh_golden_frame || (cm->frame_type == KEY_FRAME)))) && - (((cpi->projected_frame_size > frame_over_shoot_limit) && (Q < top_index)) || - //((cpi->projected_frame_size > frame_over_shoot_limit ) && (Q == top_index) && (cpi->zbin_over_quant < ZBIN_OQ_MAX)) || - ((cpi->projected_frame_size < frame_under_shoot_limit) && (Q > bottom_index))) - ) + else if ( recode_loop_test( cpi, + frame_over_shoot_limit, frame_under_shoot_limit, + Q, top_index, bottom_index ) ) { int last_q = Q; int Retries = 0; // Frame size out of permitted range: // Update correction factor & compute new Q to try... - if (cpi->projected_frame_size > frame_over_shoot_limit) + + // Frame is too large + if (cpi->projected_frame_size > cpi->this_frame_target) { //if ( cpi->zbin_over_quant == 0 ) q_low = (Q < q_high) ? (Q + 1) : q_high; // Raise Qlow as to at least the current value - if (cpi->zbin_over_quant > 0) // If we are using over quant do the same for zbin_oq_low + if (cpi->zbin_over_quant > 0) // If we are using over quant do the same for zbin_oq_low zbin_oq_low = (cpi->zbin_over_quant < zbin_oq_high) ? (cpi->zbin_over_quant + 1) : zbin_oq_high; //if ( undershoot_seen || (Q == MAXQ) ) @@ -4067,6 +4238,7 @@ overshoot_seen = TRUE; } + // Frame is too small else { if (cpi->zbin_over_quant == 0) @@ -4096,6 +4268,16 @@ Q = vp8_regulate_q(cpi, cpi->this_frame_target); + // Special case reset for qlow for constrained quality. + // This should only trigger where there is very substantial + // undershoot on a frame and the auto cq level is above + // the user passsed in value. + if ( (cpi->oxcf.end_usage == USAGE_CONSTRAINED_QUALITY) && + (Q < q_low) ) + { + q_low = Q; + } + while (((Q > q_high) || (cpi->zbin_over_quant > zbin_oq_high)) && (Retries < 10)) { vp8_update_rate_correction_factors(cpi, 0); @@ -4160,8 +4342,44 @@ } #endif + // Special case code to reduce pulsing when key frames are forced at a + // fixed interval. 
Note the reconstruction error if it is the frame before + // the force key frame + if ( cpi->next_key_frame_forced && (cpi->frames_to_key == 0) ) + { + cpi->ambient_err = vp8_calc_ss_err(cpi->Source, + &cm->yv12_fb[cm->new_fb_idx], + IF_RTCD(&cpi->rtcd.variance)); + } + + // This frame's MVs are saved and will be used in next frame's MV prediction. + // Last frame has one more line(add to bottom) and one more column(add to right) than cm->mip. The edge elements are initialized to 0. + if(cm->show_frame) //do not save for altref frame + { + int mb_row; + int mb_col; + MODE_INFO *tmp = cm->mip; //point to beginning of allocated MODE_INFO arrays. + + if(cm->frame_type != KEY_FRAME) + { + for (mb_row = 0; mb_row < cm->mb_rows+1; mb_row ++) + { + for (mb_col = 0; mb_col < cm->mb_cols+1; mb_col ++) + { + if(tmp->mbmi.ref_frame != INTRA_FRAME) + cpi->lfmv[mb_col + mb_row*(cm->mode_info_stride+1)].as_int = tmp->mbmi.mv.as_int; + + cpi->lf_ref_frame_sign_bias[mb_col + mb_row*(cm->mode_info_stride+1)] = cm->ref_frame_sign_bias[tmp->mbmi.ref_frame]; + cpi->lf_ref_frame[mb_col + mb_row*(cm->mode_info_stride+1)] = tmp->mbmi.ref_frame; + tmp++; + } + } + } + } + // Update the GF useage maps. // This is done after completing the compression of a frame when all modes etc. are finalized but before loop filter + // This is done after completing the compression of a frame when all modes etc. 
are finalized but before loop filter vp8_update_gf_useage_maps(cpi, cm, &cpi->mb); if (cm->frame_type == KEY_FRAME) @@ -4190,54 +4408,48 @@ else cm->frame_to_show = &cm->yv12_fb[cm->new_fb_idx]; - - - //#pragma omp parallel sections + if (cm->no_lpf) { + cm->filter_level = 0; + } + else + { + struct vpx_usec_timer timer; - //#pragma omp section - { - - struct vpx_usec_timer timer; - - vpx_usec_timer_start(&timer); - - if (cpi->sf.auto_filter == 0) - vp8cx_pick_filter_level_fast(cpi->Source, cpi); - else - vp8cx_pick_filter_level(cpi->Source, cpi); + vpx_usec_timer_start(&timer); - vpx_usec_timer_mark(&timer); + if (cpi->sf.auto_filter == 0) + vp8cx_pick_filter_level_fast(cpi->Source, cpi); + else + vp8cx_pick_filter_level(cpi->Source, cpi); - cpi->time_pick_lpf += vpx_usec_timer_elapsed(&timer); + vpx_usec_timer_mark(&timer); - if (cm->no_lpf) - cm->filter_level = 0; + cpi->time_pick_lpf += vpx_usec_timer_elapsed(&timer); + } - if (cm->filter_level > 0) - { - vp8cx_set_alt_lf_level(cpi, cm->filter_level); - vp8_loop_filter_frame(cm, &cpi->mb.e_mbd, cm->filter_level); - cm->last_frame_type = cm->frame_type; - cm->last_filter_type = cm->filter_type; - cm->last_sharpness_level = cm->sharpness_level; - } + if (cm->filter_level > 0) + { + vp8cx_set_alt_lf_level(cpi, cm->filter_level); + vp8_loop_filter_frame(cm, &cpi->mb.e_mbd, cm->filter_level); + cm->last_filter_type = cm->filter_type; + cm->last_sharpness_level = cm->sharpness_level; + } - vp8_yv12_extend_frame_borders_ptr(cm->frame_to_show); + /* Move storing frame_type out of the above loop since it is also + * needed in motion search besides loopfilter */ + cm->last_frame_type = cm->frame_type; - if (cpi->oxcf.error_resilient_mode == 1) - { - cm->refresh_entropy_probs = 0; - } + vp8_yv12_extend_frame_borders_ptr(cm->frame_to_show); - } -//#pragma omp section - { - // build the bitstream - vp8_pack_bitstream(cpi, dest, size); - } + if (cpi->oxcf.error_resilient_mode == 1) + { + cm->refresh_entropy_probs = 0; } + // 
build the bitstream + vp8_pack_bitstream(cpi, dest, size); + { YV12_BUFFER_CONFIG *lst_yv12 = &cm->yv12_fb[cm->lst_fb_idx]; YV12_BUFFER_CONFIG *new_yv12 = &cm->yv12_fb[cm->new_fb_idx]; @@ -4298,9 +4510,7 @@ } // Keep a record of ambient average Q. - if (cm->frame_type == KEY_FRAME) - cpi->avg_frame_qindex = cm->base_qindex; - else + if (cm->frame_type != KEY_FRAME) cpi->avg_frame_qindex = (2 + 3 * cpi->avg_frame_qindex + cm->base_qindex) >> 2; // Keep a record from which we can calculate the average Q excluding GF updates and key frames @@ -4308,30 +4518,38 @@ { cpi->ni_frames++; - // Calculate the average Q for normal inter frames (not key or GFU frames) - // This is used as a basis for setting active worst quality. - if (cpi->ni_frames > 150) + // Calculate the average Q for normal inter frames (not key or GFU + // frames). + if ( cpi->pass == 2 ) { cpi->ni_tot_qi += Q; cpi->ni_av_qi = (cpi->ni_tot_qi / cpi->ni_frames); } - // Early in the clip ... average the current frame Q value with the default - // entered by the user as a dampening measure else { - cpi->ni_tot_qi += Q; - cpi->ni_av_qi = ((cpi->ni_tot_qi / cpi->ni_frames) + cpi->worst_quality + 1) / 2; - } - - // If the average Q is higher than what was used in the last frame - // (after going through the recode loop to keep the frame size within range) - // then use the last frame value - 1. - // The -1 is designed to stop Q and hence the data rate, from progressively - // falling away during difficult sections, but at the same time reduce the number of - // itterations around the recode loop. - if (Q > cpi->ni_av_qi) - cpi->ni_av_qi = Q - 1; + // Damp value for first few frames + if (cpi->ni_frames > 150 ) + { + cpi->ni_tot_qi += Q; + cpi->ni_av_qi = (cpi->ni_tot_qi / cpi->ni_frames); + } + // For one pass, early in the clip ... 
average the current frame Q + // value with the worstq entered by the user as a dampening measure + else + { + cpi->ni_tot_qi += Q; + cpi->ni_av_qi = ((cpi->ni_tot_qi / cpi->ni_frames) + cpi->worst_quality + 1) / 2; + } + // If the average Q is higher than what was used in the last frame + // (after going through the recode loop to keep the frame size within range) + // then use the last frame value - 1. + // The -1 is designed to stop Q and hence the data rate, from progressively + // falling away during difficult sections, but at the same time reduce the number of + // itterations around the recode loop. + if (Q > cpi->ni_av_qi) + cpi->ni_av_qi = Q - 1; + } } #if 0 @@ -4359,7 +4577,8 @@ } // Update the buffer level variable. - if (cpi->common.refresh_alt_ref_frame) + // Non-viewable frames are a special case and are treated as pure overhead. + if ( !cm->show_frame ) cpi->bits_off_target -= cpi->projected_frame_size; else cpi->bits_off_target += cpi->av_per_frame_bandwidth - cpi->projected_frame_size; @@ -4425,7 +4644,7 @@ if (cpi->total_coded_error_left != 0.0) fprintf(f, "%10d %10d %10d %10d %10d %10d %10d %10d %6ld %6ld" - "%6ld %6ld %5ld %5ld %5ld %8ld %8.2f %10d %10.3f" + "%6ld %6ld %6ld %5ld %5ld %5ld %8ld %8.2f %10d %10.3f" "%10.3f %8ld\n", cpi->common.current_video_frame, cpi->this_frame_target, cpi->projected_frame_size, @@ -4434,7 +4653,8 @@ (cpi->oxcf.starting_buffer_level-cpi->bits_off_target), (int)cpi->total_actual_bits, cm->base_qindex, cpi->active_best_quality, cpi->active_worst_quality, - cpi->avg_frame_qindex, cpi->zbin_over_quant, + cpi->ni_av_qi, cpi->cq_target_quality, cpi->zbin_over_quant, + //cpi->avg_frame_qindex, cpi->zbin_over_quant, cm->refresh_golden_frame, cm->refresh_alt_ref_frame, cm->frame_type, cpi->gfu_boost, cpi->est_max_qcorrection_factor, (int)cpi->bits_left, @@ -4443,7 +4663,7 @@ cpi->tot_recode_hits); else fprintf(f, "%10d %10d %10d %10d %10d %10d %10d %10d %6ld %6ld" - "%6ld %6ld %5ld %5ld %5ld %8ld %8.2f %10d %10.3f" + 
"%6ld %6ld %6ld %5ld %5ld %5ld %8ld %8.2f %10d %10.3f" "%8ld\n", cpi->common.current_video_frame, cpi->this_frame_target, cpi->projected_frame_size, @@ -4452,7 +4672,8 @@ (cpi->oxcf.starting_buffer_level-cpi->bits_off_target), (int)cpi->total_actual_bits, cm->base_qindex, cpi->active_best_quality, cpi->active_worst_quality, - cpi->avg_frame_qindex, cpi->zbin_over_quant, + cpi->ni_av_qi, cpi->cq_target_quality, cpi->zbin_over_quant, + //cpi->avg_frame_qindex, cpi->zbin_over_quant, cm->refresh_golden_frame, cm->refresh_alt_ref_frame, cm->frame_type, cpi->gfu_boost, cpi->est_max_qcorrection_factor, (int)cpi->bits_left, @@ -4524,16 +4745,19 @@ if (cpi->oxcf.error_resilient_mode) { - // Is this an alternate reference update - if (cpi->common.refresh_alt_ref_frame) - vp8_yv12_copy_frame_ptr(cm->frame_to_show, &cm->yv12_fb[cm->alt_fb_idx]); + if (cm->frame_type != KEY_FRAME) + { + // Is this an alternate reference update + if (cm->refresh_alt_ref_frame) + vp8_yv12_copy_frame_ptr(cm->frame_to_show, &cm->yv12_fb[cm->alt_fb_idx]); - if (cpi->common.refresh_golden_frame) - vp8_yv12_copy_frame_ptr(cm->frame_to_show, &cm->yv12_fb[cm->gld_fb_idx]); + if (cm->refresh_golden_frame) + vp8_yv12_copy_frame_ptr(cm->frame_to_show, &cm->yv12_fb[cm->gld_fb_idx]); + } } else { - if (cpi->oxcf.play_alternate && cpi->common.refresh_alt_ref_frame) + if (cpi->oxcf.play_alternate && cm->refresh_alt_ref_frame && (cm->frame_type != KEY_FRAME)) // Update the alternate reference frame and stats as appropriate. 
update_alt_ref_frame_and_stats(cpi); else @@ -4684,7 +4908,9 @@ #endif int vp8_receive_raw_frame(VP8_PTR ptr, unsigned int frame_flags, YV12_BUFFER_CONFIG *sd, INT64 time_stamp, INT64 end_time) { +#if HAVE_ARMV7 INT64 store_reg[8]; +#endif VP8_COMP *cpi = (VP8_COMP *) ptr; VP8_COMMON *cm = &cpi->common; struct vpx_usec_timer timer; @@ -4787,7 +5013,9 @@ } int vp8_get_compressed_data(VP8_PTR ptr, unsigned int *frame_flags, unsigned long *size, unsigned char *dest, INT64 *time_stamp, INT64 *time_end, int flush) { +#if HAVE_ARMV7 INT64 store_reg[8]; +#endif VP8_COMP *cpi = (VP8_COMP *) ptr; VP8_COMMON *cm = &cpi->common; struct vpx_usec_timer tsctimer; @@ -4856,7 +5084,7 @@ { int thiserr; cpi->oxcf.arnr_strength = i; - vp8cx_temp_filter_c(cpi); + vp8_temporal_filter_prepare_c(cpi); thiserr = vp8_calc_low_ss_err(&cpi->alt_ref_buffer.source_buffer, &cpi->src_buffer[start_frame].source_buffer, IF_RTCD(&cpi->rtcd.variance)); @@ -4871,7 +5099,7 @@ if (besti != -1) { cpi->oxcf.arnr_strength = besti; - vp8cx_temp_filter_c(cpi); + vp8_temporal_filter_prepare_c(cpi); s = &cpi->alt_ref_buffer; // FWG not sure if I need to copy this data for the Alt Ref frame @@ -4883,7 +5111,7 @@ s = &cpi->src_buffer[cpi->last_alt_ref_sei]; #else - vp8cx_temp_filter_c(cpi); + vp8_temporal_filter_prepare_c(cpi); s = &cpi->alt_ref_buffer; // FWG not sure if I need to copy this data for the Alt Ref frame @@ -4967,17 +5195,16 @@ *frame_flags = cpi->source_frame_flags; -#if CONFIG_PSNR - if (cpi->source_time_stamp < cpi->first_time_stamp_ever) + { cpi->first_time_stamp_ever = cpi->source_time_stamp; - -#endif + cpi->last_end_time_stamp_seen = cpi->source_time_stamp; + } // adjust frame rates based on timestamps given if (!cm->refresh_alt_ref_frame) { - if (cpi->last_time_stamp_seen == 0) + if (cpi->source_time_stamp == cpi->first_time_stamp_ever) { double this_fps = 10000000.000 / (cpi->source_end_time_stamp - cpi->source_time_stamp); @@ -4985,36 +5212,24 @@ } else { - long long nanosecs = 
cpi->source_time_stamp - cpi->last_time_stamp_seen; - double this_fps = 10000000.000 / nanosecs; + long long nanosecs = cpi->source_end_time_stamp + - cpi->last_end_time_stamp_seen; - vp8_new_frame_rate(cpi, (7 * cpi->oxcf.frame_rate + this_fps) / 8); + if (nanosecs > 0) + { + double this_fps = 10000000.000 / nanosecs; + vp8_new_frame_rate(cpi, (7 * cpi->oxcf.frame_rate + this_fps) / 8); + } } cpi->last_time_stamp_seen = cpi->source_time_stamp; + cpi->last_end_time_stamp_seen = cpi->source_end_time_stamp; } if (cpi->compressor_speed == 2) { vp8_check_gf_quality(cpi); - } - - if (!cpi) - { -#if HAVE_ARMV7 -#if CONFIG_RUNTIME_CPU_DETECT - if (cm->rtcd.flags & HAS_NEON) -#endif - { - vp8_pop_neon(store_reg); - } -#endif - return 0; - } - - if (cpi->compressor_speed == 2) - { vpx_usec_timer_start(&tsctimer); vpx_usec_timer_start(&ticktimer); } @@ -5208,7 +5423,7 @@ return 0; } -int vp8_get_preview_raw_frame(VP8_PTR comp, YV12_BUFFER_CONFIG *dest, int deblock_level, int noise_level, int flags) +int vp8_get_preview_raw_frame(VP8_PTR comp, YV12_BUFFER_CONFIG *dest, vp8_ppflags_t *flags) { VP8_COMP *cpi = (VP8_COMP *) comp; @@ -5218,7 +5433,7 @@ { int ret; #if CONFIG_POSTPROC - ret = vp8_post_proc_frame(&cpi->common, dest, deblock_level, noise_level, flags); + ret = vp8_post_proc_frame(&cpi->common, dest, flags); #else if (cpi->common.frame_to_show) @@ -5311,12 +5526,12 @@ { VP8_COMP *cpi = (VP8_COMP *) comp; - if (horiz_mode >= NORMAL && horiz_mode <= ONETWO) + if (horiz_mode <= ONETWO) cpi->common.horiz_scale = horiz_mode; else return -1; - if (vert_mode >= NORMAL && vert_mode <= ONETWO) + if (vert_mode <= ONETWO) cpi->common.vert_scale = vert_mode; else return -1; diff -Nru libvpx-0.9.5/vp8/encoder/onyx_int.h libvpx-0.9.6/vp8/encoder/onyx_int.h --- libvpx-0.9.5/vp8/encoder/onyx_int.h 2010-10-28 13:14:14.000000000 +0000 +++ libvpx-0.9.6/vp8/encoder/onyx_int.h 2011-03-04 20:40:40.000000000 +0000 @@ -14,20 +14,21 @@ #include #include "vpx_ports/config.h" -#include "onyx.h" 
+#include "vp8/common/onyx.h" #include "treewriter.h" #include "tokenize.h" -#include "onyxc_int.h" -#include "preproc.h" +#include "vp8/common/onyxc_int.h" #include "variance.h" #include "dct.h" #include "encodemb.h" #include "quantize.h" -#include "entropy.h" -#include "threading.h" +#include "vp8/common/entropy.h" +#include "vp8/common/threading.h" #include "vpx_ports/mem.h" #include "vpx/internal/vpx_codec_internal.h" #include "mcomp.h" +#include "temporal_filter.h" +#include "vp8/common/findnearmv.h" //#define SPEEDSTATS 1 #define MIN_GF_INTERVAL 4 @@ -46,9 +47,13 @@ #define MAX_THRESHMULT 512 #define GF_ZEROMV_ZBIN_BOOST 24 +#define LF_ZEROMV_ZBIN_BOOST 12 +#define MV_ZBIN_BOOST 4 #define ZBIN_OQ_MAX 192 +#if !(CONFIG_REALTIME_ONLY) #define VP8_TEMPORAL_ALT_REF 1 +#endif typedef struct { @@ -180,16 +185,17 @@ int first_step; int optimize_coefficients; + int use_fastquant_for_pick; + int no_skip_block4x4_search; + int improved_mv_pred; + } SPEED_FEATURES; typedef struct { MACROBLOCK mb; - int mb_row; - TOKENEXTRA *tp; int segment_counts[MAX_MB_SEGMENTS]; int totalrate; - int current_mb_col; } MB_ROW_COMP; typedef struct @@ -227,6 +233,7 @@ vp8_encodemb_rtcd_vtable_t encodemb; vp8_quantize_rtcd_vtable_t quantize; vp8_search_rtcd_vtable_t search; + vp8_temporal_rtcd_vtable_t temporal; } VP8_ENCODER_RTCD; enum @@ -260,6 +267,9 @@ DECLARE_ALIGNED(16, short, zrun_zbin_boost_y1[QINDEX_RANGE][16]); DECLARE_ALIGNED(16, short, zrun_zbin_boost_y2[QINDEX_RANGE][16]); DECLARE_ALIGNED(16, short, zrun_zbin_boost_uv[QINDEX_RANGE][16]); + DECLARE_ALIGNED(16, short, Y1quant_fast[QINDEX_RANGE][16]); + DECLARE_ALIGNED(16, short, Y2quant_fast[QINDEX_RANGE][16]); + DECLARE_ALIGNED(16, short, UVquant_fast[QINDEX_RANGE][16]); MACROBLOCK mb; @@ -276,14 +286,14 @@ unsigned int source_frame_flags; YV12_BUFFER_CONFIG scaled_source; - int source_buffer_count; - int source_encode_index; - int source_alt_ref_pending; - int source_alt_ref_active; - - int last_alt_ref_sei; - int 
is_src_frame_alt_ref; - int is_next_src_alt_ref; + int source_buffer_count; // number of src_buffers in use for lagged encoding + int source_encode_index; // index of buffer in src_buffer to encode + int source_alt_ref_pending; // frame in src_buffers has been identified to be encoded as an alt ref + int source_alt_ref_active; // an alt ref frame has been encoded and is usable + + int last_alt_ref_sei; // index into src_buffers of frame used as alt reference + int is_src_frame_alt_ref; // source of frame to encode is an exact copy of an alt ref frame + int is_next_src_alt_ref; // source of next frame to encode is an exact copy of an alt ref frame int gold_is_last; // golden frame same as last frame ( short circuit gold searches) int alt_is_last; // Alt reference frame same as last ( short circuit altref search) @@ -294,15 +304,17 @@ YV12_BUFFER_CONFIG last_frame_uf; - char *Dest; - TOKENEXTRA *tok; unsigned int tok_count; unsigned int frames_since_key; unsigned int key_frame_frequency; - unsigned int next_key; + unsigned int this_key_frame_forced; + unsigned int next_key_frame_forced; + + // Ambient reconstruction err target for force key frames + int ambient_err; unsigned int mode_check_freq[MAX_MODES]; unsigned int mode_test_hit_counts[MAX_MODES]; @@ -319,15 +331,11 @@ int mvcostmultiplier; int subseqblockweight; int errthresh; + unsigned int activity_avg; int RDMULT; int RDDIV ; - TOKENEXTRA *rdtok; - vp8_writer rdbc; - int intra_mode_costs[10]; - - CODING_CONTEXT coding_context; // Rate targetting variables @@ -335,7 +343,6 @@ long long last_prediction_error; long long intra_error; long long last_intra_error; - long long last_auto_filter_prediction_error; #if 0 // Experimental RD code @@ -350,7 +357,6 @@ int this_frame_target; int projected_frame_size; int last_q[2]; // Separate values for Intra/Inter - int target_bits_per_mb; double rate_correction_factor; double key_frame_rate_correction_factor; @@ -383,6 +389,7 @@ int kf_overspend_bits; // Extra bits spent 
on key frames that need to be recovered on inter frames int kf_bitrate_adjustment; // Current number of bit s to try and recover on each inter frame. int max_gf_interval; + int static_scene_max_gf_interval; int baseline_gf_interval; int gf_decay_rate; int active_arnr_frames; // <= cpi->oxcf.arnr_max_frames @@ -399,6 +406,7 @@ int inter_frame_target; double output_frame_rate; long long last_time_stamp_seen; + long long last_end_time_stamp_seen; long long first_time_stamp_ever; int ni_av_qi; @@ -431,6 +439,10 @@ int best_quality; int active_best_quality; + int cq_target_quality; + int maxq_max_limit; + int maxq_min_limit; + int drop_frames_allowed; // Are we permitted to drop frames? int drop_frame; // Drop this frame? int drop_count; // How many frames have we dropped? @@ -454,8 +466,6 @@ unsigned char *output_partition2; size_t output_partition2size; - pre_proc_instance ppi; - int frames_to_key; int gfu_boost; int kf_boost; @@ -465,20 +475,30 @@ double total_coded_error_left; double start_tot_err_left; double min_error; + double kf_intra_err_min; + double gf_intra_err_min; + + double modified_error_total; + double modified_error_used; + double modified_error_left; + double clip_bpe; + double observed_bpe; - double modified_total_error_left; double avg_iiratio; int target_bandwidth; long long bits_left; + long long clip_bits_total; FIRSTPASS_STATS *total_stats; FIRSTPASS_STATS *this_frame_stats; FIRSTPASS_STATS *stats_in, *stats_in_end; struct vpx_codec_pkt_list *output_pkt_list; int first_pass_done; - unsigned char *fp_motion_map; +#if !(CONFIG_REALTIME_ONLY) + unsigned char *fp_motion_map; unsigned char *fp_motion_map_stats, *fp_motion_map_stats_save; +#endif #if 0 // Experimental code for lagged and one pass @@ -529,8 +549,6 @@ int ref_frame_flags; - int exp[512]; - SPEED_FEATURES sf; int error_bins[1024]; @@ -576,22 +594,21 @@ int cyclic_refresh_q; signed char *cyclic_refresh_map; +#if CONFIG_MULTITHREAD // multithread data - int current_mb_col_main; + int * 
mt_current_mb_col; + int mt_sync_range; int processor_core_count; int b_multi_threaded; int encoding_thread_count; -#if CONFIG_MULTITHREAD pthread_t *h_encoding_thread; -#endif MB_ROW_COMP *mb_row_ei; ENCODETHREAD_DATA *en_thread_data; -#if CONFIG_MULTITHREAD //events - sem_t *h_event_mbrencoding; - sem_t h_event_main; + sem_t *h_event_start_encoding; + sem_t h_event_end_encoding; #endif TOKENLIST *tplist; @@ -611,9 +628,6 @@ unsigned int tempdata2; int base_skip_false_prob[128]; - unsigned int section_is_low_motion; - unsigned int section_benefits_from_aggresive_q; - unsigned int section_is_fast_motion; unsigned int section_intra_rating; double section_max_qfactor; @@ -661,7 +675,14 @@ unsigned char *gf_active_flags; // Record of which MBs still refer to last golden frame either directly or through 0,0 int gf_active_count; + //Store last frame's MV info for next frame MV prediction + int_mv *lfmv; + int *lf_ref_frame_sign_bias; + int *lf_ref_frame; +#if CONFIG_REALTIME_ONLY + int force_next_frame_intra; /* force next frame to intra when kf_auto says so */ +#endif } VP8_COMP; void control_data_rate(VP8_COMP *cpi); @@ -670,6 +691,8 @@ void vp8_pack_bitstream(VP8_COMP *cpi, unsigned char *dest, unsigned long *size); +unsigned int vp8_activity_masking(VP8_COMP *cpi, MACROBLOCK *x); + int rd_cost_intra_mb(MACROBLOCKD *x); void vp8_tokenize_mb(VP8_COMP *, MACROBLOCKD *, TOKENEXTRA **); diff -Nru libvpx-0.9.5/vp8/encoder/parms.cpp libvpx-0.9.6/vp8/encoder/parms.cpp --- libvpx-0.9.5/vp8/encoder/parms.cpp 2010-10-28 13:14:14.000000000 +0000 +++ libvpx-0.9.6/vp8/encoder/parms.cpp 2011-03-04 20:40:40.000000000 +0000 @@ -16,7 +16,7 @@ #include extern "C" { - #include "onyx.h" + #include "vp8/common/onyx.h" } diff -Nru libvpx-0.9.5/vp8/encoder/pickinter.c libvpx-0.9.6/vp8/encoder/pickinter.c --- libvpx-0.9.5/vp8/encoder/pickinter.c 2010-10-28 13:14:14.000000000 +0000 +++ libvpx-0.9.6/vp8/encoder/pickinter.c 2011-03-04 20:40:40.000000000 +0000 @@ -14,17 +14,17 @@ #include 
"onyx_int.h" #include "modecosts.h" #include "encodeintra.h" -#include "entropymode.h" +#include "vp8/common/entropymode.h" #include "pickinter.h" -#include "findnearmv.h" +#include "vp8/common/findnearmv.h" #include "encodemb.h" -#include "reconinter.h" -#include "reconintra.h" -#include "reconintra4x4.h" -#include "g_common.h" +#include "vp8/common/reconinter.h" +#include "vp8/common/reconintra.h" +#include "vp8/common/reconintra4x4.h" +#include "vp8/common/g_common.h" #include "variance.h" #include "mcomp.h" - +#include "rdopt.h" #include "vpx_mem/vpx_mem.h" #if CONFIG_RUNTIME_CPU_DETECT @@ -168,8 +168,6 @@ B_PREDICTION_MODE *best_mode, B_PREDICTION_MODE above, B_PREDICTION_MODE left, - ENTROPY_CONTEXT *a, - ENTROPY_CONTEXT *l, int *bestrate, int *bestdistortion) @@ -179,8 +177,6 @@ int rate; int distortion; unsigned int *mode_costs; - (void) l; - (void) a; if (x->e_mbd.frame_type == KEY_FRAME) { @@ -211,6 +207,7 @@ b->bmi.mode = (B_PREDICTION_MODE)(*best_mode); vp8_encode_intra4x4block(rtcd, x, be, b, b->bmi.mode); + return best_rd; } @@ -220,17 +217,8 @@ MACROBLOCKD *const xd = &mb->e_mbd; int i; int cost = mb->mbmode_cost [xd->frame_type] [B_PRED]; - int error = RD_ESTIMATE(mb->rdmult, mb->rddiv, cost, 0); // Rd estimate for the cost of the block prediction mode + int error; int distortion = 0; - ENTROPY_CONTEXT_PLANES t_above, t_left; - ENTROPY_CONTEXT *ta; - ENTROPY_CONTEXT *tl; - - vpx_memcpy(&t_above, mb->e_mbd.above_context, sizeof(ENTROPY_CONTEXT_PLANES)); - vpx_memcpy(&t_left, mb->e_mbd.left_context, sizeof(ENTROPY_CONTEXT_PLANES)); - - ta = (ENTROPY_CONTEXT *)&t_above; - tl = (ENTROPY_CONTEXT *)&t_left; vp8_intra_prediction_down_copy(xd); @@ -243,10 +231,8 @@ B_PREDICTION_MODE UNINITIALIZED_IS_SAFE(best_mode); int UNINITIALIZED_IS_SAFE(r), UNINITIALIZED_IS_SAFE(d); - error += pick_intra4x4block(rtcd, - mb, mb->block + i, xd->block + i, &best_mode, A, L, - ta + vp8_block2above[i], - tl + vp8_block2left[i], &r, &d); + pick_intra4x4block(rtcd, mb, 
mb->block + i, xd->block + i, + &best_mode, A, L, &r, &d); cost += r; distortion += d; @@ -264,10 +250,15 @@ *Rate = cost; if (i == 16) + { *best_dist = distortion; + error = RD_ESTIMATE(mb->rdmult, mb->rddiv, cost, distortion); + } else + { *best_dist = INT_MAX; - + error = INT_MAX; + } return error; } @@ -421,7 +412,6 @@ } - int vp8_pick_inter_mode(VP8_COMP *cpi, MACROBLOCK *x, int recon_yoffset, int recon_uvoffset, int *returnrate, int *returndistortion, int *returnintra) { BLOCK *b = &x->block[0]; @@ -430,7 +420,7 @@ B_MODE_INFO best_bmodes[16]; MB_MODE_INFO best_mbmode; PARTITION_INFO best_partition; - MV best_ref_mv1; + MV best_ref_mv; MV mode_mv[MB_MODE_COUNT]; MB_PREDICTION_MODE this_mode; int num00; @@ -448,9 +438,14 @@ int best_mode_index = 0; int sse = INT_MAX; + MV mvp; + int near_sadidx[8] = {0, 1, 2, 3, 4, 5, 6, 7}; + int saddone=0; + int sr=0; //search range got from mv_pred(). It uses step_param levels. (0-7) + MV nearest_mv[4]; MV near_mv[4]; - MV best_ref_mv[4]; + MV frame_best_ref_mv[4]; int MDCounts[4][4]; unsigned char *y_buffer[4]; unsigned char *u_buffer[4]; @@ -470,7 +465,7 @@ YV12_BUFFER_CONFIG *lst_yv12 = &cpi->common.yv12_fb[cpi->common.lst_fb_idx]; vp8_find_near_mvs(&x->e_mbd, x->e_mbd.mode_info_context, &nearest_mv[LAST_FRAME], &near_mv[LAST_FRAME], - &best_ref_mv[LAST_FRAME], MDCounts[LAST_FRAME], LAST_FRAME, cpi->common.ref_frame_sign_bias); + &frame_best_ref_mv[LAST_FRAME], MDCounts[LAST_FRAME], LAST_FRAME, cpi->common.ref_frame_sign_bias); y_buffer[LAST_FRAME] = lst_yv12->y_buffer + recon_yoffset; u_buffer[LAST_FRAME] = lst_yv12->u_buffer + recon_uvoffset; @@ -484,7 +479,7 @@ YV12_BUFFER_CONFIG *gld_yv12 = &cpi->common.yv12_fb[cpi->common.gld_fb_idx]; vp8_find_near_mvs(&x->e_mbd, x->e_mbd.mode_info_context, &nearest_mv[GOLDEN_FRAME], &near_mv[GOLDEN_FRAME], - &best_ref_mv[GOLDEN_FRAME], MDCounts[GOLDEN_FRAME], GOLDEN_FRAME, cpi->common.ref_frame_sign_bias); + &frame_best_ref_mv[GOLDEN_FRAME], MDCounts[GOLDEN_FRAME], GOLDEN_FRAME, 
cpi->common.ref_frame_sign_bias); y_buffer[GOLDEN_FRAME] = gld_yv12->y_buffer + recon_yoffset; u_buffer[GOLDEN_FRAME] = gld_yv12->u_buffer + recon_uvoffset; @@ -498,7 +493,7 @@ YV12_BUFFER_CONFIG *alt_yv12 = &cpi->common.yv12_fb[cpi->common.alt_fb_idx]; vp8_find_near_mvs(&x->e_mbd, x->e_mbd.mode_info_context, &nearest_mv[ALTREF_FRAME], &near_mv[ALTREF_FRAME], - &best_ref_mv[ALTREF_FRAME], MDCounts[ALTREF_FRAME], ALTREF_FRAME, cpi->common.ref_frame_sign_bias); + &frame_best_ref_mv[ALTREF_FRAME], MDCounts[ALTREF_FRAME], ALTREF_FRAME, cpi->common.ref_frame_sign_bias); y_buffer[ALTREF_FRAME] = alt_yv12->y_buffer + recon_yoffset; u_buffer[ALTREF_FRAME] = alt_yv12->u_buffer + recon_uvoffset; @@ -538,10 +533,6 @@ + vp8_cost_one(cpi->prob_gf_coded); } - - - best_rd = INT_MAX; - x->e_mbd.mode_info_context->mbmi.ref_frame = INTRA_FRAME; // if we encode a new mv this is important @@ -604,17 +595,41 @@ x->e_mbd.pre.v_buffer = v_buffer[x->e_mbd.mode_info_context->mbmi.ref_frame]; mode_mv[NEARESTMV] = nearest_mv[x->e_mbd.mode_info_context->mbmi.ref_frame]; mode_mv[NEARMV] = near_mv[x->e_mbd.mode_info_context->mbmi.ref_frame]; - best_ref_mv1 = best_ref_mv[x->e_mbd.mode_info_context->mbmi.ref_frame]; + best_ref_mv = frame_best_ref_mv[x->e_mbd.mode_info_context->mbmi.ref_frame]; memcpy(mdcounts, MDCounts[x->e_mbd.mode_info_context->mbmi.ref_frame], sizeof(mdcounts)); } - //Only consider ZEROMV/ALTREF_FRAME for alt ref frame. 
- if (cpi->is_src_frame_alt_ref) + // Only consider ZEROMV/ALTREF_FRAME for alt ref frame, + // unless ARNR filtering is enabled in which case we want + // an unfiltered alternative + if (cpi->is_src_frame_alt_ref && (cpi->oxcf.arnr_max_frames == 0)) { if (this_mode != ZEROMV || x->e_mbd.mode_info_context->mbmi.ref_frame != ALTREF_FRAME) continue; } + if(cpi->sf.improved_mv_pred && x->e_mbd.mode_info_context->mbmi.mode == NEWMV) + { + if(!saddone) + { + vp8_cal_sad(cpi,xd,x, recon_yoffset ,&near_sadidx[0] ); + saddone = 1; + } + + vp8_mv_pred(cpi, &x->e_mbd, x->e_mbd.mode_info_context, &mvp, + x->e_mbd.mode_info_context->mbmi.ref_frame, cpi->common.ref_frame_sign_bias, &sr, &near_sadidx[0]); + + /* adjust mvp to make sure it is within MV range */ + if(mvp.row > best_ref_mv.row + MAX_FULL_PEL_VAL) + mvp.row = best_ref_mv.row + MAX_FULL_PEL_VAL; + else if(mvp.row < best_ref_mv.row - MAX_FULL_PEL_VAL) + mvp.row = best_ref_mv.row - MAX_FULL_PEL_VAL; + if(mvp.col > best_ref_mv.col + MAX_FULL_PEL_VAL) + mvp.col = best_ref_mv.col + MAX_FULL_PEL_VAL; + else if(mvp.col < best_ref_mv.col - MAX_FULL_PEL_VAL) + mvp.col = best_ref_mv.col - MAX_FULL_PEL_VAL; + } + switch (this_mode) { case B_PRED: @@ -670,61 +685,59 @@ int n = 0; int sadpb = x->sadperbit16; - // Further step/diamond searches as necessary - if (cpi->Speed < 8) - { - step_param = cpi->sf.first_step + ((cpi->Speed > 5) ? 1 : 0); - further_steps = (cpi->sf.max_step_search_steps - 1) - step_param; - } - else - { - step_param = cpi->sf.first_step + 2; - further_steps = 0; - } + int col_min; + int col_max; + int row_min; + int row_max; + + int tmp_col_min = x->mv_col_min; + int tmp_col_max = x->mv_col_max; + int tmp_row_min = x->mv_row_min; + int tmp_row_max = x->mv_row_max; -#if 0 + int speed_adjust = (cpi->Speed > 5) ? ((cpi->Speed >= 8)? 
3 : 2) : 1; - // Initial step Search - bestsme = vp8_diamond_search_sad(x, b, d, &best_ref_mv1, &d->bmi.mv.as_mv, step_param, x->errorperbit, &num00, &cpi->fn_ptr, cpi->mb.mvsadcost, cpi->mb.mvcost); - mode_mv[NEWMV].row = d->bmi.mv.as_mv.row; - mode_mv[NEWMV].col = d->bmi.mv.as_mv.col; + // Further step/diamond searches as necessary + step_param = cpi->sf.first_step + speed_adjust; - // Further step searches - while (n < further_steps) + if(cpi->sf.improved_mv_pred) { - n++; - - if (num00) - num00--; - else - { - thissme = vp8_diamond_search_sad(x, b, d, &best_ref_mv1, &d->bmi.mv.as_mv, step_param + n, x->errorperbit, &num00, &cpi->fn_ptr, cpi->mb.mvsadcost, x->mvcost); - - if (thissme < bestsme) - { - bestsme = thissme; - mode_mv[NEWMV].row = d->bmi.mv.as_mv.row; - mode_mv[NEWMV].col = d->bmi.mv.as_mv.col; - } - else - { - d->bmi.mv.as_mv.row = mode_mv[NEWMV].row; - d->bmi.mv.as_mv.col = mode_mv[NEWMV].col; - } - } + sr += speed_adjust; + //adjust search range according to sr from mv prediction + if(sr > step_param) + step_param = sr; + + col_min = (best_ref_mv.col - MAX_FULL_PEL_VAL) >>3; + col_max = (best_ref_mv.col + MAX_FULL_PEL_VAL) >>3; + row_min = (best_ref_mv.row - MAX_FULL_PEL_VAL) >>3; + row_max = (best_ref_mv.row + MAX_FULL_PEL_VAL) >>3; + + // Get intersection of UMV window and valid MV window to reduce # of checks in diamond search. + if (x->mv_col_min < col_min ) + x->mv_col_min = col_min; + if (x->mv_col_max > col_max ) + x->mv_col_max = col_max; + if (x->mv_row_min < row_min ) + x->mv_row_min = row_min; + if (x->mv_row_max > row_max ) + x->mv_row_max = row_max; + }else + { + mvp.row = best_ref_mv.row; + mvp.col = best_ref_mv.col; } -#else + further_steps = (cpi->Speed >= 8)? 
0: (cpi->sf.max_step_search_steps - 1 - step_param); if (cpi->sf.search_method == HEX) { - bestsme = vp8_hex_search(x, b, d, &best_ref_mv1, &d->bmi.mv.as_mv, step_param, sadpb/*x->errorperbit*/, &num00, &cpi->fn_ptr[BLOCK_16X16], x->mvsadcost, x->mvcost); + bestsme = vp8_hex_search(x, b, d, &mvp, &d->bmi.mv.as_mv, step_param, sadpb/*x->errorperbit*/, &num00, &cpi->fn_ptr[BLOCK_16X16], x->mvsadcost, x->mvcost, &best_ref_mv); mode_mv[NEWMV].row = d->bmi.mv.as_mv.row; mode_mv[NEWMV].col = d->bmi.mv.as_mv.col; } else { - bestsme = cpi->diamond_search_sad(x, b, d, &best_ref_mv1, &d->bmi.mv.as_mv, step_param, sadpb / 2/*x->errorperbit*/, &num00, &cpi->fn_ptr[BLOCK_16X16], x->mvsadcost, x->mvcost); //sadpb < 9 + bestsme = cpi->diamond_search_sad(x, b, d, &mvp, &d->bmi.mv.as_mv, step_param, sadpb / 2/*x->errorperbit*/, &num00, &cpi->fn_ptr[BLOCK_16X16], x->mvsadcost, x->mvcost, &best_ref_mv); //sadpb < 9 mode_mv[NEWMV].row = d->bmi.mv.as_mv.row; mode_mv[NEWMV].col = d->bmi.mv.as_mv.col; @@ -743,7 +756,7 @@ num00--; else { - thissme = cpi->diamond_search_sad(x, b, d, &best_ref_mv1, &d->bmi.mv.as_mv, step_param + n, sadpb / 4/*x->errorperbit*/, &num00, &cpi->fn_ptr[BLOCK_16X16], x->mvsadcost, x->mvcost); //sadpb = 9 + thissme = cpi->diamond_search_sad(x, b, d, &mvp, &d->bmi.mv.as_mv, step_param + n, sadpb / 4/*x->errorperbit*/, &num00, &cpi->fn_ptr[BLOCK_16X16], x->mvsadcost, x->mvcost, &best_ref_mv); //sadpb = 9 if (thissme < bestsme) { @@ -760,18 +773,23 @@ } } -#endif - } - - if (bestsme < INT_MAX) - cpi->find_fractional_mv_step(x, b, d, &d->bmi.mv.as_mv, &best_ref_mv1, x->errorperbit, &cpi->fn_ptr[BLOCK_16X16], cpi->mb.mvcost); + if(cpi->sf.improved_mv_pred) + { + x->mv_col_min = tmp_col_min; + x->mv_col_max = tmp_col_max; + x->mv_row_min = tmp_row_min; + x->mv_row_max = tmp_row_max; + } - mode_mv[NEWMV].row = d->bmi.mv.as_mv.row; - mode_mv[NEWMV].col = d->bmi.mv.as_mv.col; + if (bestsme < INT_MAX) + cpi->find_fractional_mv_step(x, b, d, &d->bmi.mv.as_mv, &best_ref_mv, 
x->errorperbit, &cpi->fn_ptr[BLOCK_16X16], cpi->mb.mvcost); - // mv cost; - rate2 += vp8_mv_bit_cost(&mode_mv[NEWMV], &best_ref_mv1, cpi->mb.mvcost, 128); + mode_mv[NEWMV].row = d->bmi.mv.as_mv.row; + mode_mv[NEWMV].col = d->bmi.mv.as_mv.col; + // mv cost; + rate2 += vp8_mv_bit_cost(&mode_mv[NEWMV], &best_ref_mv, cpi->mb.mvcost, 128); + } case NEARESTMV: case NEARMV: diff -Nru libvpx-0.9.5/vp8/encoder/pickinter.h libvpx-0.9.6/vp8/encoder/pickinter.h --- libvpx-0.9.5/vp8/encoder/pickinter.h 2010-10-28 13:14:14.000000000 +0000 +++ libvpx-0.9.6/vp8/encoder/pickinter.h 2011-03-04 20:40:40.000000000 +0000 @@ -12,7 +12,7 @@ #ifndef __INC_PICKINTER_H #define __INC_PICKINTER_H #include "vpx_ports/config.h" -#include "onyxc_int.h" +#include "vp8/common/onyxc_int.h" #define RD_ESTIMATE(RM,DM,R,D) ( ((128+(R)*(RM)) >> 8) + (DM)*(D) ) extern int vp8_pick_intra4x4mby_modes(const VP8_ENCODER_RTCD *, MACROBLOCK *mb, int *Rate, int *Distortion); diff -Nru libvpx-0.9.5/vp8/encoder/picklpf.c libvpx-0.9.6/vp8/encoder/picklpf.c --- libvpx-0.9.5/vp8/encoder/picklpf.c 2010-10-28 13:14:14.000000000 +0000 +++ libvpx-0.9.6/vp8/encoder/picklpf.c 2011-03-04 20:40:40.000000000 +0000 @@ -9,13 +9,13 @@ */ -#include "onyxc_int.h" +#include "vp8/common/onyxc_int.h" #include "onyx_int.h" #include "quantize.h" #include "vpx_mem/vpx_mem.h" #include "vpx_scale/yv12extend.h" #include "vpx_scale/vpxscale.h" -#include "alloccommon.h" +#include "vp8/common/alloccommon.h" #if ARCH_ARM #include "vpx_ports/arm.h" #endif @@ -296,7 +296,6 @@ int filt_err = 0; int min_filter_level; int max_filter_level; - int prediction_difference = (int)(100 * abs((int)(cpi->last_auto_filter_prediction_error - cpi->prediction_error)) / (1 + cpi->prediction_error)); int filter_step; int filt_high = 0; @@ -478,6 +477,5 @@ cpi->last_auto_filt_val = filt_best; cpi->last_auto_filt_q = cm->base_qindex; - cpi->last_auto_filter_prediction_error = cpi->prediction_error; cpi->frames_since_auto_filter = 0; } diff -Nru 
libvpx-0.9.5/vp8/encoder/ppc/csystemdependent.c libvpx-0.9.6/vp8/encoder/ppc/csystemdependent.c --- libvpx-0.9.5/vp8/encoder/ppc/csystemdependent.c 2010-10-28 13:14:14.000000000 +0000 +++ libvpx-0.9.6/vp8/encoder/ppc/csystemdependent.c 2011-03-04 20:40:40.000000000 +0000 @@ -9,8 +9,8 @@ */ -#include "variance.h" -#include "onyx_int.h" +#include "vp8/encoder/variance.h" +#include "vp8/encoder/onyx_int.h" SADFunction *vp8_sad16x16; SADFunction *vp8_sad16x8; diff -Nru libvpx-0.9.5/vp8/encoder/preproc.c libvpx-0.9.6/vp8/encoder/preproc.c --- libvpx-0.9.5/vp8/encoder/preproc.c 2010-10-28 13:14:14.000000000 +0000 +++ libvpx-0.9.6/vp8/encoder/preproc.c 1970-01-01 00:00:00.000000000 +0000 @@ -1,251 +0,0 @@ -/* - * Copyright (c) 2010 The WebM project authors. All Rights Reserved. - * - * Use of this source code is governed by a BSD-style license - * that can be found in the LICENSE file in the root of the source - * tree. An additional intellectual property rights grant can be found - * in the file PATENTS. All contributing project authors may - * be found in the AUTHORS file in the root of the source tree. - */ - - -/**************************************************************************** -* -* Module Title : preproc.c -* -* Description : Simple pre-processor. 
-* -****************************************************************************/ - -/**************************************************************************** -* Header Files -****************************************************************************/ - -#include "memory.h" -#include "preproc7.h" -#include "vpx_mem/vpx_mem.h" - -/**************************************************************************** -* Macros -****************************************************************************/ -#define FRAMECOUNT 7 -#define ROUNDUP32(X) ( ( ( (unsigned long) X ) + 31 )&( 0xFFFFFFE0 ) ) - -/**************************************************************************** -* Imports -****************************************************************************/ -extern void vp8_get_processor_flags(int *mmx_enabled, int *xmm_enabled, int *wmt_enabled); - -/**************************************************************************** -* Exported Global Variables -****************************************************************************/ -void (*temp_filter)(pre_proc_instance *ppi, unsigned char *s, unsigned char *d, int bytes, int strength); -void temp_filter_mmx -( - pre_proc_instance *ppi, - unsigned char *s, - unsigned char *d, - int bytes, - int strength -); -void temp_filter_wmt -( - pre_proc_instance *ppi, - unsigned char *s, - unsigned char *d, - int bytes, - int strength -); - -/**************************************************************************** - * - * ROUTINE : temp_filter_c - * - * INPUTS : pre_proc_instance *ppi : Pointer to pre-processor instance. - * unsigned char *s : Pointer to source frame. - * unsigned char *d : Pointer to destination frame. - * int bytes : Number of bytes to filter. - * int strength : Strength of filter to apply. - * - * OUTPUTS : None. - * - * RETURNS : void - * - * FUNCTION : Performs a closesness adjusted temporarl blur - * - * SPECIAL NOTES : Destination frame can be same as source frame. 
- * - ****************************************************************************/ -void temp_filter_c -( - pre_proc_instance *ppi, - unsigned char *s, - unsigned char *d, - int bytes, - int strength -) -{ - int byte = 0; - unsigned char *frameptr = ppi->frame_buffer; - - if (ppi->frame == 0) - { - do - { - int frame = 0; - - do - { - *frameptr = s[byte]; - ++frameptr; - ++frame; - } - while (frame < FRAMECOUNT); - - d[byte] = s[byte]; - - ++byte; - } - while (byte < bytes); - } - else - { - int modifier; - int offset = (ppi->frame % FRAMECOUNT); - - do - { - int accumulator = 0; - int count = 0; - int frame = 0; - - frameptr[offset] = s[byte]; - - do - { - int pixel_value = *frameptr; - - modifier = s[byte]; - modifier -= pixel_value; - modifier *= modifier; - modifier >>= strength; - modifier *= 3; - - if (modifier > 16) - modifier = 16; - - modifier = 16 - modifier; - - accumulator += modifier * pixel_value; - - count += modifier; - - frameptr++; - - ++frame; - } - while (frame < FRAMECOUNT); - - accumulator += (count >> 1); - accumulator *= ppi->fixed_divide[count]; - accumulator >>= 16; - - d[byte] = accumulator; - - ++byte; - } - while (byte < bytes); - } - - ++ppi->frame; -} -/**************************************************************************** - * - * ROUTINE : delete_pre_proc - * - * INPUTS : pre_proc_instance *ppi : Pointer to pre-processor instance. - * - * OUTPUTS : None. - * - * RETURNS : void - * - * FUNCTION : Deletes a pre-processing instance. - * - * SPECIAL NOTES : None. 
- * - ****************************************************************************/ -void delete_pre_proc(pre_proc_instance *ppi) -{ - if (ppi->frame_buffer_alloc) - vpx_free(ppi->frame_buffer_alloc); - - ppi->frame_buffer_alloc = 0; - ppi->frame_buffer = 0; - - if (ppi->fixed_divide_alloc) - vpx_free(ppi->fixed_divide_alloc); - - ppi->fixed_divide_alloc = 0; - ppi->fixed_divide = 0; -} - -/**************************************************************************** - * - * ROUTINE : init_pre_proc - * - * INPUTS : pre_proc_instance *ppi : Pointer to pre-processor instance. - * int frame_size : Number of bytes in one frame. - * - * OUTPUTS : None. - * - * RETURNS : int: 1 if successful, 0 if failed. - * - * FUNCTION : Initializes prepprocessor instance. - * - * SPECIAL NOTES : None. - * - ****************************************************************************/ -int init_pre_proc7(pre_proc_instance *ppi, int frame_size) -{ - int i; - int mmx_enabled; - int xmm_enabled; - int wmt_enabled; - - vp8_get_processor_flags(&mmx_enabled, &xmm_enabled, &wmt_enabled); - - if (wmt_enabled) - temp_filter = temp_filter_wmt; - else if (mmx_enabled) - temp_filter = temp_filter_mmx; - else - temp_filter = temp_filter_c; - - - delete_pre_proc(ppi); - - ppi->frame_buffer_alloc = vpx_malloc(32 + frame_size * FRAMECOUNT * sizeof(unsigned char)); - - if (!ppi->frame_buffer_alloc) - { - delete_pre_proc(ppi); - return 0; - } - - ppi->frame_buffer = (unsigned char *) ROUNDUP32(ppi->frame_buffer_alloc); - - ppi->fixed_divide_alloc = vpx_malloc(32 + 255 * sizeof(unsigned int)); - - if (!ppi->fixed_divide_alloc) - { - delete_pre_proc(ppi); - return 0; - } - - ppi->fixed_divide = (unsigned int *) ROUNDUP32(ppi->fixed_divide_alloc); - - for (i = 1; i < 255; i++) - ppi->fixed_divide[i] = 0x10000 / i; - - return 1; -} diff -Nru libvpx-0.9.5/vp8/encoder/psnr.c libvpx-0.9.6/vp8/encoder/psnr.c --- libvpx-0.9.5/vp8/encoder/psnr.c 2010-10-28 13:14:14.000000000 +0000 +++ 
libvpx-0.9.6/vp8/encoder/psnr.c 2011-03-04 20:40:40.000000000 +0000 @@ -11,7 +11,7 @@ #include "vpx_scale/yv12config.h" #include "math.h" -#include "systemdependent.h" /* for vp8_clear_system_state() */ +#include "vp8/common/systemdependent.h" /* for vp8_clear_system_state() */ #define MAX_PSNR 60 diff -Nru libvpx-0.9.5/vp8/encoder/quantize.c libvpx-0.9.6/vp8/encoder/quantize.c --- libvpx-0.9.5/vp8/encoder/quantize.c 2010-10-28 13:14:14.000000000 +0000 +++ libvpx-0.9.6/vp8/encoder/quantize.c 2011-03-04 20:40:40.000000000 +0000 @@ -13,11 +13,11 @@ #include "vpx_mem/vpx_mem.h" #include "quantize.h" -#include "entropy.h" -#include "predictdc.h" +#include "vp8/common/entropy.h" -//#define EXACT_QUANT -#ifdef EXACT_QUANT +#define EXACT_QUANT + +#ifdef EXACT_FASTQUANT void vp8_fast_quantize_b_c(BLOCK *b, BLOCKD *d) { int i, rc, eob; @@ -26,7 +26,7 @@ short *coeff_ptr = b->coeff; short *zbin_ptr = b->zbin; short *round_ptr = b->round; - short *quant_ptr = b->quant; + short *quant_ptr = b->quant_fast; short *quant_shift_ptr = b->quant_shift; short *qcoeff_ptr = d->qcoeff; short *dqcoeff_ptr = d->dqcoeff; @@ -64,6 +64,44 @@ d->eob = eob + 1; } +#else + +void vp8_fast_quantize_b_c(BLOCK *b, BLOCKD *d) +{ + int i, rc, eob; + int x, y, z, sz; + short *coeff_ptr = b->coeff; + short *round_ptr = b->round; + short *quant_ptr = b->quant_fast; + short *qcoeff_ptr = d->qcoeff; + short *dqcoeff_ptr = d->dqcoeff; + short *dequant_ptr = d->dequant; + + eob = -1; + for (i = 0; i < 16; i++) + { + rc = vp8_default_zig_zag1d[i]; + z = coeff_ptr[rc]; + + sz = (z >> 31); // sign of z + x = (z ^ sz) - sz; // x = abs(z) + + y = ((x + round_ptr[rc]) * quant_ptr[rc]) >> 16; // quantize (x) + x = (y ^ sz) - sz; // get the sign back + qcoeff_ptr[rc] = x; // write to destination + dqcoeff_ptr[rc] = x * dequant_ptr[rc]; // dequantized value + + if (y) + { + eob = i; // last nonzero coeffs + } + } + d->eob = eob + 1; +} + +#endif + +#ifdef EXACT_QUANT void vp8_regular_quantize_b(BLOCK *b, BLOCKD *d) 
{ int i, rc, eob; @@ -90,9 +128,6 @@ rc = vp8_default_zig_zag1d[i]; z = coeff_ptr[rc]; - //if ( i == 0 ) - // zbin = zbin_ptr[rc] + *zbin_boost_ptr + zbin_oq_value/2; - //else zbin = zbin_ptr[rc] + *zbin_boost_ptr + zbin_oq_value; zbin_boost_ptr ++; @@ -105,13 +140,13 @@ y = (((x * quant_ptr[rc]) >> 16) + x) >> quant_shift_ptr[rc]; // quantize (x) x = (y ^ sz) - sz; // get the sign back - qcoeff_ptr[rc] = x; // write to destination - dqcoeff_ptr[rc] = x * dequant_ptr[rc]; // dequantized value + qcoeff_ptr[rc] = x; // write to destination + dqcoeff_ptr[rc] = x * dequant_ptr[rc]; // dequantized value if (y) { eob = i; // last nonzero coeffs - zbin_boost_ptr = &b->zrun_zbin_boost[0]; // reset zero runlength + zbin_boost_ptr = b->zrun_zbin_boost; // reset zero runlength } } } @@ -178,39 +213,6 @@ } #else -void vp8_fast_quantize_b_c(BLOCK *b, BLOCKD *d) -{ - int i, rc, eob; - int zbin; - int x, y, z, sz; - short *coeff_ptr = b->coeff; - short *round_ptr = b->round; - short *quant_ptr = b->quant; - short *qcoeff_ptr = d->qcoeff; - short *dqcoeff_ptr = d->dqcoeff; - short *dequant_ptr = d->dequant; - - eob = -1; - for (i = 0; i < 16; i++) - { - rc = vp8_default_zig_zag1d[i]; - z = coeff_ptr[rc]; - - sz = (z >> 31); // sign of z - x = (z ^ sz) - sz; // x = abs(z) - - y = ((x + round_ptr[rc]) * quant_ptr[rc]) >> 16; // quantize (x) - x = (y ^ sz) - sz; // get the sign back - qcoeff_ptr[rc] = x; // write to destination - dqcoeff_ptr[rc] = x * dequant_ptr[rc]; // dequantized value - - if (y) - { - eob = i; // last nonzero coeffs - } - } - d->eob = eob + 1; -} void vp8_regular_quantize_b(BLOCK *b, BLOCKD *d) { diff -Nru libvpx-0.9.5/vp8/encoder/ratectrl.c libvpx-0.9.6/vp8/encoder/ratectrl.c --- libvpx-0.9.5/vp8/encoder/ratectrl.c 2010-10-28 13:14:14.000000000 +0000 +++ libvpx-0.9.6/vp8/encoder/ratectrl.c 2011-03-04 20:40:40.000000000 +0000 @@ -16,11 +16,11 @@ #include #include "math.h" -#include "common.h" +#include "vp8/common/common.h" #include "ratectrl.h" -#include 
"entropymode.h" +#include "vp8/common/entropymode.h" #include "vpx_mem/vpx_mem.h" -#include "systemdependent.h" +#include "vp8/common/systemdependent.h" #include "encodemv.h" @@ -45,46 +45,48 @@ // Bits Per MB at different Q (Multiplied by 512) #define BPER_MB_NORMBITS 9 +// Work in progress recalibration of baseline rate tables based on +// the assumption that bits per mb is inversely proportional to the +// quantizer value. const int vp8_bits_per_mb[2][QINDEX_RANGE] = { - // (Updated 19 March 08) Baseline estimate of INTRA-frame Bits Per MB at each Q: + // Intra case 450000/Qintra { - 674781, 606845, 553905, 524293, 500428, 452540, 435379, 414719, - 390970, 371082, 359416, 341807, 336957, 317263, 303724, 298402, - 285688, 275237, 268455, 262560, 256038, 248734, 241087, 237615, - 229247, 225211, 219112, 213920, 211559, 202714, 198482, 193401, - 187866, 183453, 179212, 175965, 171852, 167235, 163972, 160560, - 156032, 154349, 151390, 148725, 145708, 142311, 139981, 137700, - 134084, 131863, 129746, 128498, 126077, 123461, 121290, 117782, - 114883, 112332, 108410, 105685, 103434, 101192, 98587, 95959, - 94059, 92017, 89970, 87936, 86142, 84801, 82736, 81106, - 79668, 78135, 76641, 75103, 73943, 72693, 71401, 70098, - 69165, 67901, 67170, 65987, 64923, 63534, 62378, 61302, - 59921, 58941, 57844, 56782, 55960, 54973, 54257, 53454, - 52230, 50938, 49962, 49190, 48288, 47270, 46738, 46037, - 45020, 44027, 43216, 42287, 41594, 40702, 40081, 39414, - 38282, 37627, 36987, 36375, 35808, 35236, 34710, 34162, - 33659, 33327, 32751, 32384, 31936, 31461, 30982, 30582, + 1125000,900000, 750000, 642857, 562500, 500000, 450000, 450000, + 409090, 375000, 346153, 321428, 300000, 281250, 264705, 264705, + 250000, 236842, 225000, 225000, 214285, 214285, 204545, 204545, + 195652, 195652, 187500, 180000, 180000, 173076, 166666, 160714, + 155172, 150000, 145161, 140625, 136363, 132352, 128571, 125000, + 121621, 121621, 118421, 115384, 112500, 109756, 107142, 104651, + 102272, 100000, 
97826, 97826, 95744, 93750, 91836, 90000, + 88235, 86538, 84905, 83333, 81818, 80357, 78947, 77586, + 76271, 75000, 73770, 72580, 71428, 70312, 69230, 68181, + 67164, 66176, 65217, 64285, 63380, 62500, 61643, 60810, + 60000, 59210, 59210, 58441, 57692, 56962, 56250, 55555, + 54878, 54216, 53571, 52941, 52325, 51724, 51136, 50561, + 49450, 48387, 47368, 46875, 45918, 45000, 44554, 44117, + 43269, 42452, 41666, 40909, 40178, 39473, 38793, 38135, + 36885, 36290, 35714, 35156, 34615, 34090, 33582, 33088, + 32608, 32142, 31468, 31034, 30405, 29801, 29220, 28662, }, - - // (Updated 19 March 08) Baseline estimate of INTER-frame Bits Per MB at each Q: + // Inter case 285000/Qinter { - 497401, 426316, 372064, 352732, 335763, 283921, 273848, 253321, - 233181, 217727, 210030, 196685, 194836, 178396, 167753, 164116, - 154119, 146929, 142254, 138488, 133591, 127741, 123166, 120226, - 114188, 111756, 107882, 104749, 102522, 96451, 94424, 90905, - 87286, 84931, 82111, 80534, 77610, 74700, 73037, 70715, - 68006, 67235, 65374, 64009, 62134, 60180, 59105, 57691, - 55509, 54512, 53318, 52693, 51194, 49840, 48944, 46980, - 45668, 44177, 42348, 40994, 39859, 38889, 37717, 36391, - 35482, 34622, 33795, 32756, 32002, 31492, 30573, 29737, - 29152, 28514, 27941, 27356, 26859, 26329, 25874, 25364, - 24957, 24510, 24290, 23689, 23380, 22845, 22481, 22066, - 21587, 21219, 20880, 20452, 20260, 19926, 19661, 19334, - 18915, 18391, 18046, 17833, 17441, 17105, 16888, 16729, - 16383, 16023, 15706, 15442, 15222, 14938, 14673, 14452, - 14005, 13807, 13611, 13447, 13223, 13102, 12963, 12801, - 12627, 12534, 12356, 12228, 12056, 11907, 11746, 11643, + 712500, 570000, 475000, 407142, 356250, 316666, 285000, 259090, + 237500, 219230, 203571, 190000, 178125, 167647, 158333, 150000, + 142500, 135714, 129545, 123913, 118750, 114000, 109615, 105555, + 101785, 98275, 95000, 91935, 89062, 86363, 83823, 81428, + 79166, 77027, 75000, 73076, 71250, 69512, 67857, 66279, + 64772, 63333, 61956, 60638, 59375, 58163, 
57000, 55882, + 54807, 53773, 52777, 51818, 50892, 50000, 49137, 47500, + 45967, 44531, 43181, 41911, 40714, 39583, 38513, 37500, + 36538, 35625, 34756, 33928, 33139, 32386, 31666, 30978, + 30319, 29687, 29081, 28500, 27941, 27403, 26886, 26388, + 25909, 25446, 25000, 24568, 23949, 23360, 22800, 22265, + 21755, 21268, 20802, 20357, 19930, 19520, 19127, 18750, + 18387, 18037, 17701, 17378, 17065, 16764, 16473, 16101, + 15745, 15405, 15079, 14766, 14467, 14179, 13902, 13636, + 13380, 13133, 12895, 12666, 12445, 12179, 11924, 11632, + 11445, 11220, 11003, 10795, 10594, 10401, 10215, 10035, } }; @@ -324,6 +326,7 @@ cpi->frames_till_gf_update_due = cpi->goldfreq; cpi->common.refresh_golden_frame = TRUE; + cpi->common.refresh_alt_ref_frame = TRUE; } void vp8_calc_auto_iframe_target_size(VP8_COMP *cpi) @@ -1034,9 +1037,7 @@ gf_frame_useage = pct_gf_active; // Is a fixed manual GF frequency being used - if (!cpi->auto_gold) - cpi->common.refresh_golden_frame = TRUE; - else + if (cpi->auto_gold) { // For one pass throw a GF if recent frame intra useage is low or the GF useage is high if ((cpi->pass == 0) && (cpi->this_frame_percent_intra < 15 || gf_frame_useage >= 5)) @@ -1549,12 +1550,21 @@ *frame_under_shoot_limit = cpi->this_frame_target * 5 / 8; } } - // VBR + // VBR and CQ mode // Note that tighter restrictions here can help quality but hurt encode speed else { - *frame_over_shoot_limit = cpi->this_frame_target * 11 / 8; - *frame_under_shoot_limit = cpi->this_frame_target * 5 / 8; + // Stron overshoot limit for constrained quality + if (cpi->oxcf.end_usage == USAGE_CONSTRAINED_QUALITY) + { + *frame_over_shoot_limit = cpi->this_frame_target * 11 / 8; + *frame_under_shoot_limit = cpi->this_frame_target * 2 / 8; + } + else + { + *frame_over_shoot_limit = cpi->this_frame_target * 11 / 8; + *frame_under_shoot_limit = cpi->this_frame_target * 5 / 8; + } } } } diff -Nru libvpx-0.9.5/vp8/encoder/rdopt.c libvpx-0.9.6/vp8/encoder/rdopt.c --- libvpx-0.9.5/vp8/encoder/rdopt.c 
2010-10-28 13:14:14.000000000 +0000 +++ libvpx-0.9.6/vp8/encoder/rdopt.c 2011-03-04 20:40:40.000000000 +0000 @@ -13,30 +13,29 @@ #include #include #include -#include "pragmas.h" +#include "vp8/common/pragmas.h" #include "tokenize.h" #include "treewriter.h" #include "onyx_int.h" #include "modecosts.h" #include "encodeintra.h" -#include "entropymode.h" -#include "reconinter.h" -#include "reconintra.h" -#include "reconintra4x4.h" -#include "findnearmv.h" +#include "vp8/common/entropymode.h" +#include "vp8/common/reconinter.h" +#include "vp8/common/reconintra.h" +#include "vp8/common/reconintra4x4.h" +#include "vp8/common/findnearmv.h" #include "encodemb.h" #include "quantize.h" -#include "idct.h" -#include "g_common.h" +#include "vp8/common/idct.h" +#include "vp8/common/g_common.h" #include "variance.h" #include "mcomp.h" #include "vpx_mem/vpx_mem.h" #include "dct.h" -#include "systemdependent.h" +#include "vp8/common/systemdependent.h" -#define DIAMONDSEARCH 1 #if CONFIG_RUNTIME_CPU_DETECT #define IF_RTCD(x) (x) #else @@ -44,21 +43,10 @@ #endif -void vp8cx_mb_init_quantizer(VP8_COMP *cpi, MACROBLOCK *x); +extern void vp8cx_mb_init_quantizer(VP8_COMP *cpi, MACROBLOCK *x); +extern void vp8_update_zbin_extra(VP8_COMP *cpi, MACROBLOCK *x); -#define RDFUNC(RM,DM,R,D,target_rd) ( ((128+(R)*(RM)) >> 8) + (DM)*(D) ) -/*int RDFUNC( int RM,int DM, int R, int D, int target_r ) -{ - int rd_value; - - rd_value = ( ((128+(R)*(RM)) >> 8) + (DM)*(D) ); - - return rd_value; -}*/ - -#define UVRDFUNC(RM,DM,R,D,target_r) RDFUNC(RM,DM,R,D,target_r) - #define RDCOST(RM,DM,R,D) ( ((128+(R)*(RM)) >> 8) + (DM)*(D) ) #define MAXF(a,b) (((a) > (b)) ? 
(a) : (b)) @@ -173,45 +161,44 @@ }; -// The values in this table should be reviewed -static int sad_per_bit16lut[128] = +/* values are now correlated to quantizer */ +static int sad_per_bit16lut[QINDEX_RANGE] = { - 4, 4, 4, 4, 4, 4, 4, 4, // 4 - 4, 4, 4, 4, 4, 4, 4, 4, // 1 - 4, 4, 4, 4, 4, 4, 4, 4, // 2 - 4, 4, 4, 4, 4, 4, 4, 4, // 3 - 4, 4, 4, 4, 4, 4, 4, 4, // 4 - 4, 4, 12, 12, 13, 13, 14, 14, // 5 - 14, 14, 14, 15, 15, 15, 15, 15, // 6 - 15, 15, 15, 15, 15, 15, 15, 15, // 7 - 15, 15, 15, 15, 15, 16, 16, 16, // 8 - 16, 16, 18, 18, 18, 18, 19, 19, // 9 - 19, 19, 19, 19, 19, 19, 19, 19, // 10 - 20, 20, 22, 22, 22, 22, 21, 21, // 11 - 22, 22, 22, 22, 22, 22, 22, 22, // 12 - 22, 22, 22, 22, 22, 22, 22, 22, // 13 - 22, 22, 22, 22, 22, 22, 22, 22, // 14 - 22, 22, 22, 22, 22, 22, 22, 22, // 15 + 5, 5, 5, 5, 5, 5, 6, 6, + 6, 6, 6, 6, 6, 7, 7, 7, + 7, 7, 7, 7, 8, 8, 8, 8, + 8, 8, 8, 8, 8, 8, 9, 9, + 9, 9, 9, 9, 10, 10, 10, 10, + 10, 10, 11, 11, 11, 11, 11, 11, + 12, 12, 12, 12, 12, 12, 12, 13, + 13, 13, 13, 13, 13, 14, 14, 14, + 14, 14, 15, 15, 15, 15, 15, 15, + 16, 16, 16, 16, 16, 16, 17, 17, + 17, 17, 17, 17, 17, 18, 18, 18, + 18, 18, 19, 19, 19, 19, 19, 19, + 20, 20, 20, 21, 21, 21, 21, 22, + 22, 22, 23, 23, 23, 24, 24, 24, + 25, 25, 26, 26, 27, 27, 27, 28, + 28, 28, 29, 29, 30, 30, 31, 31 }; - -static int sad_per_bit4lut[128] = +static int sad_per_bit4lut[QINDEX_RANGE] = { - 4, 4, 4, 4, 4, 4, 4, 4, // 4 - 4, 4, 4, 4, 4, 4, 4, 4, // 1 - 4, 4, 4, 4, 4, 4, 4, 4, // 2 - 4, 4, 4, 4, 4, 4, 4, 4, // 3 - 4, 4, 4, 4, 4, 4, 4, 4, // 4 - 4, 4, 15, 15, 15, 15, 16, 16, // 5 - 16, 17, 17, 17, 17, 17, 17, 17, // 6 - 17, 17, 19, 19, 22, 22, 21, 21, // 7 - 23, 23, 23, 23, 23, 24, 24, 24, // 8 - 25, 25, 27, 27, 27, 27, 28, 28, // 9 - 28, 28, 29, 29, 29, 29, 29, 29, // 10 - 30, 30, 31, 31, 31, 31, 32, 32, // 11 - 34, 34, 34, 34, 34, 34, 34, 34, // 12 - 34, 34, 34, 34, 34, 34, 34, 34, // 13 - 34, 34, 34, 34, 34, 34, 34, 34, // 14 - 34, 34, 34, 34, 34, 34, 34, 34, // 15 + 5, 5, 5, 5, 5, 
5, 7, 7, + 7, 7, 7, 7, 7, 8, 8, 8, + 8, 8, 8, 8, 10, 10, 10, 10, + 10, 10, 10, 10, 10, 10, 11, 11, + 11, 11, 11, 11, 13, 13, 13, 13, + 13, 13, 14, 14, 14, 14, 14, 14, + 16, 16, 16, 16, 16, 16, 16, 17, + 17, 17, 17, 17, 17, 19, 19, 19, + 19, 19, 20, 20, 20, 20, 20, 20, + 22, 22, 22, 22, 22, 22, 23, 23, + 23, 23, 23, 23, 23, 25, 25, 25, + 25, 25, 26, 26, 26, 26, 26, 26, + 28, 28, 28, 29, 29, 29, 29, 31, + 31, 31, 32, 32, 32, 34, 34, 34, + 35, 35, 37, 37, 38, 38, 38, 40, + 40, 40, 41, 41, 43, 43, 44, 44, }; void vp8cx_initialize_me_consts(VP8_COMP *cpi, int QIndex) @@ -224,8 +211,6 @@ { int q; int i; - int *thresh; - int threshmult; double capped_q = (Qvalue < 160) ? (double)Qvalue : 160.0; double rdconst = 3.00; @@ -258,13 +243,8 @@ cpi->RDMULT += (cpi->RDMULT * rd_iifactor[cpi->next_iiratio]) >> 4; } - if (cpi->RDMULT < 125) - cpi->RDMULT = 125; - cpi->mb.errorperbit = (cpi->RDMULT / 100); - - if (cpi->mb.errorperbit < 1) - cpi->mb.errorperbit = 1; + cpi->mb.errorperbit += (cpi->mb.errorperbit==0); vp8_set_speed_features(cpi); @@ -276,22 +256,6 @@ if (q < 8) q = 8; - if (cpi->ref_frame_flags == VP8_ALT_FLAG) - { - thresh = &cpi->rd_threshes[THR_NEWA]; - threshmult = cpi->sf.thresh_mult[THR_NEWA]; - } - else if (cpi->ref_frame_flags == VP8_GOLD_FLAG) - { - thresh = &cpi->rd_threshes[THR_NEWG]; - threshmult = cpi->sf.thresh_mult[THR_NEWG]; - } - else - { - thresh = &cpi->rd_threshes[THR_NEWMV]; - threshmult = cpi->sf.thresh_mult[THR_NEWMV]; - } - if (cpi->RDMULT > 1000) { cpi->RDDIV = 1; @@ -477,67 +441,6 @@ return error; } -#if !(CONFIG_REALTIME_ONLY) -static int macro_block_max_error(MACROBLOCK *mb) -{ - int error = 0; - int dc = 0; - BLOCK *be; - int i, j; - int berror; - - dc = !(mb->e_mbd.mode_info_context->mbmi.mode == B_PRED || mb->e_mbd.mode_info_context->mbmi.mode == SPLITMV); - - for (i = 0; i < 16; i++) - { - be = &mb->block[i]; - - berror = 0; - - for (j = dc; j < 16; j++) - { - int this_diff = be->coeff[j]; - berror += this_diff * this_diff; - } - - error 
+= berror; - } - - for (i = 16; i < 24; i++) - { - be = &mb->block[i]; - berror = 0; - - for (j = 0; j < 16; j++) - { - int this_diff = be->coeff[j]; - berror += this_diff * this_diff; - } - - error += berror; - } - - error <<= 2; - - if (dc) - { - be = &mb->block[24]; - berror = 0; - - for (j = 0; j < 16; j++) - { - int this_diff = be->coeff[j]; - berror += this_diff * this_diff; - } - - error += berror; - } - - error >>= 4; - return error; -} -#endif - int VP8_UVSSE(MACROBLOCK *x, const vp8_variance_rtcd_vtable_t *rtcd) { unsigned char *uptr, *vptr; @@ -610,11 +513,10 @@ return cost; } -int vp8_rdcost_mby(MACROBLOCK *mb) +static int vp8_rdcost_mby(MACROBLOCK *mb) { int cost = 0; int b; - int type = 0; MACROBLOCKD *x = &mb->e_mbd; ENTROPY_CONTEXT_PLANES t_above, t_left; ENTROPY_CONTEXT *ta; @@ -626,29 +528,78 @@ ta = (ENTROPY_CONTEXT *)&t_above; tl = (ENTROPY_CONTEXT *)&t_left; - if (x->mode_info_context->mbmi.mode == SPLITMV) - type = 3; - for (b = 0; b < 16; b++) - cost += cost_coeffs(mb, x->block + b, type, + cost += cost_coeffs(mb, x->block + b, PLANE_TYPE_Y_NO_DC, ta + vp8_block2above[b], tl + vp8_block2left[b]); - if (x->mode_info_context->mbmi.mode != SPLITMV) - cost += cost_coeffs(mb, x->block + 24, 1, - ta + vp8_block2above[24], tl + vp8_block2left[24]); + cost += cost_coeffs(mb, x->block + 24, PLANE_TYPE_Y2, + ta + vp8_block2above[24], tl + vp8_block2left[24]); return cost; } +static void macro_block_yrd( MACROBLOCK *mb, + int *Rate, + int *Distortion, + const vp8_encodemb_rtcd_vtable_t *rtcd) +{ + int b; + MACROBLOCKD *const x = &mb->e_mbd; + BLOCK *const mb_y2 = mb->block + 24; + BLOCKD *const x_y2 = x->block + 24; + short *Y2DCPtr = mb_y2->src_diff; + BLOCK *beptr; + int d; + + ENCODEMB_INVOKE(rtcd, submby)( mb->src_diff, mb->src.y_buffer, + mb->e_mbd.predictor, mb->src.y_stride ); + + // Fdct and building the 2nd order block + for (beptr = mb->block; beptr < mb->block + 16; beptr += 2) + { + mb->vp8_short_fdct8x4(beptr->src_diff, beptr->coeff, 32); + 
*Y2DCPtr++ = beptr->coeff[0]; + *Y2DCPtr++ = beptr->coeff[16]; + } + + // 2nd order fdct + mb->short_walsh4x4(mb_y2->src_diff, mb_y2->coeff, 8); + + // Quantization + for (b = 0; b < 16; b++) + { + mb->quantize_b(&mb->block[b], &mb->e_mbd.block[b]); + } + + // DC predication and Quantization of 2nd Order block + mb->quantize_b(mb_y2, x_y2); + + // Distortion + d = ENCODEMB_INVOKE(rtcd, mberr)(mb, 1) << 2; + d += ENCODEMB_INVOKE(rtcd, berr)(mb_y2->coeff, x_y2->dqcoeff); + + *Distortion = (d >> 4); + + // rate + *Rate = vp8_rdcost_mby(mb); +} -static void rd_pick_intra4x4block( +static void copy_predictor(unsigned char *dst, const unsigned char *predictor) +{ + const unsigned int *p = (const unsigned int *)predictor; + unsigned int *d = (unsigned int *)dst; + d[0] = p[0]; + d[4] = p[4]; + d[8] = p[8]; + d[12] = p[12]; +} +static int rd_pick_intra4x4block( VP8_COMP *cpi, MACROBLOCK *x, BLOCK *be, BLOCKD *b, B_PREDICTION_MODE *best_mode, - B_PREDICTION_MODE above, - B_PREDICTION_MODE left, + unsigned int *bmode_costs, ENTROPY_CONTEXT *a, ENTROPY_CONTEXT *l, @@ -657,36 +608,36 @@ int *bestdistortion) { B_PREDICTION_MODE mode; - int best_rd = INT_MAX; // 1<<30 + int best_rd = INT_MAX; int rate = 0; int distortion; - unsigned int *mode_costs; ENTROPY_CONTEXT ta = *a, tempa = *a; ENTROPY_CONTEXT tl = *l, templ = *l; - - - if (x->e_mbd.frame_type == KEY_FRAME) - { - mode_costs = x->bmode_costs[above][left]; - } - else - { - mode_costs = x->inter_bmode_costs; - } + /* + * The predictor buffer is a 2d buffer with a stride of 16. 
Create + * a temp buffer that meets the stride requirements, but we are only + * interested in the left 4x4 block + * */ + DECLARE_ALIGNED_ARRAY(16, unsigned char, best_predictor, 16*4); + DECLARE_ALIGNED_ARRAY(16, short, best_dqcoeff, 16); for (mode = B_DC_PRED; mode <= B_HU_PRED; mode++) { int this_rd; int ratey; - rate = mode_costs[mode]; - vp8_encode_intra4x4block_rd(IF_RTCD(&cpi->rtcd), x, be, b, mode); + rate = bmode_costs[mode]; + + vp8_predict_intra4x4(b, mode, b->predictor); + ENCODEMB_INVOKE(IF_RTCD(&cpi->rtcd.encodemb), subb)(be, b, 16); + x->vp8_short_fdct4x4(be->src_diff, be->coeff, 32); + x->quantize_b(be, b); tempa = ta; templ = tl; - ratey = cost_coeffs(x, b, 3, &tempa, &templ); + ratey = cost_coeffs(x, b, PLANE_TYPE_Y_WITH_DC, &tempa, &templ); rate += ratey; distortion = ENCODEMB_INVOKE(IF_RTCD(&cpi->rtcd.encodemb), berr)(be->coeff, b->dqcoeff) >> 2; @@ -701,25 +652,32 @@ *best_mode = mode; *a = tempa; *l = templ; + copy_predictor(best_predictor, b->predictor); + vpx_memcpy(best_dqcoeff, b->dqcoeff, 32); } } b->bmi.mode = (B_PREDICTION_MODE)(*best_mode); - vp8_encode_intra4x4block_rd(IF_RTCD(&cpi->rtcd), x, be, b, b->bmi.mode); -} + IDCT_INVOKE(IF_RTCD(&cpi->rtcd.common->idct), idct16)(best_dqcoeff, b->diff, 32); + RECON_INVOKE(IF_RTCD(&cpi->rtcd.common->recon), recon)(best_predictor, b->diff, *(b->base_dst) + b->dst, b->dst_stride); + return best_rd; +} -int vp8_rd_pick_intra4x4mby_modes(VP8_COMP *cpi, MACROBLOCK *mb, int *Rate, int *rate_y, int *Distortion) +int vp8_rd_pick_intra4x4mby_modes(VP8_COMP *cpi, MACROBLOCK *mb, int *Rate, + int *rate_y, int *Distortion, int best_rd) { MACROBLOCKD *const xd = &mb->e_mbd; int i; int cost = mb->mbmode_cost [xd->frame_type] [B_PRED]; int distortion = 0; int tot_rate_y = 0; + long long total_rd = 0; ENTROPY_CONTEXT_PLANES t_above, t_left; ENTROPY_CONTEXT *ta; ENTROPY_CONTEXT *tl; + unsigned int *bmode_costs; vpx_memcpy(&t_above, mb->e_mbd.above_context, sizeof(ENTROPY_CONTEXT_PLANES)); vpx_memcpy(&t_left, 
mb->e_mbd.left_context, sizeof(ENTROPY_CONTEXT_PLANES)); @@ -729,17 +687,25 @@ vp8_intra_prediction_down_copy(xd); + bmode_costs = mb->inter_bmode_costs; + for (i = 0; i < 16; i++) { MODE_INFO *const mic = xd->mode_info_context; const int mis = xd->mode_info_stride; - const B_PREDICTION_MODE A = vp8_above_bmi(mic, i, mis)->mode; - const B_PREDICTION_MODE L = vp8_left_bmi(mic, i)->mode; B_PREDICTION_MODE UNINITIALIZED_IS_SAFE(best_mode); int UNINITIALIZED_IS_SAFE(r), UNINITIALIZED_IS_SAFE(ry), UNINITIALIZED_IS_SAFE(d); - rd_pick_intra4x4block( - cpi, mb, mb->block + i, xd->block + i, &best_mode, A, L, + if (mb->e_mbd.frame_type == KEY_FRAME) + { + const B_PREDICTION_MODE A = vp8_above_bmi(mic, i, mis)->mode; + const B_PREDICTION_MODE L = vp8_left_bmi(mic, i)->mode; + + bmode_costs = mb->bmode_costs[A][L]; + } + + total_rd += rd_pick_intra4x4block( + cpi, mb, mb->block + i, xd->block + i, &best_mode, bmode_costs, ta + vp8_block2above[i], tl + vp8_block2left[i], &r, &ry, &d); @@ -747,42 +713,43 @@ distortion += d; tot_rate_y += ry; mic->bmi[i].mode = xd->block[i].bmi.mode = best_mode; + + if(total_rd >= (long long)best_rd) + break; } + if(total_rd >= (long long)best_rd) + return INT_MAX; + *Rate = cost; *rate_y += tot_rate_y; *Distortion = distortion; return RDCOST(mb->rdmult, mb->rddiv, cost, distortion); } - -int vp8_rd_pick_intra16x16mby_mode(VP8_COMP *cpi, MACROBLOCK *x, int *Rate, int *rate_y, int *Distortion) +int vp8_rd_pick_intra16x16mby_mode(VP8_COMP *cpi, + MACROBLOCK *x, + int *Rate, + int *rate_y, + int *Distortion) { - MB_PREDICTION_MODE mode; MB_PREDICTION_MODE UNINITIALIZED_IS_SAFE(mode_selected); int rate, ratey; - unsigned int distortion; + int distortion; int best_rd = INT_MAX; + int this_rd; //Y Search for 16x16 intra prediction mode for (mode = DC_PRED; mode <= TM_PRED; mode++) { - int this_rd; - int dummy; - rate = 0; - x->e_mbd.mode_info_context->mbmi.mode = mode; - rate += 
x->mbmode_cost[x->e_mbd.frame_type][x->e_mbd.mode_info_context->mbmi.mode]; + vp8_build_intra_predictors_mby_ptr(&x->e_mbd); - vp8_encode_intra16x16mbyrd(IF_RTCD(&cpi->rtcd), x); - - ratey = vp8_rdcost_mby(x); - - rate += ratey; - - VARIANCE_INVOKE(&cpi->rtcd.variance, get16x16var)(x->src.y_buffer, x->src.y_stride, x->e_mbd.dst.y_buffer, x->e_mbd.dst.y_stride, &distortion, &dummy); + macro_block_yrd(x, &ratey, &distortion, IF_RTCD(&cpi->rtcd.encodemb)); + rate = ratey + x->mbmode_cost[x->e_mbd.frame_type] + [x->e_mbd.mode_info_context->mbmi.mode]; this_rd = RDCOST(x->rdmult, x->rddiv, rate, distortion); @@ -792,7 +759,7 @@ best_rd = this_rd; *Rate = rate; *rate_y = ratey; - *Distortion = (int)distortion; + *Distortion = distortion; } } @@ -800,7 +767,6 @@ return best_rd; } - static int rd_cost_mbuv(MACROBLOCK *mb) { int b; @@ -816,27 +782,14 @@ ta = (ENTROPY_CONTEXT *)&t_above; tl = (ENTROPY_CONTEXT *)&t_left; - for (b = 16; b < 20; b++) - cost += cost_coeffs(mb, x->block + b, vp8_block2type[b], - ta + vp8_block2above[b], tl + vp8_block2left[b]); - - for (b = 20; b < 24; b++) - cost += cost_coeffs(mb, x->block + b, vp8_block2type[b], + for (b = 16; b < 24; b++) + cost += cost_coeffs(mb, x->block + b, PLANE_TYPE_UV, ta + vp8_block2above[b], tl + vp8_block2left[b]); return cost; } -unsigned int vp8_get_mbuvrecon_error(const vp8_variance_rtcd_vtable_t *rtcd, const MACROBLOCK *x) // sum of squares -{ - unsigned int sse0, sse1; - int sum0, sum1; - VARIANCE_INVOKE(rtcd, get8x8var)(x->src.u_buffer, x->src.uv_stride, x->e_mbd.dst.u_buffer, x->e_mbd.dst.uv_stride, &sse0, &sum0); - VARIANCE_INVOKE(rtcd, get8x8var)(x->src.v_buffer, x->src.uv_stride, x->e_mbd.dst.v_buffer, x->e_mbd.dst.uv_stride, &sse1, &sum1); - return (sse0 + sse1); -} - static int vp8_rd_inter_uv(VP8_COMP *cpi, MACROBLOCK *x, int *rate, int *distortion, int fullpixel) { vp8_build_uvmvs(&x->e_mbd, fullpixel); @@ -846,7 +799,7 @@ *rate = rd_cost_mbuv(x); *distortion = ENCODEMB_INVOKE(&cpi->rtcd.encodemb, 
mbuverr)(x) / 4; - return UVRDFUNC(x->rdmult, x->rddiv, *rate, *distortion, cpi->target_bits_per_mb); + return RDCOST(x->rdmult, x->rddiv, *rate, *distortion); } int vp8_rd_pick_intra_mbuv_mode(VP8_COMP *cpi, MACROBLOCK *x, int *rate, int *rate_tokenonly, int *distortion) @@ -864,14 +817,19 @@ int this_rd; x->e_mbd.mode_info_context->mbmi.uv_mode = mode; - vp8_encode_intra16x16mbuvrd(IF_RTCD(&cpi->rtcd), x); + vp8_build_intra_predictors_mbuv(&x->e_mbd); + ENCODEMB_INVOKE(IF_RTCD(&cpi->rtcd.encodemb), submbuv)(x->src_diff, + x->src.u_buffer, x->src.v_buffer, x->e_mbd.predictor, + x->src.uv_stride); + vp8_transform_mbuv(x); + vp8_quantize_mbuv(x); rate_to = rd_cost_mbuv(x); rate = rate_to + x->intra_uv_mode_cost[x->e_mbd.frame_type][x->e_mbd.mode_info_context->mbmi.uv_mode]; - distortion = vp8_get_mbuvrecon_error(IF_RTCD(&cpi->rtcd.variance), x); + distortion = ENCODEMB_INVOKE(&cpi->rtcd.encodemb, mbuverr)(x) / 4; - this_rd = UVRDFUNC(x->rdmult, x->rddiv, rate, distortion, cpi->target_bits_per_mb); + this_rd = RDCOST(x->rdmult, x->rddiv, rate, distortion); if (this_rd < best_rd) { @@ -918,21 +876,6 @@ } #if !(CONFIG_REALTIME_ONLY) -int vp8_count_labels(int const *labelings) -{ - int i; - int count = 0; - - for (i = 0; i < 16; i++) - { - if (labelings[i] > count) - count = labelings[i]; - } - - return count + 1; -} - - static int labels2mode( MACROBLOCK *x, int const *labelings, int which_label, @@ -1021,7 +964,7 @@ for (b = 0; b < 16; b++) if (labels[ b] == which_label) - cost += cost_coeffs(mb, x->block + b, 3, + cost += cost_coeffs(mb, x->block + b, PLANE_TYPE_Y_WITH_DC, ta + vp8_block2above[b], tl + vp8_block2left[b]); @@ -1056,377 +999,735 @@ return distortion; } -static void macro_block_yrd(MACROBLOCK *mb, int *Rate, int *Distortion, const vp8_encodemb_rtcd_vtable_t *rtcd) -{ - int b; - MACROBLOCKD *const x = &mb->e_mbd; - BLOCK *const mb_y2 = mb->block + 24; - BLOCKD *const x_y2 = x->block + 24; - short *Y2DCPtr = mb_y2->src_diff; - BLOCK *beptr; - int d; 
+unsigned char vp8_mbsplit_offset2[4][16] = { + { 0, 8, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, + { 0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, + { 0, 2, 8, 10, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, + { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15} +}; - ENCODEMB_INVOKE(rtcd, submby)(mb->src_diff, mb->src.y_buffer, mb->e_mbd.predictor, mb->src.y_stride); - // Fdct and building the 2nd order block - for (beptr = mb->block; beptr < mb->block + 16; beptr += 2) - { - mb->vp8_short_fdct8x4(beptr->src_diff, beptr->coeff, 32); - *Y2DCPtr++ = beptr->coeff[0]; - *Y2DCPtr++ = beptr->coeff[16]; - } +static const unsigned int segmentation_to_sseshift[4] = {3, 3, 2, 0}; - // 2nd order fdct - if (x->mode_info_context->mbmi.mode != SPLITMV) - { - mb->short_walsh4x4(mb_y2->src_diff, mb_y2->coeff, 8); - } - // Quantization - for (b = 0; b < 16; b++) - { - mb->quantize_b(&mb->block[b], &mb->e_mbd.block[b]); - } +typedef struct +{ + MV *ref_mv; + MV *mvp; - // DC predication and Quantization of 2nd Order block - if (x->mode_info_context->mbmi.mode != SPLITMV) - { + int segment_rd; + int segment_num; + int r; + int d; + int segment_yrate; + B_PREDICTION_MODE modes[16]; + int_mv mvs[16]; + unsigned char eobs[16]; - { - mb->quantize_b(mb_y2, x_y2); - } - } + int mvthresh; + int *mdcounts; - // Distortion - if (x->mode_info_context->mbmi.mode == SPLITMV) - d = ENCODEMB_INVOKE(rtcd, mberr)(mb, 0) << 2; - else - { - d = ENCODEMB_INVOKE(rtcd, mberr)(mb, 1) << 2; - d += ENCODEMB_INVOKE(rtcd, berr)(mb_y2->coeff, x_y2->dqcoeff); - } + MV sv_mvp[4]; // save 4 mvp from 8x8 + int sv_istep[2]; // save 2 initial step_param for 16x8/8x16 - *Distortion = (d >> 4); +} BEST_SEG_INFO; - // rate - *Rate = vp8_rdcost_mby(mb); -} -static int vp8_rd_pick_best_mbsegmentation(VP8_COMP *cpi, MACROBLOCK *x, MV *best_ref_mv, int best_rd, int *mdcounts, int *returntotrate, int *returnyrate, int *returndistortion, int compressor_speed, int *mvcost[2], int mvthresh, int fullpixel) +void 
vp8_rd_check_segment(VP8_COMP *cpi, MACROBLOCK *x, BEST_SEG_INFO *bsi, + unsigned int segmentation) { - int i, segmentation; - B_PREDICTION_MODE this_mode; - MACROBLOCKD *xc = &x->e_mbd; - BLOCK *b = &x->block[0]; - BLOCKD *d = &x->e_mbd.block[0]; - BLOCK *c = &x->block[0]; - BLOCKD *e = &x->e_mbd.block[0]; + int i; int const *labels; - int best_segment_rd = INT_MAX; - int best_seg = 0; int br = 0; int bd = 0; - int bsr = 0; - int bsd = 0; - int bestsegmentyrate = 0; - - static const int segmentation_to_sseshift[4] = {3, 3, 2, 0}; - - // FIX TO Rd error outrange bug PGW 9 june 2004 - B_PREDICTION_MODE bmodes[16] = {ZERO4X4, ZERO4X4, ZERO4X4, ZERO4X4, - ZERO4X4, ZERO4X4, ZERO4X4, ZERO4X4, - ZERO4X4, ZERO4X4, ZERO4X4, ZERO4X4, - ZERO4X4, ZERO4X4, ZERO4X4, ZERO4X4 - }; - - MV bmvs[16]; - int beobs[16]; - - vpx_memset(beobs, 0, sizeof(beobs)); - - - for (segmentation = 0; segmentation < VP8_NUMMBSPLITS; segmentation++) - { - int label_count; - int this_segment_rd = 0; - int label_mv_thresh; - int rate = 0; - int sbr = 0; - int sbd = 0; - int sseshift; - int segmentyrate = 0; - - vp8_variance_fn_ptr_t *v_fn_ptr; - - ENTROPY_CONTEXT_PLANES t_above, t_left; - ENTROPY_CONTEXT *ta; - ENTROPY_CONTEXT *tl; - ENTROPY_CONTEXT_PLANES t_above_b, t_left_b; - ENTROPY_CONTEXT *ta_b; - ENTROPY_CONTEXT *tl_b; - - vpx_memcpy(&t_above, x->e_mbd.above_context, sizeof(ENTROPY_CONTEXT_PLANES)); - vpx_memcpy(&t_left, x->e_mbd.left_context, sizeof(ENTROPY_CONTEXT_PLANES)); + B_PREDICTION_MODE this_mode; - ta = (ENTROPY_CONTEXT *)&t_above; - tl = (ENTROPY_CONTEXT *)&t_left; - ta_b = (ENTROPY_CONTEXT *)&t_above_b; - tl_b = (ENTROPY_CONTEXT *)&t_left_b; - br = 0; - bd = 0; + int label_count; + int this_segment_rd = 0; + int label_mv_thresh; + int rate = 0; + int sbr = 0; + int sbd = 0; + int segmentyrate = 0; - v_fn_ptr = &cpi->fn_ptr[segmentation]; - sseshift = segmentation_to_sseshift[segmentation]; - labels = vp8_mbsplits[segmentation]; - label_count = vp8_count_labels(labels); + 
vp8_variance_fn_ptr_t *v_fn_ptr; - // 64 makes this threshold really big effectively - // making it so that we very rarely check mvs on - // segments. setting this to 1 would make mv thresh - // roughly equal to what it is for macroblocks - label_mv_thresh = 1 * mvthresh / label_count ; + ENTROPY_CONTEXT_PLANES t_above, t_left; + ENTROPY_CONTEXT *ta; + ENTROPY_CONTEXT *tl; + ENTROPY_CONTEXT_PLANES t_above_b, t_left_b; + ENTROPY_CONTEXT *ta_b; + ENTROPY_CONTEXT *tl_b; - // Segmentation method overheads - rate = vp8_cost_token(vp8_mbsplit_tree, vp8_mbsplit_probs, vp8_mbsplit_encodings + segmentation); + vpx_memcpy(&t_above, x->e_mbd.above_context, sizeof(ENTROPY_CONTEXT_PLANES)); + vpx_memcpy(&t_left, x->e_mbd.left_context, sizeof(ENTROPY_CONTEXT_PLANES)); - rate += vp8_cost_mv_ref(SPLITMV, mdcounts); + ta = (ENTROPY_CONTEXT *)&t_above; + tl = (ENTROPY_CONTEXT *)&t_left; + ta_b = (ENTROPY_CONTEXT *)&t_above_b; + tl_b = (ENTROPY_CONTEXT *)&t_left_b; - this_segment_rd += RDFUNC(x->rdmult, x->rddiv, rate, 0, cpi->target_bits_per_mb); - br += rate; + br = 0; + bd = 0; - for (i = 0; i < label_count; i++) - { - MV mode_mv[B_MODE_COUNT]; - int best_label_rd = INT_MAX; - B_PREDICTION_MODE mode_selected = ZERO4X4; - int j; - int bestlabelyrate = 0; + v_fn_ptr = &cpi->fn_ptr[segmentation]; + labels = vp8_mbsplits[segmentation]; + label_count = vp8_mbsplit_count[segmentation]; + + // 64 makes this threshold really big effectively + // making it so that we very rarely check mvs on + // segments. 
setting this to 1 would make mv thresh + // roughly equal to what it is for macroblocks + label_mv_thresh = 1 * bsi->mvthresh / label_count ; + + // Segmentation method overheads + rate = vp8_cost_token(vp8_mbsplit_tree, vp8_mbsplit_probs, vp8_mbsplit_encodings + segmentation); + rate += vp8_cost_mv_ref(SPLITMV, bsi->mdcounts); + this_segment_rd += RDCOST(x->rdmult, x->rddiv, rate, 0); + br += rate; + + for (i = 0; i < label_count; i++) + { + MV mode_mv[B_MODE_COUNT]; + int best_label_rd = INT_MAX; + B_PREDICTION_MODE mode_selected = ZERO4X4; + int bestlabelyrate = 0; + + // search for the best motion vector on this segment + for (this_mode = LEFT4X4; this_mode <= NEW4X4 ; this_mode ++) + { + int this_rd; + int distortion; + int labelyrate; + ENTROPY_CONTEXT_PLANES t_above_s, t_left_s; + ENTROPY_CONTEXT *ta_s; + ENTROPY_CONTEXT *tl_s; + + vpx_memcpy(&t_above_s, &t_above, sizeof(ENTROPY_CONTEXT_PLANES)); + vpx_memcpy(&t_left_s, &t_left, sizeof(ENTROPY_CONTEXT_PLANES)); - b = &x->block[0]; - d = &x->e_mbd.block[0]; + ta_s = (ENTROPY_CONTEXT *)&t_above_s; + tl_s = (ENTROPY_CONTEXT *)&t_left_s; + if (this_mode == NEW4X4) + { + int sseshift; + int num00; + int step_param = 0; + int further_steps; + int n; + int thissme; + int bestsme = INT_MAX; + MV temp_mv; + BLOCK *c; + BLOCKD *e; - // find first label - for (j = 0; j < 16; j++) - if (labels[j] == i) + // Is the best so far sufficiently good that we cant justify doing and new motion search. 
+ if (best_label_rd < label_mv_thresh) break; - c = &x->block[j]; - e = &x->e_mbd.block[j]; + if(cpi->compressor_speed) + { + if (segmentation == BLOCK_8X16 || segmentation == BLOCK_16X8) + { + bsi->mvp = &bsi->sv_mvp[i]; + if (i==1 && segmentation == BLOCK_16X8) bsi->mvp = &bsi->sv_mvp[2]; - // search for the best motion vector on this segment - for (this_mode = LEFT4X4; this_mode <= NEW4X4 ; this_mode ++) - { - int distortion; - int this_rd; - int num00; - int labelyrate; - ENTROPY_CONTEXT_PLANES t_above_s, t_left_s; - ENTROPY_CONTEXT *ta_s; - ENTROPY_CONTEXT *tl_s; + step_param = bsi->sv_istep[i]; + } - vpx_memcpy(&t_above_s, &t_above, sizeof(ENTROPY_CONTEXT_PLANES)); - vpx_memcpy(&t_left_s, &t_left, sizeof(ENTROPY_CONTEXT_PLANES)); + // use previous block's result as next block's MV predictor. + if (segmentation == BLOCK_4X4 && i>0) + { + bsi->mvp = &(x->e_mbd.block[i-1].bmi.mv.as_mv); + if (i==4 || i==8 || i==12) bsi->mvp = &(x->e_mbd.block[i-4].bmi.mv.as_mv); + step_param = 2; + } + } - ta_s = (ENTROPY_CONTEXT *)&t_above_s; - tl_s = (ENTROPY_CONTEXT *)&t_left_s; + further_steps = (MAX_MVSEARCH_STEPS - 1) - step_param; - if (this_mode == NEW4X4) { - int step_param = 0; - int further_steps = (MAX_MVSEARCH_STEPS - 1) - step_param; - int n; - int thissme; - int bestsme = INT_MAX; - MV temp_mv; + int sadpb = x->sadperbit4; - // Is the best so far sufficiently good that we cant justify doing and new motion search. 
- if (best_label_rd < label_mv_thresh) - break; + // find first label + n = vp8_mbsplit_offset2[segmentation][i]; + c = &x->block[n]; + e = &x->e_mbd.block[n]; + + if (cpi->sf.search_method == HEX) + bestsme = vp8_hex_search(x, c, e, bsi->ref_mv, + &mode_mv[NEW4X4], step_param, sadpb, &num00, v_fn_ptr, x->mvsadcost, x->mvcost, bsi->ref_mv); + + else { - int sadpb = x->sadperbit4; + bestsme = cpi->diamond_search_sad(x, c, e, bsi->mvp, + &mode_mv[NEW4X4], step_param, + sadpb / 2, &num00, v_fn_ptr, x->mvsadcost, x->mvcost, bsi->ref_mv); - if (cpi->sf.search_method == HEX) - bestsme = vp8_hex_search(x, c, e, best_ref_mv, &mode_mv[NEW4X4], step_param, sadpb/*x->errorperbit*/, &num00, v_fn_ptr, x->mvsadcost, mvcost); - else - { - bestsme = cpi->diamond_search_sad(x, c, e, best_ref_mv, &mode_mv[NEW4X4], step_param, sadpb / 2/*x->errorperbit*/, &num00, v_fn_ptr, x->mvsadcost, mvcost); + n = num00; + num00 = 0; - n = num00; - num00 = 0; + while (n < further_steps) + { + n++; - while (n < further_steps) + if (num00) + num00--; + else { - n++; + thissme = cpi->diamond_search_sad(x, c, e, bsi->mvp, + &temp_mv, step_param + n, + sadpb / 2, &num00, v_fn_ptr, x->mvsadcost, x->mvcost, bsi->ref_mv); - if (num00) - num00--; - else + if (thissme < bestsme) { - thissme = cpi->diamond_search_sad(x, c, e, best_ref_mv, &temp_mv, step_param + n, sadpb / 2/*x->errorperbit*/, &num00, v_fn_ptr, x->mvsadcost, mvcost); - - if (thissme < bestsme) - { - bestsme = thissme; - mode_mv[NEW4X4].row = temp_mv.row; - mode_mv[NEW4X4].col = temp_mv.col; - } + bestsme = thissme; + mode_mv[NEW4X4].row = temp_mv.row; + mode_mv[NEW4X4].col = temp_mv.col; } } } + } - // Should we do a full search (best quality only) - if ((compressor_speed == 0) && (bestsme >> sseshift) > 4000) - { - thissme = cpi->full_search_sad(x, c, e, best_ref_mv, sadpb / 4, 16, v_fn_ptr, x->mvcost, x->mvsadcost); + sseshift = segmentation_to_sseshift[segmentation]; - if (thissme < bestsme) - { - bestsme = thissme; - mode_mv[NEW4X4] = 
e->bmi.mv.as_mv; - } - else - { - // The full search result is actually worse so re-instate the previous best vector - e->bmi.mv.as_mv = mode_mv[NEW4X4]; - } + // Should we do a full search (best quality only) + if ((cpi->compressor_speed == 0) && (bestsme >> sseshift) > 4000) + { + thissme = cpi->full_search_sad(x, c, e, bsi->mvp, + sadpb / 4, 16, v_fn_ptr, x->mvcost, x->mvsadcost,bsi->ref_mv); + + if (thissme < bestsme) + { + bestsme = thissme; + mode_mv[NEW4X4] = e->bmi.mv.as_mv; + } + else + { + // The full search result is actually worse so re-instate the previous best vector + e->bmi.mv.as_mv = mode_mv[NEW4X4]; } } + } - if (bestsme < INT_MAX) - { - if (!fullpixel) - cpi->find_fractional_mv_step(x, c, e, &mode_mv[NEW4X4], best_ref_mv, x->errorperbit / 2, v_fn_ptr, mvcost); - else - vp8_skip_fractional_mv_step(x, c, e, &mode_mv[NEW4X4], best_ref_mv, x->errorperbit, v_fn_ptr, mvcost); - } + if (bestsme < INT_MAX) + { + if (!cpi->common.full_pixel) + cpi->find_fractional_mv_step(x, c, e, &mode_mv[NEW4X4], + bsi->ref_mv, x->errorperbit / 2, v_fn_ptr, x->mvcost); + else + vp8_skip_fractional_mv_step(x, c, e, &mode_mv[NEW4X4], + bsi->ref_mv, x->errorperbit, v_fn_ptr, x->mvcost); } + } /* NEW4X4 */ - rate = labels2mode(x, labels, i, this_mode, &mode_mv[this_mode], best_ref_mv, mvcost); + rate = labels2mode(x, labels, i, this_mode, &mode_mv[this_mode], + bsi->ref_mv, x->mvcost); - // Trap vectors that reach beyond the UMV borders - if (((mode_mv[this_mode].row >> 3) < x->mv_row_min) || ((mode_mv[this_mode].row >> 3) > x->mv_row_max) || - ((mode_mv[this_mode].col >> 3) < x->mv_col_min) || ((mode_mv[this_mode].col >> 3) > x->mv_col_max)) - { - continue; - } + // Trap vectors that reach beyond the UMV borders + if (((mode_mv[this_mode].row >> 3) < x->mv_row_min) || ((mode_mv[this_mode].row >> 3) > x->mv_row_max) || + ((mode_mv[this_mode].col >> 3) < x->mv_col_min) || ((mode_mv[this_mode].col >> 3) > x->mv_col_max)) + { + continue; + } - distortion = 
vp8_encode_inter_mb_segment(x, labels, i, IF_RTCD(&cpi->rtcd.encodemb)) / 4; + distortion = vp8_encode_inter_mb_segment(x, labels, i, IF_RTCD(&cpi->rtcd.encodemb)) / 4; - labelyrate = rdcost_mbsegment_y(x, labels, i, ta_s, tl_s); - rate += labelyrate; + labelyrate = rdcost_mbsegment_y(x, labels, i, ta_s, tl_s); + rate += labelyrate; - this_rd = RDFUNC(x->rdmult, x->rddiv, rate, distortion, cpi->target_bits_per_mb); + this_rd = RDCOST(x->rdmult, x->rddiv, rate, distortion); - if (this_rd < best_label_rd) - { - sbr = rate; - sbd = distortion; - bestlabelyrate = labelyrate; - mode_selected = this_mode; - best_label_rd = this_rd; + if (this_rd < best_label_rd) + { + sbr = rate; + sbd = distortion; + bestlabelyrate = labelyrate; + mode_selected = this_mode; + best_label_rd = this_rd; - vpx_memcpy(ta_b, ta_s, sizeof(ENTROPY_CONTEXT_PLANES)); - vpx_memcpy(tl_b, tl_s, sizeof(ENTROPY_CONTEXT_PLANES)); + vpx_memcpy(ta_b, ta_s, sizeof(ENTROPY_CONTEXT_PLANES)); + vpx_memcpy(tl_b, tl_s, sizeof(ENTROPY_CONTEXT_PLANES)); - } } + } /*for each 4x4 mode*/ - vpx_memcpy(ta, ta_b, sizeof(ENTROPY_CONTEXT_PLANES)); - vpx_memcpy(tl, tl_b, sizeof(ENTROPY_CONTEXT_PLANES)); + vpx_memcpy(ta, ta_b, sizeof(ENTROPY_CONTEXT_PLANES)); + vpx_memcpy(tl, tl_b, sizeof(ENTROPY_CONTEXT_PLANES)); - labels2mode(x, labels, i, mode_selected, &mode_mv[mode_selected], best_ref_mv, mvcost); + labels2mode(x, labels, i, mode_selected, &mode_mv[mode_selected], + bsi->ref_mv, x->mvcost); - br += sbr; - bd += sbd; - segmentyrate += bestlabelyrate; - this_segment_rd += best_label_rd; + br += sbr; + bd += sbd; + segmentyrate += bestlabelyrate; + this_segment_rd += best_label_rd; - if ((this_segment_rd > best_rd) || (this_segment_rd > best_segment_rd)) - break; + if (this_segment_rd >= bsi->segment_rd) + break; + + } /* for each label */ + + if (this_segment_rd < bsi->segment_rd) + { + bsi->r = br; + bsi->d = bd; + bsi->segment_yrate = segmentyrate; + bsi->segment_rd = this_segment_rd; + bsi->segment_num = 
segmentation; + + // store everything needed to come back to this!! + for (i = 0; i < 16; i++) + { + BLOCKD *bd = &x->e_mbd.block[i]; + + bsi->mvs[i].as_mv = bd->bmi.mv.as_mv; + bsi->modes[i] = bd->bmi.mode; + bsi->eobs[i] = bd->eob; } + } +} + +static __inline +void vp8_cal_step_param(int sr, int *sp) +{ + int step = 0; - if ((this_segment_rd <= best_rd) && (this_segment_rd < best_segment_rd)) + if (sr > MAX_FIRST_STEP) sr = MAX_FIRST_STEP; + else if (sr < 1) sr = 1; + + while (sr>>=1) + step++; + + *sp = MAX_MVSEARCH_STEPS - 1 - step; +} + +static int vp8_rd_pick_best_mbsegmentation(VP8_COMP *cpi, MACROBLOCK *x, + MV *best_ref_mv, int best_rd, + int *mdcounts, int *returntotrate, + int *returnyrate, int *returndistortion, + int mvthresh) +{ + int i; + BEST_SEG_INFO bsi; + + vpx_memset(&bsi, 0, sizeof(bsi)); + + bsi.segment_rd = best_rd; + bsi.ref_mv = best_ref_mv; + bsi.mvp = best_ref_mv; + bsi.mvthresh = mvthresh; + bsi.mdcounts = mdcounts; + + for(i = 0; i < 16; i++) + { + bsi.modes[i] = ZERO4X4; + } + + if(cpi->compressor_speed == 0) + { + /* for now, we will keep the original segmentation order + when in best quality mode */ + vp8_rd_check_segment(cpi, x, &bsi, BLOCK_16X8); + vp8_rd_check_segment(cpi, x, &bsi, BLOCK_8X16); + vp8_rd_check_segment(cpi, x, &bsi, BLOCK_8X8); + vp8_rd_check_segment(cpi, x, &bsi, BLOCK_4X4); + } + else + { + int sr; + + vp8_rd_check_segment(cpi, x, &bsi, BLOCK_8X8); + + if (bsi.segment_rd < best_rd) { - bsr = br; - bsd = bd; - bestsegmentyrate = segmentyrate; - best_segment_rd = this_segment_rd; - best_seg = segmentation; + int col_min = (best_ref_mv->col - MAX_FULL_PEL_VAL) >>3; + int col_max = (best_ref_mv->col + MAX_FULL_PEL_VAL) >>3; + int row_min = (best_ref_mv->row - MAX_FULL_PEL_VAL) >>3; + int row_max = (best_ref_mv->row + MAX_FULL_PEL_VAL) >>3; - // store everything needed to come back to this!! 
- for (i = 0; i < 16; i++) + int tmp_col_min = x->mv_col_min; + int tmp_col_max = x->mv_col_max; + int tmp_row_min = x->mv_row_min; + int tmp_row_max = x->mv_row_max; + + /* Get intersection of UMV window and valid MV window to reduce # of checks in diamond search. */ + if (x->mv_col_min < col_min ) + x->mv_col_min = col_min; + if (x->mv_col_max > col_max ) + x->mv_col_max = col_max; + if (x->mv_row_min < row_min ) + x->mv_row_min = row_min; + if (x->mv_row_max > row_max ) + x->mv_row_max = row_max; + + /* Get 8x8 result */ + bsi.sv_mvp[0] = bsi.mvs[0].as_mv; + bsi.sv_mvp[1] = bsi.mvs[2].as_mv; + bsi.sv_mvp[2] = bsi.mvs[8].as_mv; + bsi.sv_mvp[3] = bsi.mvs[10].as_mv; + + /* Use 8x8 result as 16x8/8x16's predictor MV. Adjust search range according to the closeness of 2 MV. */ + /* block 8X16 */ + { + sr = MAXF((abs(bsi.sv_mvp[0].row - bsi.sv_mvp[2].row))>>3, (abs(bsi.sv_mvp[0].col - bsi.sv_mvp[2].col))>>3); + vp8_cal_step_param(sr, &bsi.sv_istep[0]); + + sr = MAXF((abs(bsi.sv_mvp[1].row - bsi.sv_mvp[3].row))>>3, (abs(bsi.sv_mvp[1].col - bsi.sv_mvp[3].col))>>3); + vp8_cal_step_param(sr, &bsi.sv_istep[1]); + + vp8_rd_check_segment(cpi, x, &bsi, BLOCK_8X16); + } + + /* block 16X8 */ { - BLOCKD *bd = &x->e_mbd.block[i]; + sr = MAXF((abs(bsi.sv_mvp[0].row - bsi.sv_mvp[1].row))>>3, (abs(bsi.sv_mvp[0].col - bsi.sv_mvp[1].col))>>3); + vp8_cal_step_param(sr, &bsi.sv_istep[0]); + + sr = MAXF((abs(bsi.sv_mvp[2].row - bsi.sv_mvp[3].row))>>3, (abs(bsi.sv_mvp[2].col - bsi.sv_mvp[3].col))>>3); + vp8_cal_step_param(sr, &bsi.sv_istep[1]); + + vp8_rd_check_segment(cpi, x, &bsi, BLOCK_16X8); + } - bmvs[i] = bd->bmi.mv.as_mv; - bmodes[i] = bd->bmi.mode; - beobs[i] = bd->eob; + /* If 8x8 is better than 16x8/8x16, then do 4x4 search */ + /* Not skip 4x4 if speed=0 (good quality) */ + if (cpi->sf.no_skip_block4x4_search || bsi.segment_num == BLOCK_8X8) /* || (sv_segment_rd8x8-bsi.segment_rd) < sv_segment_rd8x8>>5) */ + { + bsi.mvp = &bsi.sv_mvp[0]; + vp8_rd_check_segment(cpi, x, &bsi, 
BLOCK_4X4); } + + /* restore UMV window */ + x->mv_col_min = tmp_col_min; + x->mv_col_max = tmp_col_max; + x->mv_row_min = tmp_row_min; + x->mv_row_max = tmp_row_max; } } - // set it to the best + /* set it to the best */ for (i = 0; i < 16; i++) { BLOCKD *bd = &x->e_mbd.block[i]; - bd->bmi.mv.as_mv = bmvs[i]; - bd->bmi.mode = bmodes[i]; - bd->eob = beobs[i]; + bd->bmi.mv.as_mv = bsi.mvs[i].as_mv; + bd->bmi.mode = bsi.modes[i]; + bd->eob = bsi.eobs[i]; + } + + *returntotrate = bsi.r; + *returndistortion = bsi.d; + *returnyrate = bsi.segment_yrate; + + /* save partitions */ + x->e_mbd.mode_info_context->mbmi.partitioning = bsi.segment_num; + x->partition_info->count = vp8_mbsplit_count[bsi.segment_num]; + + for (i = 0; i < x->partition_info->count; i++) + { + int j; + + j = vp8_mbsplit_offset2[bsi.segment_num][i]; + + x->partition_info->bmi[i].mode = x->e_mbd.block[j].bmi.mode; + x->partition_info->bmi[i].mv.as_mv = x->e_mbd.block[j].bmi.mv.as_mv; } - // Trap cases where the best split mode has all vectors coded 0,0 (or all the same) - if (FALSE) + return bsi.segment_rd; +} +#endif + +static void swap(int *x,int *y) +{ + int tmp; + + tmp = *x; + *x = *y; + *y = tmp; +} + +static void quicksortmv(int arr[],int left, int right) +{ + int lidx,ridx,pivot; + + lidx = left; + ridx = right; + + if( left < right) + { + pivot = (left + right)/2; + + while(lidx <=pivot && ridx >=pivot) + { + while(arr[lidx] < arr[pivot] && lidx <= pivot) + lidx++; + while(arr[ridx] > arr[pivot] && ridx >= pivot) + ridx--; + swap(&arr[lidx], &arr[ridx]); + lidx++; + ridx--; + if(lidx-1 == pivot) + { + ridx++; + pivot = ridx; + } + else if(ridx+1 == pivot) + { + lidx--; + pivot = lidx; + } + } + quicksortmv(arr, left, pivot - 1); + quicksortmv(arr, pivot + 1, right); + } +} + +static void quicksortsad(int arr[],int idx[], int left, int right) +{ + int lidx,ridx,pivot; + + lidx = left; + ridx = right; + + if( left < right) + { + pivot = (left + right)/2; + + while(lidx <=pivot && ridx >=pivot) + 
{ + while(arr[lidx] < arr[pivot] && lidx <= pivot) + lidx++; + while(arr[ridx] > arr[pivot] && ridx >= pivot) + ridx--; + swap(&arr[lidx], &arr[ridx]); + swap(&idx[lidx], &idx[ridx]); + lidx++; + ridx--; + if(lidx-1 == pivot) + { + ridx++; + pivot = ridx; + } + else if(ridx+1 == pivot) + { + lidx--; + pivot = lidx; + } + } + quicksortsad(arr, idx, left, pivot - 1); + quicksortsad(arr, idx, pivot + 1, right); + } +} + +//The improved MV prediction +void vp8_mv_pred +( + VP8_COMP *cpi, + MACROBLOCKD *xd, + const MODE_INFO *here, + MV *mvp, + int refframe, + int *ref_frame_sign_bias, + int *sr, + int near_sadidx[] +) +{ + const MODE_INFO *above = here - xd->mode_info_stride; + const MODE_INFO *left = here - 1; + const MODE_INFO *aboveleft = above - 1; + int_mv near_mvs[8]; + int near_ref[8]; + int_mv mv; + int vcnt=0; + int find=0; + int mb_offset; + + int mvx[8]; + int mvy[8]; + int i; + + mv.as_int = 0; + + if(here->mbmi.ref_frame != INTRA_FRAME) { - int allsame = 1; + near_mvs[0].as_int = near_mvs[1].as_int = near_mvs[2].as_int = near_mvs[3].as_int = near_mvs[4].as_int = near_mvs[5].as_int = near_mvs[6].as_int = near_mvs[7].as_int = 0; + near_ref[0] = near_ref[1] = near_ref[2] = near_ref[3] = near_ref[4] = near_ref[5] = near_ref[6] = near_ref[7] = 0; - for (i = 1; i < 16; i++) + // read in 3 nearby block's MVs from current frame as prediction candidates. 
+ if (above->mbmi.ref_frame != INTRA_FRAME) { - if ((bmvs[i].col != bmvs[i-1].col) || (bmvs[i].row != bmvs[i-1].row)) + near_mvs[vcnt].as_int = above->mbmi.mv.as_int; + mv_bias(ref_frame_sign_bias[above->mbmi.ref_frame], refframe, &near_mvs[vcnt], ref_frame_sign_bias); + near_ref[vcnt] = above->mbmi.ref_frame; + } + vcnt++; + if (left->mbmi.ref_frame != INTRA_FRAME) + { + near_mvs[vcnt].as_int = left->mbmi.mv.as_int; + mv_bias(ref_frame_sign_bias[left->mbmi.ref_frame], refframe, &near_mvs[vcnt], ref_frame_sign_bias); + near_ref[vcnt] = left->mbmi.ref_frame; + } + vcnt++; + if (aboveleft->mbmi.ref_frame != INTRA_FRAME) + { + near_mvs[vcnt].as_int = aboveleft->mbmi.mv.as_int; + mv_bias(ref_frame_sign_bias[aboveleft->mbmi.ref_frame], refframe, &near_mvs[vcnt], ref_frame_sign_bias); + near_ref[vcnt] = aboveleft->mbmi.ref_frame; + } + vcnt++; + + // read in 5 nearby block's MVs from last frame. + if(cpi->common.last_frame_type != KEY_FRAME) + { + mb_offset = (-xd->mb_to_top_edge/128 + 1) * (xd->mode_info_stride +1) + (-xd->mb_to_left_edge/128 +1) ; + + // current in last frame + if (cpi->lf_ref_frame[mb_offset] != INTRA_FRAME) { - allsame = 0; - break; + near_mvs[vcnt].as_int = cpi->lfmv[mb_offset].as_int; + mv_bias(cpi->lf_ref_frame_sign_bias[mb_offset], refframe, &near_mvs[vcnt], ref_frame_sign_bias); + near_ref[vcnt] = cpi->lf_ref_frame[mb_offset]; + } + vcnt++; + + // above in last frame + if (cpi->lf_ref_frame[mb_offset - xd->mode_info_stride-1] != INTRA_FRAME) + { + near_mvs[vcnt].as_int = cpi->lfmv[mb_offset - xd->mode_info_stride-1].as_int; + mv_bias(cpi->lf_ref_frame_sign_bias[mb_offset - xd->mode_info_stride-1], refframe, &near_mvs[vcnt], ref_frame_sign_bias); + near_ref[vcnt] = cpi->lf_ref_frame[mb_offset - xd->mode_info_stride-1]; + } + vcnt++; + + // left in last frame + if (cpi->lf_ref_frame[mb_offset-1] != INTRA_FRAME) + { + near_mvs[vcnt].as_int = cpi->lfmv[mb_offset -1].as_int; + mv_bias(cpi->lf_ref_frame_sign_bias[mb_offset -1], refframe, 
&near_mvs[vcnt], ref_frame_sign_bias); + near_ref[vcnt] = cpi->lf_ref_frame[mb_offset - 1]; + } + vcnt++; + + // right in last frame + if (cpi->lf_ref_frame[mb_offset +1] != INTRA_FRAME) + { + near_mvs[vcnt].as_int = cpi->lfmv[mb_offset +1].as_int; + mv_bias(cpi->lf_ref_frame_sign_bias[mb_offset +1], refframe, &near_mvs[vcnt], ref_frame_sign_bias); + near_ref[vcnt] = cpi->lf_ref_frame[mb_offset +1]; + } + vcnt++; + + // below in last frame + if (cpi->lf_ref_frame[mb_offset + xd->mode_info_stride +1] != INTRA_FRAME) + { + near_mvs[vcnt].as_int = cpi->lfmv[mb_offset + xd->mode_info_stride +1].as_int; + mv_bias(cpi->lf_ref_frame_sign_bias[mb_offset + xd->mode_info_stride +1], refframe, &near_mvs[vcnt], ref_frame_sign_bias); + near_ref[vcnt] = cpi->lf_ref_frame[mb_offset + xd->mode_info_stride +1]; + } + vcnt++; + } + + for(i=0; i< vcnt; i++) + { + if(near_ref[near_sadidx[i]] != INTRA_FRAME) + { + if(here->mbmi.ref_frame == near_ref[near_sadidx[i]]) + { + mv.as_int = near_mvs[near_sadidx[i]].as_int; + find = 1; + if (i < 3) + *sr = 3; + else + *sr = 2; + break; + } } } - if (allsame) + if(!find) { - best_segment_rd = INT_MAX; + for(i=0; ie_mbd.mode_info_context->mbmi.partitioning = best_seg; - x->partition_info->count = vp8_count_labels(labels); + //calculate sad for current frame 3 nearby MBs. + if( xd->mb_to_top_edge==0 && xd->mb_to_left_edge ==0) + { + near_sad[0] = near_sad[1] = near_sad[2] = INT_MAX; + }else if(xd->mb_to_top_edge==0) + { //only has left MB for sad calculation. + near_sad[0] = near_sad[2] = INT_MAX; + near_sad[1] = cpi->fn_ptr[BLOCK_16X16].sdf(x->src.y_buffer, x->src.y_stride, xd->dst.y_buffer - 16,xd->dst.y_stride, 0x7fffffff); + }else if(xd->mb_to_left_edge ==0) + { //only has left MB for sad calculation. 
+ near_sad[1] = near_sad[2] = INT_MAX; + near_sad[0] = cpi->fn_ptr[BLOCK_16X16].sdf(x->src.y_buffer, x->src.y_stride, xd->dst.y_buffer - xd->dst.y_stride *16,xd->dst.y_stride, 0x7fffffff); + }else + { + near_sad[0] = cpi->fn_ptr[BLOCK_16X16].sdf(x->src.y_buffer, x->src.y_stride, xd->dst.y_buffer - xd->dst.y_stride *16,xd->dst.y_stride, 0x7fffffff); + near_sad[1] = cpi->fn_ptr[BLOCK_16X16].sdf(x->src.y_buffer, x->src.y_stride, xd->dst.y_buffer - 16,xd->dst.y_stride, 0x7fffffff); + near_sad[2] = cpi->fn_ptr[BLOCK_16X16].sdf(x->src.y_buffer, x->src.y_stride, xd->dst.y_buffer - xd->dst.y_stride *16 -16,xd->dst.y_stride, 0x7fffffff); + } - for (i = 0; i < x->partition_info->count; i++) + if(cpi->common.last_frame_type != KEY_FRAME) { - int j; + //calculate sad for last frame 5 nearby MBs. + unsigned char *pre_y_buffer = cpi->common.yv12_fb[cpi->common.lst_fb_idx].y_buffer + recon_yoffset; + int pre_y_stride = cpi->common.yv12_fb[cpi->common.lst_fb_idx].y_stride; - for (j = 0; j < 16; j++) - { - if (labels[j] == i) - break; - } + if(xd->mb_to_top_edge==0) near_sad[4] = INT_MAX; + if(xd->mb_to_left_edge ==0) near_sad[5] = INT_MAX; + if(xd->mb_to_right_edge ==0) near_sad[6] = INT_MAX; + if(xd->mb_to_bottom_edge==0) near_sad[7] = INT_MAX; - x->partition_info->bmi[i].mode = x->e_mbd.block[j].bmi.mode; - x->partition_info->bmi[i].mv.as_mv = x->e_mbd.block[j].bmi.mv.as_mv; + if(near_sad[4] != INT_MAX) + near_sad[4] = cpi->fn_ptr[BLOCK_16X16].sdf(x->src.y_buffer, x->src.y_stride, pre_y_buffer - pre_y_stride *16, pre_y_stride, 0x7fffffff); + if(near_sad[5] != INT_MAX) + near_sad[5] = cpi->fn_ptr[BLOCK_16X16].sdf(x->src.y_buffer, x->src.y_stride, pre_y_buffer - 16, pre_y_stride, 0x7fffffff); + near_sad[3] = cpi->fn_ptr[BLOCK_16X16].sdf(x->src.y_buffer, x->src.y_stride, pre_y_buffer, pre_y_stride, 0x7fffffff); + if(near_sad[6] != INT_MAX) + near_sad[6] = cpi->fn_ptr[BLOCK_16X16].sdf(x->src.y_buffer, x->src.y_stride, pre_y_buffer + 16, pre_y_stride, 0x7fffffff); + if(near_sad[7] != 
INT_MAX) + near_sad[7] = cpi->fn_ptr[BLOCK_16X16].sdf(x->src.y_buffer, x->src.y_stride, pre_y_buffer + pre_y_stride *16, pre_y_stride, 0x7fffffff); } - return best_segment_rd; + if(cpi->common.last_frame_type != KEY_FRAME) + { + quicksortsad(near_sad, near_sadidx, 0, 7); + }else + { + quicksortsad(near_sad, near_sadidx, 0, 2); + } } - +#if !(CONFIG_REALTIME_ONLY) int vp8_rd_pick_inter_mode(VP8_COMP *cpi, MACROBLOCK *x, int recon_yoffset, int recon_uvoffset, int *returnrate, int *returndistortion, int *returnintra) { BLOCK *b = &x->block[0]; @@ -1451,6 +1752,8 @@ int rate2, distortion2; int uv_intra_rate, uv_intra_distortion, uv_intra_rate_tokenonly; int rate_y, UNINITIALIZED_IS_SAFE(rate_uv); + int distortion_uv; + int best_yrd = INT_MAX; //int all_rds[MAX_MODES]; // Experimental debug code. //int all_rates[MAX_MODES]; @@ -1458,42 +1761,73 @@ //int intermodecost[MAX_MODES]; MB_PREDICTION_MODE uv_intra_mode; - int sse; - int sum; - int uvintra_eob = 0; - int tteob = 0; + int force_no_skip = 0; - *returnintra = INT_MAX; + MV mvp; + int near_sadidx[8] = {0, 1, 2, 3, 4, 5, 6, 7}; + int saddone=0; + int sr=0; //search range got from mv_pred(). It uses step_param levels. 
(0-7) - vpx_memset(&best_mbmode, 0, sizeof(best_mbmode)); // clean + MV frame_nearest_mv[4]; + MV frame_near_mv[4]; + MV frame_best_ref_mv[4]; + int frame_mdcounts[4][4]; + int frame_lf_or_gf[4]; + unsigned char *y_buffer[4]; + unsigned char *u_buffer[4]; + unsigned char *v_buffer[4]; - cpi->mbs_tested_so_far++; // Count of the number of MBs tested so far this frame + vpx_memset(&best_mbmode, 0, sizeof(best_mbmode)); - x->skip = 0; + if (cpi->ref_frame_flags & VP8_LAST_FLAG) + { + YV12_BUFFER_CONFIG *lst_yv12 = &cpi->common.yv12_fb[cpi->common.lst_fb_idx]; - ref_frame_cost[INTRA_FRAME] = vp8_cost_zero(cpi->prob_intra_coded); + vp8_find_near_mvs(&x->e_mbd, x->e_mbd.mode_info_context, &frame_nearest_mv[LAST_FRAME], &frame_near_mv[LAST_FRAME], + &frame_best_ref_mv[LAST_FRAME], frame_mdcounts[LAST_FRAME], LAST_FRAME, cpi->common.ref_frame_sign_bias); + + y_buffer[LAST_FRAME] = lst_yv12->y_buffer + recon_yoffset; + u_buffer[LAST_FRAME] = lst_yv12->u_buffer + recon_uvoffset; + v_buffer[LAST_FRAME] = lst_yv12->v_buffer + recon_uvoffset; - // Experimental code - // Adjust the RD multiplier based on the best case distortion we saw in the most recently coded mb - //if ( (cpi->last_mb_distortion) > 0 && (cpi->target_bits_per_mb > 0) ) - /*{ - int tmprdmult; - - //tmprdmult = (cpi->last_mb_distortion * 256) / ((cpi->av_per_frame_bandwidth*256)/cpi->common.MBs); - tmprdmult = (cpi->last_mb_distortion * 256) / cpi->target_bits_per_mb; - //tmprdmult = tmprdmult; - - //if ( tmprdmult > cpi->RDMULT * 2 ) - // tmprdmult = cpi->RDMULT * 2; - //else if ( tmprdmult < cpi->RDMULT / 2 ) - // tmprdmult = cpi->RDMULT / 2; + frame_lf_or_gf[LAST_FRAME] = 0; + } + + if (cpi->ref_frame_flags & VP8_GOLD_FLAG) + { + YV12_BUFFER_CONFIG *gld_yv12 = &cpi->common.yv12_fb[cpi->common.gld_fb_idx]; + + vp8_find_near_mvs(&x->e_mbd, x->e_mbd.mode_info_context, &frame_nearest_mv[GOLDEN_FRAME], &frame_near_mv[GOLDEN_FRAME], + &frame_best_ref_mv[GOLDEN_FRAME], frame_mdcounts[GOLDEN_FRAME], GOLDEN_FRAME, 
cpi->common.ref_frame_sign_bias); + + y_buffer[GOLDEN_FRAME] = gld_yv12->y_buffer + recon_yoffset; + u_buffer[GOLDEN_FRAME] = gld_yv12->u_buffer + recon_uvoffset; + v_buffer[GOLDEN_FRAME] = gld_yv12->v_buffer + recon_uvoffset; + + frame_lf_or_gf[GOLDEN_FRAME] = 1; + } + + if (cpi->ref_frame_flags & VP8_ALT_FLAG) + { + YV12_BUFFER_CONFIG *alt_yv12 = &cpi->common.yv12_fb[cpi->common.alt_fb_idx]; + + vp8_find_near_mvs(&x->e_mbd, x->e_mbd.mode_info_context, &frame_nearest_mv[ALTREF_FRAME], &frame_near_mv[ALTREF_FRAME], + &frame_best_ref_mv[ALTREF_FRAME], frame_mdcounts[ALTREF_FRAME], ALTREF_FRAME, cpi->common.ref_frame_sign_bias); - //tmprdmult = (tmprdmult < 25) ? 25 : tmprdmult; + y_buffer[ALTREF_FRAME] = alt_yv12->y_buffer + recon_yoffset; + u_buffer[ALTREF_FRAME] = alt_yv12->u_buffer + recon_uvoffset; + v_buffer[ALTREF_FRAME] = alt_yv12->v_buffer + recon_uvoffset; - //x->rdmult = tmprdmult; + frame_lf_or_gf[ALTREF_FRAME] = 1; + } + + *returnintra = INT_MAX; + cpi->mbs_tested_so_far++; // Count of the number of MBs tested so far this frame + + x->skip = 0; - }*/ + ref_frame_cost[INTRA_FRAME] = vp8_cost_zero(cpi->prob_intra_coded); // Special case treatment when GF and ARF are not sensible options for reference if (cpi->ref_frame_flags == VP8_LAST_FLAG) @@ -1524,19 +1858,13 @@ x->e_mbd.mode_info_context->mbmi.ref_frame = INTRA_FRAME; vp8_rd_pick_intra_mbuv_mode(cpi, x, &uv_intra_rate, &uv_intra_rate_tokenonly, &uv_intra_distortion); uv_intra_mode = x->e_mbd.mode_info_context->mbmi.uv_mode; - { - uvintra_eob = 0; - - for (i = 16; i < 24; i++) - uvintra_eob += x->e_mbd.block[i].eob; - } for (mode_index = 0; mode_index < MAX_MODES; mode_index++) { - int frame_cost; int this_rd = INT_MAX; int lf_or_gf = 0; // Lat Frame (01) or gf/arf (1) int disable_skip = 0; + int other_cost = 0; force_no_skip = 0; @@ -1551,97 +1879,58 @@ if (best_rd <= cpi->rd_threshes[mode_index]) continue; - - // These variables hold are rolling total cost and distortion for this mode rate2 = 0; 
distortion2 = 0; - // Where skip is allowable add in the default per mb cost for the no skip case. - // where we then decide to skip we have to delete this and replace it with the - // cost of signallying a skip - if (cpi->common.mb_no_coeff_skip) - { - rate2 += vp8_cost_bit(cpi->prob_skip_false, 0); - } - this_mode = vp8_mode_order[mode_index]; x->e_mbd.mode_info_context->mbmi.mode = this_mode; x->e_mbd.mode_info_context->mbmi.uv_mode = DC_PRED; x->e_mbd.mode_info_context->mbmi.ref_frame = vp8_ref_frame_order[mode_index]; - //Only consider ZEROMV/ALTREF_FRAME for alt ref frame. - if (cpi->is_src_frame_alt_ref) + // Only consider ZEROMV/ALTREF_FRAME for alt ref frame, + // unless ARNR filtering is enabled in which case we want + // an unfiltered alternative + if (cpi->is_src_frame_alt_ref && (cpi->oxcf.arnr_max_frames == 0)) { if (this_mode != ZEROMV || x->e_mbd.mode_info_context->mbmi.ref_frame != ALTREF_FRAME) continue; } - if (x->e_mbd.mode_info_context->mbmi.ref_frame == LAST_FRAME) + /* everything but intra */ + if (x->e_mbd.mode_info_context->mbmi.ref_frame) { - YV12_BUFFER_CONFIG *lst_yv12 = &cpi->common.yv12_fb[cpi->common.lst_fb_idx]; - - if (!(cpi->ref_frame_flags & VP8_LAST_FLAG)) - continue; - - lf_or_gf = 0; // Local last frame vs Golden frame flag - - // Set up pointers for this macro block into the previous frame recon buffer - x->e_mbd.pre.y_buffer = lst_yv12->y_buffer + recon_yoffset; - x->e_mbd.pre.u_buffer = lst_yv12->u_buffer + recon_uvoffset; - x->e_mbd.pre.v_buffer = lst_yv12->v_buffer + recon_uvoffset; - } - else if (x->e_mbd.mode_info_context->mbmi.ref_frame == GOLDEN_FRAME) - { - YV12_BUFFER_CONFIG *gld_yv12 = &cpi->common.yv12_fb[cpi->common.gld_fb_idx]; - - // not supposed to reference gold frame - if (!(cpi->ref_frame_flags & VP8_GOLD_FLAG)) - continue; - - lf_or_gf = 1; // Local last frame vs Golden frame flag - - // Set up pointers for this macro block into the previous frame recon buffer - x->e_mbd.pre.y_buffer = gld_yv12->y_buffer + 
recon_yoffset; - x->e_mbd.pre.u_buffer = gld_yv12->u_buffer + recon_uvoffset; - x->e_mbd.pre.v_buffer = gld_yv12->v_buffer + recon_uvoffset; - } - else if (x->e_mbd.mode_info_context->mbmi.ref_frame == ALTREF_FRAME) - { - YV12_BUFFER_CONFIG *alt_yv12 = &cpi->common.yv12_fb[cpi->common.alt_fb_idx]; - - // not supposed to reference alt ref frame - if (!(cpi->ref_frame_flags & VP8_ALT_FLAG)) - continue; - - //if ( !cpi->source_alt_ref_active ) - // continue; - - lf_or_gf = 1; // Local last frame vs Golden frame flag - - // Set up pointers for this macro block into the previous frame recon buffer - x->e_mbd.pre.y_buffer = alt_yv12->y_buffer + recon_yoffset; - x->e_mbd.pre.u_buffer = alt_yv12->u_buffer + recon_uvoffset; - x->e_mbd.pre.v_buffer = alt_yv12->v_buffer + recon_uvoffset; - } - - vp8_find_near_mvs(&x->e_mbd, - x->e_mbd.mode_info_context, - &mode_mv[NEARESTMV], &mode_mv[NEARMV], &best_ref_mv, - mdcounts, x->e_mbd.mode_info_context->mbmi.ref_frame, cpi->common.ref_frame_sign_bias); - - - // Estimate the reference frame signaling cost and add it to the rolling cost variable. 
- frame_cost = ref_frame_cost[x->e_mbd.mode_info_context->mbmi.ref_frame]; - rate2 += frame_cost; - - if (this_mode <= B_PRED) - { - for (i = 0; i < 16; i++) - { - vpx_memset(&x->e_mbd.block[i].bmi, 0, sizeof(B_MODE_INFO)); - } + x->e_mbd.pre.y_buffer = y_buffer[x->e_mbd.mode_info_context->mbmi.ref_frame]; + x->e_mbd.pre.u_buffer = u_buffer[x->e_mbd.mode_info_context->mbmi.ref_frame]; + x->e_mbd.pre.v_buffer = v_buffer[x->e_mbd.mode_info_context->mbmi.ref_frame]; + mode_mv[NEARESTMV] = frame_nearest_mv[x->e_mbd.mode_info_context->mbmi.ref_frame]; + mode_mv[NEARMV] = frame_near_mv[x->e_mbd.mode_info_context->mbmi.ref_frame]; + best_ref_mv = frame_best_ref_mv[x->e_mbd.mode_info_context->mbmi.ref_frame]; + vpx_memcpy(mdcounts, frame_mdcounts[x->e_mbd.mode_info_context->mbmi.ref_frame], sizeof(mdcounts)); + lf_or_gf = frame_lf_or_gf[x->e_mbd.mode_info_context->mbmi.ref_frame]; + } + + if(x->e_mbd.mode_info_context->mbmi.mode == NEWMV) + { + if(!saddone) + { + vp8_cal_sad(cpi,xd,x, recon_yoffset ,&near_sadidx[0] ); + saddone = 1; + } + + vp8_mv_pred(cpi, &x->e_mbd, x->e_mbd.mode_info_context, &mvp, + x->e_mbd.mode_info_context->mbmi.ref_frame, cpi->common.ref_frame_sign_bias, &sr, &near_sadidx[0]); + + /* adjust mvp to make sure it is within MV range */ + if(mvp.row > best_ref_mv.row + MAX_FULL_PEL_VAL) + mvp.row = best_ref_mv.row + MAX_FULL_PEL_VAL; + else if(mvp.row < best_ref_mv.row - MAX_FULL_PEL_VAL) + mvp.row = best_ref_mv.row - MAX_FULL_PEL_VAL; + if(mvp.col > best_ref_mv.col + MAX_FULL_PEL_VAL) + mvp.col = best_ref_mv.col + MAX_FULL_PEL_VAL; + else if(mvp.col < best_ref_mv.col - MAX_FULL_PEL_VAL) + mvp.col = best_ref_mv.col - MAX_FULL_PEL_VAL; } // Check to see if the testing frequency for this mode is at its max @@ -1668,110 +1957,80 @@ // Experimental code. Special case for gf and arf zeromv modes. 
Increase zbin size to supress noise if (cpi->zbin_mode_boost_enabled) { - if ((vp8_mode_order[mode_index] == ZEROMV) && (vp8_ref_frame_order[mode_index] != LAST_FRAME)) - cpi->zbin_mode_boost = GF_ZEROMV_ZBIN_BOOST; - else + if ( vp8_ref_frame_order[mode_index] == INTRA_FRAME ) cpi->zbin_mode_boost = 0; + else + { + if (vp8_mode_order[mode_index] == ZEROMV) + { + if (vp8_ref_frame_order[mode_index] != LAST_FRAME) + cpi->zbin_mode_boost = GF_ZEROMV_ZBIN_BOOST; + else + cpi->zbin_mode_boost = LF_ZEROMV_ZBIN_BOOST; + } + else if (vp8_ref_frame_order[mode_index] == SPLITMV) + cpi->zbin_mode_boost = 0; + else + cpi->zbin_mode_boost = MV_ZBIN_BOOST; + } - vp8cx_mb_init_quantizer(cpi, x); + vp8_update_zbin_extra(cpi, x); } switch (this_mode) { case B_PRED: + { + int tmp_rd; // Note the rate value returned here includes the cost of coding the BPRED mode : x->mbmode_cost[x->e_mbd.frame_type][BPRED]; - vp8_rd_pick_intra4x4mby_modes(cpi, x, &rate, &rate_y, &distortion); + tmp_rd = vp8_rd_pick_intra4x4mby_modes(cpi, x, &rate, &rate_y, &distortion, best_yrd); rate2 += rate; - //rate_y = rate; distortion2 += distortion; - rate2 += uv_intra_rate; - rate_uv = uv_intra_rate_tokenonly; - distortion2 += uv_intra_distortion; - break; + + if(tmp_rd < best_yrd) + { + rate2 += uv_intra_rate; + rate_uv = uv_intra_rate_tokenonly; + distortion2 += uv_intra_distortion; + distortion_uv = uv_intra_distortion; + } + else + { + this_rd = INT_MAX; + disable_skip = 1; + } + } + break; case SPLITMV: { - int frame_cost_rd = RDFUNC(x->rdmult, x->rddiv, frame_cost, 0, cpi->target_bits_per_mb); - int saved_rate = rate2; - - // vp8_rd_pick_best_mbsegmentation looks only at Y and does not account for frame_cost. - // (best_rd - frame_cost_rd) is thus a conservative breakout number. 
- int breakout_rd = best_rd - frame_cost_rd; int tmp_rd; + int this_rd_thresh; - if (x->e_mbd.mode_info_context->mbmi.ref_frame == LAST_FRAME) - tmp_rd = vp8_rd_pick_best_mbsegmentation(cpi, x, &best_ref_mv, breakout_rd, mdcounts, &rate, &rate_y, &distortion, cpi->compressor_speed, x->mvcost, cpi->rd_threshes[THR_NEWMV], cpi->common.full_pixel) ; - else if (x->e_mbd.mode_info_context->mbmi.ref_frame == GOLDEN_FRAME) - tmp_rd = vp8_rd_pick_best_mbsegmentation(cpi, x, &best_ref_mv, breakout_rd, mdcounts, &rate, &rate_y, &distortion, cpi->compressor_speed, x->mvcost, cpi->rd_threshes[THR_NEWG], cpi->common.full_pixel) ; - else - tmp_rd = vp8_rd_pick_best_mbsegmentation(cpi, x, &best_ref_mv, breakout_rd, mdcounts, &rate, &rate_y, &distortion, cpi->compressor_speed, x->mvcost, cpi->rd_threshes[THR_NEWA], cpi->common.full_pixel) ; + this_rd_thresh = (x->e_mbd.mode_info_context->mbmi.ref_frame == LAST_FRAME) ? cpi->rd_threshes[THR_NEWMV] : cpi->rd_threshes[THR_NEWA]; + this_rd_thresh = (x->e_mbd.mode_info_context->mbmi.ref_frame == GOLDEN_FRAME) ? 
cpi->rd_threshes[THR_NEWG]: this_rd_thresh; + + tmp_rd = vp8_rd_pick_best_mbsegmentation(cpi, x, &best_ref_mv, + best_yrd, mdcounts, + &rate, &rate_y, &distortion, this_rd_thresh) ; rate2 += rate; distortion2 += distortion; // If even the 'Y' rd value of split is higher than best so far then dont bother looking at UV - if (tmp_rd < breakout_rd) + if (tmp_rd < best_yrd) { // Now work out UV cost and add it in - vp8_rd_inter_uv(cpi, x, &rate, &distortion, cpi->common.full_pixel); - rate2 += rate; - rate_uv = rate; - distortion2 += distortion; - + vp8_rd_inter_uv(cpi, x, &rate_uv, &distortion_uv, cpi->common.full_pixel); + rate2 += rate_uv; + distortion2 += distortion_uv; } else { this_rd = INT_MAX; disable_skip = 1; } - - // Trap cases where the best split mode has all vectors coded 0,0 (or all the same) - if (0) - { - int allsame = 1; - - for (i = 1; i < 16; i++) - { - BLOCKD *bd = &x->e_mbd.block[i]; - - if (bd->bmi.mv.as_int != x->e_mbd.block[0].bmi.mv.as_int) //(bmvs[i].col != bmvs[i-1].col) || (bmvs[i].row != bmvs[i-1].row ) ) - { - allsame = 0; - break; - } - } - - if (allsame) - { - // reset mode and mv and jump to newmv - this_mode = NEWMV; - distortion2 = 0; - rate2 = saved_rate; - mode_mv[NEWMV].row = x->e_mbd.block[0].bmi.mv.as_mv.row; - mode_mv[NEWMV].col = x->e_mbd.block[0].bmi.mv.as_mv.col; - rate2 += vp8_mv_bit_cost(&mode_mv[NEWMV], &best_ref_mv, x->mvcost, 96); - goto mv_selected; - } - } - - // trap cases where the 8x8s can be promoted to 8x16s or 16x8s - if (0)//x->partition_info->count == 4) - { - - if (x->partition_info->bmi[0].mv.as_int == x->partition_info->bmi[1].mv.as_int - && x->partition_info->bmi[2].mv.as_int == x->partition_info->bmi[3].mv.as_int) - { - const int *labels = vp8_mbsplits[2]; - x->e_mbd.mode_info_context->mbmi.partitioning = 0; - rate -= vp8_cost_token(vp8_mbsplit_tree, vp8_mbsplit_probs, vp8_mbsplit_encodings + 2); - rate += vp8_cost_token(vp8_mbsplit_tree, vp8_mbsplit_probs, vp8_mbsplit_encodings); - //rate -= 
x->inter_bmode_costs[ x->partition_info->bmi[1]]; - //rate -= x->inter_bmode_costs[ x->partition_info->bmi[3]]; - x->partition_info->bmi[1] = x->partition_info->bmi[2]; - } - } - } break; case DC_PRED: @@ -1780,16 +2039,14 @@ case TM_PRED: x->e_mbd.mode_info_context->mbmi.ref_frame = INTRA_FRAME; vp8_build_intra_predictors_mby_ptr(&x->e_mbd); - { - macro_block_yrd(x, &rate, &distortion, IF_RTCD(&cpi->rtcd.encodemb)) ; - rate2 += rate; - rate_y = rate; - distortion2 += distortion; - rate2 += x->mbmode_cost[x->e_mbd.frame_type][x->e_mbd.mode_info_context->mbmi.mode]; - rate2 += uv_intra_rate; - rate_uv = uv_intra_rate_tokenonly; - distortion2 += uv_intra_distortion; - } + macro_block_yrd(x, &rate_y, &distortion, IF_RTCD(&cpi->rtcd.encodemb)) ; + rate2 += rate_y; + distortion2 += distortion; + rate2 += x->mbmode_cost[x->e_mbd.frame_type][x->e_mbd.mode_info_context->mbmi.mode]; + rate2 += uv_intra_rate; + rate_uv = uv_intra_rate_tokenonly; + distortion2 += uv_intra_distortion; + distortion_uv = uv_intra_distortion; break; case NEWMV: @@ -1806,6 +2063,30 @@ int further_steps; int n; + int col_min = (best_ref_mv.col - MAX_FULL_PEL_VAL) >>3; + int col_max = (best_ref_mv.col + MAX_FULL_PEL_VAL) >>3; + int row_min = (best_ref_mv.row - MAX_FULL_PEL_VAL) >>3; + int row_max = (best_ref_mv.row + MAX_FULL_PEL_VAL) >>3; + + int tmp_col_min = x->mv_col_min; + int tmp_col_max = x->mv_col_max; + int tmp_row_min = x->mv_row_min; + int tmp_row_max = x->mv_row_max; + + // Get intersection of UMV window and valid MV window to reduce # of checks in diamond search. 
+ if (x->mv_col_min < col_min ) + x->mv_col_min = col_min; + if (x->mv_col_max > col_max ) + x->mv_col_max = col_max; + if (x->mv_row_min < row_min ) + x->mv_row_min = row_min; + if (x->mv_row_max > row_max ) + x->mv_row_max = row_max; + + //adjust search range according to sr from mv prediction + if(sr > step_param) + step_param = sr; + // Work out how long a search we should do search_range = MAXF(abs(best_ref_mv.col), abs(best_ref_mv.row)) >> 3; @@ -1820,13 +2101,13 @@ if (cpi->sf.search_method == HEX) { - bestsme = vp8_hex_search(x, b, d, &best_ref_mv, &d->bmi.mv.as_mv, step_param, sadpb/*x->errorperbit*/, &num00, &cpi->fn_ptr[BLOCK_16X16], x->mvsadcost, x->mvcost); + bestsme = vp8_hex_search(x, b, d, &best_ref_mv, &d->bmi.mv.as_mv, step_param, sadpb/*x->errorperbit*/, &num00, &cpi->fn_ptr[BLOCK_16X16], x->mvsadcost, x->mvcost, &best_ref_mv); mode_mv[NEWMV].row = d->bmi.mv.as_mv.row; mode_mv[NEWMV].col = d->bmi.mv.as_mv.col; } else { - bestsme = cpi->diamond_search_sad(x, b, d, &best_ref_mv, &d->bmi.mv.as_mv, step_param, sadpb / 2/*x->errorperbit*/, &num00, &cpi->fn_ptr[BLOCK_16X16], x->mvsadcost, x->mvcost); //sadpb < 9 + bestsme = cpi->diamond_search_sad(x, b, d, &mvp, &d->bmi.mv.as_mv, step_param, sadpb / 2/*x->errorperbit*/, &num00, &cpi->fn_ptr[BLOCK_16X16], x->mvsadcost, x->mvcost, &best_ref_mv); //sadpb < 9 mode_mv[NEWMV].row = d->bmi.mv.as_mv.row; mode_mv[NEWMV].col = d->bmi.mv.as_mv.col; @@ -1845,7 +2126,7 @@ num00--; else { - thissme = cpi->diamond_search_sad(x, b, d, &best_ref_mv, &d->bmi.mv.as_mv, step_param + n, sadpb / 4/*x->errorperbit*/, &num00, &cpi->fn_ptr[BLOCK_16X16], x->mvsadcost, x->mvcost); //sadpb = 9 + thissme = cpi->diamond_search_sad(x, b, d, &mvp, &d->bmi.mv.as_mv, step_param + n, sadpb / 4/*x->errorperbit*/, &num00, &cpi->fn_ptr[BLOCK_16X16], x->mvsadcost, x->mvcost, &best_ref_mv); //sadpb = 9 if (thissme < bestsme) { @@ -1869,9 +2150,14 @@ { int thissme; int full_flag_thresh = 0; + MV full_mvp; + + full_mvp.row = 
d->bmi.mv.as_mv.row <<3; // use diamond search result as full search staring point + full_mvp.col = d->bmi.mv.as_mv.col <<3; // Update x->vector_range based on best vector found in step search - search_range = MAXF(abs(d->bmi.mv.as_mv.row), abs(d->bmi.mv.as_mv.col)); + search_range = MAXF(abs((mvp.row>>3) - d->bmi.mv.as_mv.row), abs((mvp.col>>3) - d->bmi.mv.as_mv.col)); + //search_range *= 1.4; //didn't improve PSNR if (search_range > x->vector_range) x->vector_range = search_range; @@ -1880,9 +2166,13 @@ // Apply limits search_range = (search_range > cpi->sf.max_fs_radius) ? cpi->sf.max_fs_radius : search_range; + + //add this to reduce full search range. + if(sr<=3 && search_range > 8) search_range = 8; + { int sadpb = x->sadperbit16 >> 2; - thissme = cpi->full_search_sad(x, b, d, &best_ref_mv, sadpb, search_range, &cpi->fn_ptr[BLOCK_16X16], x->mvcost, x->mvsadcost); + thissme = cpi->full_search_sad(x, b, d, &full_mvp, sadpb, search_range, &cpi->fn_ptr[BLOCK_16X16], x->mvcost, x->mvsadcost,&best_ref_mv); } // Barrier threshold to initiating full search @@ -1905,6 +2195,11 @@ } } + x->mv_col_min = tmp_col_min; + x->mv_col_max = tmp_col_max; + x->mv_row_min = tmp_row_min; + x->mv_row_max = tmp_row_max; + if (bestsme < INT_MAX) // cpi->find_fractional_mv_step(x,b,d,&d->bmi.mv.as_mv,&best_ref_mv,x->errorperbit/2,cpi->fn_ptr.svf,cpi->fn_ptr.vf,x->mvcost); // normal mvc=11 cpi->find_fractional_mv_step(x, b, d, &d->bmi.mv.as_mv, &best_ref_mv, x->errorperbit / 4, &cpi->fn_ptr[BLOCK_16X16], x->mvcost); @@ -1949,61 +2244,94 @@ vp8_set_mbmode_and_mvs(x, this_mode, &mode_mv[this_mode]); vp8_build_inter_predictors_mby(&x->e_mbd); - VARIANCE_INVOKE(&cpi->rtcd.variance, get16x16var)(x->src.y_buffer, x->src.y_stride, x->e_mbd.predictor, 16, (unsigned int *)(&sse), &sum); - if (cpi->active_map_enabled && x->active_ptr[0] == 0) - { + if (cpi->active_map_enabled && x->active_ptr[0] == 0) { x->skip = 1; } - else if (sse < x->encode_breakout) + else if (x->encode_breakout) { - // 
Check u and v to make sure skip is ok - int sse2 = 0; + int sum, sse; + int threshold = (xd->block[0].dequant[1] + * xd->block[0].dequant[1] >>4); + + if(threshold < x->encode_breakout) + threshold = x->encode_breakout; - sse2 = VP8_UVSSE(x, IF_RTCD(&cpi->rtcd.variance)); + VARIANCE_INVOKE(&cpi->rtcd.variance, get16x16var) + (x->src.y_buffer, x->src.y_stride, + x->e_mbd.predictor, 16, (unsigned int *)(&sse), &sum); - if (sse2 * 2 < x->encode_breakout) + if (sse < threshold) { - x->skip = 1; - distortion2 = sse; - rate2 = 500; + // Check u and v to make sure skip is ok + int sse2 = 0; + /* If theres is no codeable 2nd order dc + or a very small uniform pixel change change */ + if (abs(sum) < (xd->block[24].dequant[0]<<2)|| + ((sum * sum>>8) > sse && abs(sum) <128)) + { + sse2 = VP8_UVSSE(x, IF_RTCD(&cpi->rtcd.variance)); - disable_skip = 1; // We have no real rate data so trying to adjust for rate_y and rate_uv below will cause problems. - this_rd = RDFUNC(x->rdmult, x->rddiv, rate2, distortion2, cpi->target_bits_per_mb); + if (sse2 * 2 < threshold) + { + x->skip = 1; + distortion2 = sse + sse2; + rate2 = 500; + + /* for best_yrd calculation */ + rate_uv = 0; + distortion_uv = sse2; + + disable_skip = 1; + this_rd = RDCOST(x->rdmult, x->rddiv, rate2, + distortion2); - break; // (PGW) Move break here from below - for now at least + break; + } + } } - else - x->skip = 0; } + //intermodecost[mode_index] = vp8_cost_mv_ref(this_mode, mdcounts); // Experimental debug code // Add in the Mv/mode cost rate2 += vp8_cost_mv_ref(this_mode, mdcounts); // Y cost and distortion - macro_block_yrd(x, &rate, &distortion, IF_RTCD(&cpi->rtcd.encodemb)); - rate2 += rate; - rate_y = rate; + macro_block_yrd(x, &rate_y, &distortion, IF_RTCD(&cpi->rtcd.encodemb)); + rate2 += rate_y; distortion2 += distortion; // UV cost and distortion - vp8_rd_inter_uv(cpi, x, &rate, &distortion, cpi->common.full_pixel); - rate2 += rate; - rate_uv = rate; - distortion2 += distortion; + vp8_rd_inter_uv(cpi, 
x, &rate_uv, &distortion_uv, cpi->common.full_pixel); + rate2 += rate_uv; + distortion2 += distortion_uv; break; default: break; } + // Where skip is allowable add in the default per mb cost for the no skip case. + // where we then decide to skip we have to delete this and replace it with the + // cost of signallying a skip + if (cpi->common.mb_no_coeff_skip) + { + other_cost += vp8_cost_bit(cpi->prob_skip_false, 0); + rate2 += other_cost; + } + + // Estimate the reference frame signaling cost and add it to the rolling cost variable. + rate2 += ref_frame_cost[x->e_mbd.mode_info_context->mbmi.ref_frame]; + if (!disable_skip) { // Test for the condition where skip block will be activated because there are no non zero coefficients and make any necessary adjustment for rate if (cpi->common.mb_no_coeff_skip) { + int tteob; + tteob = 0; for (i = 0; i <= 24; i++) @@ -2013,77 +2341,24 @@ if (tteob == 0) { -#if 1 rate2 -= (rate_y + rate_uv); + //for best_yrd calculation + rate_uv = 0; // Back out no skip flag costing and add in skip flag costing if (cpi->prob_skip_false) { - rate2 += vp8_cost_bit(cpi->prob_skip_false, 1); - rate2 -= vp8_cost_bit(cpi->prob_skip_false, 0); - } + int prob_skip_cost; -#else - int rateuseskip; - int ratenotuseskip; - - - - ratenotuseskip = rate_y + rate_uv + vp8_cost_bit(cpi->prob_skip_false, 0); - rateuseskip = vp8_cost_bit(cpi->prob_skip_false, 1); - - if (1) // rateuseskipprob_skip_false, 0); - rateuseskip = vp8_cost_bit(cpi->prob_skip_false, 1); - - minrate = rateuseskip - ratenotuseskip; - - skip_rd = RDFUNC(x->rdmult, x->rddiv, minrate, maxdistortion - distortion2, cpi->target_bits_per_mb); - - if (skip_rd + 50 < 0 && x->e_mbd.mbmi.ref_frame != INTRA_FRAME && rate_y + rate_uv < 4000) - { - force_no_skip = 1; - rate2 = rate2 + rateuseskip - ratenotuseskip; - distortion2 = maxdistortion; - } - else - { - force_no_skip = 0; + prob_skip_cost = vp8_cost_bit(cpi->prob_skip_false, 1); + prob_skip_cost -= vp8_cost_bit(cpi->prob_skip_false, 0); + 
rate2 += prob_skip_cost; + other_cost += prob_skip_cost; } - } - -#endif - } - // Calculate the final RD estimate for this mode - this_rd = RDFUNC(x->rdmult, x->rddiv, rate2, distortion2, cpi->target_bits_per_mb); + this_rd = RDCOST(x->rdmult, x->rddiv, rate2, distortion2); } // Experimental debug code. @@ -2108,6 +2383,12 @@ x->e_mbd.mode_info_context->mbmi.uv_mode = uv_intra_mode; } + other_cost += ref_frame_cost[x->e_mbd.mode_info_context->mbmi.ref_frame]; + + /* Calculate the final y RD estimate for this mode */ + best_yrd = RDCOST(x->rdmult, x->rddiv, (rate2-rate_uv-other_cost), + (distortion2-distortion_uv)); + *returnrate = rate2; *returndistortion = distortion2; best_rd = this_rd; @@ -2137,6 +2418,7 @@ if (x->skip) break; + } // Reduce the activation RD thresholds for the best choice mode @@ -2207,6 +2489,15 @@ } + if(best_mbmode.mode <= B_PRED) + { + int i; + for (i = 0; i < 16; i++) + { + best_bmodes[i].mv.as_int = 0; + } + } + // macroblock modes vpx_memcpy(&x->e_mbd.mode_info_context->mbmi, &best_mbmode, sizeof(MB_MODE_INFO)); vpx_memcpy(x->partition_info, &best_partition, sizeof(PARTITION_INFO)); @@ -2221,4 +2512,3 @@ return best_rd; } #endif - diff -Nru libvpx-0.9.5/vp8/encoder/rdopt.h libvpx-0.9.6/vp8/encoder/rdopt.h --- libvpx-0.9.5/vp8/encoder/rdopt.h 2010-10-28 13:14:14.000000000 +0000 +++ libvpx-0.9.6/vp8/encoder/rdopt.h 2011-03-04 20:40:40.000000000 +0000 @@ -12,10 +12,22 @@ #ifndef __INC_RDOPT_H #define __INC_RDOPT_H void vp8_initialize_rd_consts(VP8_COMP *cpi, int Qvalue); -int vp8_rd_pick_intra4x4mby_modes(VP8_COMP *cpi, MACROBLOCK *mb, int *rate, int *rate_to, int *distortion); +int vp8_rd_pick_intra4x4mby_modes(VP8_COMP *cpi, MACROBLOCK *mb, int *rate, int *rate_to, int *distortion, int best_rd); int vp8_rd_pick_intra16x16mby_mode(VP8_COMP *cpi, MACROBLOCK *x, int *returnrate, int *rate_to, int *returndistortion); int vp8_rd_pick_intra_mbuv_mode(VP8_COMP *cpi, MACROBLOCK *x, int *rate, int *rate_to, int *distortion); extern int 
vp8_rd_pick_inter_mode(VP8_COMP *cpi, MACROBLOCK *x, int recon_yoffset, int recon_uvoffset, int *returnrate, int *returndistortion, int *returnintra); +extern void vp8_mv_pred +( + VP8_COMP *cpi, + MACROBLOCKD *xd, + const MODE_INFO *here, + MV *mvp, + int refframe, + int *ref_frame_sign_bias, + int *sr, + int near_sadidx[] +); +void vp8_cal_sad(VP8_COMP *cpi, MACROBLOCKD *xd, MACROBLOCK *x, int recon_yoffset, int near_sadidx[]); #endif diff -Nru libvpx-0.9.5/vp8/encoder/sad_c.c libvpx-0.9.6/vp8/encoder/sad_c.c --- libvpx-0.9.5/vp8/encoder/sad_c.c 2010-10-28 13:14:14.000000000 +0000 +++ libvpx-0.9.6/vp8/encoder/sad_c.c 2011-03-04 20:40:40.000000000 +0000 @@ -126,6 +126,24 @@ sad_array[2] = vp8_sad16x16_c(src_ptr, src_stride, ref_ptr + 2, ref_stride, 0x7fffffff); } +void vp8_sad16x16x8_c( + const unsigned char *src_ptr, + int src_stride, + const unsigned char *ref_ptr, + int ref_stride, + unsigned short *sad_array +) +{ + sad_array[0] = (unsigned short)vp8_sad16x16_c(src_ptr, src_stride, ref_ptr , ref_stride, 0x7fffffff); + sad_array[1] = (unsigned short)vp8_sad16x16_c(src_ptr, src_stride, ref_ptr + 1, ref_stride, 0x7fffffff); + sad_array[2] = (unsigned short)vp8_sad16x16_c(src_ptr, src_stride, ref_ptr + 2, ref_stride, 0x7fffffff); + sad_array[3] = (unsigned short)vp8_sad16x16_c(src_ptr, src_stride, ref_ptr + 3 , ref_stride, 0x7fffffff); + sad_array[4] = (unsigned short)vp8_sad16x16_c(src_ptr, src_stride, ref_ptr + 4, ref_stride, 0x7fffffff); + sad_array[5] = (unsigned short)vp8_sad16x16_c(src_ptr, src_stride, ref_ptr + 5, ref_stride, 0x7fffffff); + sad_array[6] = (unsigned short)vp8_sad16x16_c(src_ptr, src_stride, ref_ptr + 6 , ref_stride, 0x7fffffff); + sad_array[7] = (unsigned short)vp8_sad16x16_c(src_ptr, src_stride, ref_ptr + 7, ref_stride, 0x7fffffff); +} + void vp8_sad16x8x3_c( const unsigned char *src_ptr, int src_stride, @@ -139,6 +157,24 @@ sad_array[2] = vp8_sad16x8_c(src_ptr, src_stride, ref_ptr + 2, ref_stride, 0x7fffffff); } +void vp8_sad16x8x8_c( + 
const unsigned char *src_ptr, + int src_stride, + const unsigned char *ref_ptr, + int ref_stride, + unsigned short *sad_array +) +{ + sad_array[0] = (unsigned short)vp8_sad16x8_c(src_ptr, src_stride, ref_ptr , ref_stride, 0x7fffffff); + sad_array[1] = (unsigned short)vp8_sad16x8_c(src_ptr, src_stride, ref_ptr + 1, ref_stride, 0x7fffffff); + sad_array[2] = (unsigned short)vp8_sad16x8_c(src_ptr, src_stride, ref_ptr + 2, ref_stride, 0x7fffffff); + sad_array[3] = (unsigned short)vp8_sad16x8_c(src_ptr, src_stride, ref_ptr + 3 , ref_stride, 0x7fffffff); + sad_array[4] = (unsigned short)vp8_sad16x8_c(src_ptr, src_stride, ref_ptr + 4, ref_stride, 0x7fffffff); + sad_array[5] = (unsigned short)vp8_sad16x8_c(src_ptr, src_stride, ref_ptr + 5, ref_stride, 0x7fffffff); + sad_array[6] = (unsigned short)vp8_sad16x8_c(src_ptr, src_stride, ref_ptr + 6 , ref_stride, 0x7fffffff); + sad_array[7] = (unsigned short)vp8_sad16x8_c(src_ptr, src_stride, ref_ptr + 7, ref_stride, 0x7fffffff); +} + void vp8_sad8x8x3_c( const unsigned char *src_ptr, int src_stride, @@ -152,6 +188,24 @@ sad_array[2] = vp8_sad8x8_c(src_ptr, src_stride, ref_ptr + 2, ref_stride, 0x7fffffff); } +void vp8_sad8x8x8_c( + const unsigned char *src_ptr, + int src_stride, + const unsigned char *ref_ptr, + int ref_stride, + unsigned short *sad_array +) +{ + sad_array[0] = (unsigned short)vp8_sad8x8_c(src_ptr, src_stride, ref_ptr , ref_stride, 0x7fffffff); + sad_array[1] = (unsigned short)vp8_sad8x8_c(src_ptr, src_stride, ref_ptr + 1, ref_stride, 0x7fffffff); + sad_array[2] = (unsigned short)vp8_sad8x8_c(src_ptr, src_stride, ref_ptr + 2, ref_stride, 0x7fffffff); + sad_array[3] = (unsigned short)vp8_sad8x8_c(src_ptr, src_stride, ref_ptr + 3 , ref_stride, 0x7fffffff); + sad_array[4] = (unsigned short)vp8_sad8x8_c(src_ptr, src_stride, ref_ptr + 4, ref_stride, 0x7fffffff); + sad_array[5] = (unsigned short)vp8_sad8x8_c(src_ptr, src_stride, ref_ptr + 5, ref_stride, 0x7fffffff); + sad_array[6] = (unsigned short)vp8_sad8x8_c(src_ptr, 
src_stride, ref_ptr + 6 , ref_stride, 0x7fffffff); + sad_array[7] = (unsigned short)vp8_sad8x8_c(src_ptr, src_stride, ref_ptr + 7, ref_stride, 0x7fffffff); +} + void vp8_sad8x16x3_c( const unsigned char *src_ptr, int src_stride, @@ -165,6 +219,24 @@ sad_array[2] = vp8_sad8x16_c(src_ptr, src_stride, ref_ptr + 2, ref_stride, 0x7fffffff); } +void vp8_sad8x16x8_c( + const unsigned char *src_ptr, + int src_stride, + const unsigned char *ref_ptr, + int ref_stride, + unsigned short *sad_array +) +{ + sad_array[0] = (unsigned short)vp8_sad8x16_c(src_ptr, src_stride, ref_ptr , ref_stride, 0x7fffffff); + sad_array[1] = (unsigned short)vp8_sad8x16_c(src_ptr, src_stride, ref_ptr + 1, ref_stride, 0x7fffffff); + sad_array[2] = (unsigned short)vp8_sad8x16_c(src_ptr, src_stride, ref_ptr + 2, ref_stride, 0x7fffffff); + sad_array[3] = (unsigned short)vp8_sad8x16_c(src_ptr, src_stride, ref_ptr + 3 , ref_stride, 0x7fffffff); + sad_array[4] = (unsigned short)vp8_sad8x16_c(src_ptr, src_stride, ref_ptr + 4, ref_stride, 0x7fffffff); + sad_array[5] = (unsigned short)vp8_sad8x16_c(src_ptr, src_stride, ref_ptr + 5, ref_stride, 0x7fffffff); + sad_array[6] = (unsigned short)vp8_sad8x16_c(src_ptr, src_stride, ref_ptr + 6 , ref_stride, 0x7fffffff); + sad_array[7] = (unsigned short)vp8_sad8x16_c(src_ptr, src_stride, ref_ptr + 7, ref_stride, 0x7fffffff); +} + void vp8_sad4x4x3_c( const unsigned char *src_ptr, int src_stride, @@ -178,6 +250,24 @@ sad_array[2] = vp8_sad4x4_c(src_ptr, src_stride, ref_ptr + 2, ref_stride, 0x7fffffff); } +void vp8_sad4x4x8_c( + const unsigned char *src_ptr, + int src_stride, + const unsigned char *ref_ptr, + int ref_stride, + unsigned short *sad_array +) +{ + sad_array[0] = (unsigned short)vp8_sad4x4_c(src_ptr, src_stride, ref_ptr , ref_stride, 0x7fffffff); + sad_array[1] = (unsigned short)vp8_sad4x4_c(src_ptr, src_stride, ref_ptr + 1, ref_stride, 0x7fffffff); + sad_array[2] = (unsigned short)vp8_sad4x4_c(src_ptr, src_stride, ref_ptr + 2, ref_stride, 0x7fffffff); + 
sad_array[3] = (unsigned short)vp8_sad4x4_c(src_ptr, src_stride, ref_ptr + 3 , ref_stride, 0x7fffffff); + sad_array[4] = (unsigned short)vp8_sad4x4_c(src_ptr, src_stride, ref_ptr + 4, ref_stride, 0x7fffffff); + sad_array[5] = (unsigned short)vp8_sad4x4_c(src_ptr, src_stride, ref_ptr + 5, ref_stride, 0x7fffffff); + sad_array[6] = (unsigned short)vp8_sad4x4_c(src_ptr, src_stride, ref_ptr + 6 , ref_stride, 0x7fffffff); + sad_array[7] = (unsigned short)vp8_sad4x4_c(src_ptr, src_stride, ref_ptr + 7, ref_stride, 0x7fffffff); +} + void vp8_sad16x16x4d_c( const unsigned char *src_ptr, int src_stride, diff -Nru libvpx-0.9.5/vp8/encoder/segmentation.h libvpx-0.9.6/vp8/encoder/segmentation.h --- libvpx-0.9.5/vp8/encoder/segmentation.h 2010-10-28 13:14:14.000000000 +0000 +++ libvpx-0.9.6/vp8/encoder/segmentation.h 2011-03-04 20:40:40.000000000 +0000 @@ -10,7 +10,7 @@ #include "string.h" -#include "blockd.h" +#include "vp8/common/blockd.h" #include "onyx_int.h" extern void vp8_update_gf_useage_maps(VP8_COMP *cpi, VP8_COMMON *cm, MACROBLOCK *x); diff -Nru libvpx-0.9.5/vp8/encoder/temporal_filter.c libvpx-0.9.6/vp8/encoder/temporal_filter.c --- libvpx-0.9.5/vp8/encoder/temporal_filter.c 2010-10-28 13:14:14.000000000 +0000 +++ libvpx-0.9.6/vp8/encoder/temporal_filter.c 2011-03-04 20:40:40.000000000 +0000 @@ -9,27 +9,26 @@ */ -#include "onyxc_int.h" +#include "vp8/common/onyxc_int.h" #include "onyx_int.h" -#include "systemdependent.h" +#include "vp8/common/systemdependent.h" #include "quantize.h" -#include "alloccommon.h" +#include "vp8/common/alloccommon.h" #include "mcomp.h" #include "firstpass.h" #include "psnr.h" #include "vpx_scale/vpxscale.h" -#include "extend.h" +#include "vp8/common/extend.h" #include "ratectrl.h" -#include "quant_common.h" +#include "vp8/common/quant_common.h" #include "segmentation.h" -#include "g_common.h" +#include "vp8/common/g_common.h" #include "vpx_scale/yv12extend.h" -#include "postproc.h" +#include "vp8/common/postproc.h" #include 
"vpx_mem/vpx_mem.h" -#include "swapyv12buffer.h" -#include "threading.h" +#include "vp8/common/swapyv12buffer.h" +#include "vp8/common/threading.h" #include "vpx_ports/vpx_timer.h" -#include "vpxerrors.h" #include #include @@ -37,29 +36,9 @@ #define ALT_REF_MC_ENABLED 1 // dis/enable MC in AltRef filtering #define ALT_REF_SUBPEL_ENABLED 1 // dis/enable subpel in MC AltRef filtering -#define USE_FILTER_LUT 1 #if VP8_TEMPORAL_ALT_REF -#if USE_FILTER_LUT -static int modifier_lut[7][19] = -{ - // Strength=0 - {16, 13, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, - // Strength=1 - {16, 15, 10, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, - // Strength=2 - {16, 15, 13, 9, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, - // Strength=3 - {16, 16, 15, 13, 10, 7, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, - // Strength=4 - {16, 16, 15, 14, 13, 11, 9, 7, 4, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0}, - // Strength=5 - {16, 16, 16, 15, 15, 14, 13, 11, 10, 8, 7, 5, 3, 0, 0, 0, 0, 0, 0}, - // Strength=6 - {16, 16, 16, 16, 15, 15, 14, 14, 13, 12, 11, 10, 9, 8, 7, 5, 4, 2, 1} -}; -#endif -static void build_predictors_mb +static void vp8_temporal_filter_predictors_mb_c ( MACROBLOCKD *x, unsigned char *y_mb_ptr, @@ -79,21 +58,18 @@ if ((mv_row | mv_col) & 7) { -// vp8_sixtap_predict16x16_c(yptr, stride, -// mv_col & 7, mv_row & 7, &pred[0], 16); x->subpixel_predict16x16(yptr, stride, mv_col & 7, mv_row & 7, &pred[0], 16); } else { - //vp8_copy_mem16x16_c (yptr, stride, &pred[0], 16); RECON_INVOKE(&x->rtcd->recon, copy16x16)(yptr, stride, &pred[0], 16); } // U & V mv_row >>= 1; mv_col >>= 1; - stride >>= 1; + stride = (stride + 1) >> 1; offset = (mv_row >> 3) * stride + (mv_col >> 3); uptr = u_mb_ptr + offset; vptr = v_mb_ptr + offset; @@ -111,7 +87,7 @@ RECON_INVOKE(&x->rtcd->recon, copy8x8)(vptr, stride, &pred[320], 8); } } -static void apply_temporal_filter +void vp8_temporal_filter_apply_c ( unsigned char *frame1, unsigned int stride, @@ -120,17 +96,13 @@ int strength, int 
filter_weight, unsigned int *accumulator, - unsigned int *count + unsigned short *count ) { int i, j, k; int modifier; int byte = 0; -#if USE_FILTER_LUT - int *lut = modifier_lut[strength]; -#endif - for (i = 0,k = 0; i < block_size; i++) { for (j = 0; j < block_size; j++, k++) @@ -139,23 +111,19 @@ int src_byte = frame1[byte]; int pixel_value = *frame2++; -#if USE_FILTER_LUT - // LUT implementation -- - // improves precision of filter - modifier = abs(src_byte-pixel_value); - modifier = modifier>18 ? 0 : lut[modifier]; -#else - modifier = src_byte; - modifier -= pixel_value; + modifier = src_byte - pixel_value; + // This is an integer approximation of: + // float coeff = (3.0 * modifer * modifier) / pow(2, strength); + // modifier = (int)roundf(coeff > 16 ? 0 : 16-coeff); modifier *= modifier; - modifier >>= strength; modifier *= 3; + modifier += 1 << (strength - 1); + modifier >>= strength; if (modifier > 16) modifier = 16; modifier = 16 - modifier; -#endif modifier *= filter_weight; count[k] += modifier; @@ -171,7 +139,7 @@ #if ALT_REF_MC_ENABLED static int dummy_cost[2*mv_max+1]; -static int find_matching_mb +static int vp8_temporal_filter_find_matching_mb_c ( VP8_COMP *cpi, YV12_BUFFER_CONFIG *arf_frame, @@ -235,7 +203,7 @@ step_param, sadpb/*x->errorperbit*/, &num00, &cpi->fn_ptr[BLOCK_16X16], - mvsadcost, mvcost); + mvsadcost, mvcost, &best_ref_mv1); } else { @@ -246,7 +214,7 @@ step_param, sadpb / 2/*x->errorperbit*/, &num00, &cpi->fn_ptr[BLOCK_16X16], - mvsadcost, mvcost); //sadpb < 9 + mvsadcost, mvcost, &best_ref_mv1); //sadpb < 9 // Further step/diamond searches as necessary n = 0; @@ -268,7 +236,7 @@ step_param + n, sadpb / 4/*x->errorperbit*/, &num00, &cpi->fn_ptr[BLOCK_16X16], - mvsadcost, mvcost); //sadpb = 9 + mvsadcost, mvcost, &best_ref_mv1); //sadpb = 9 if (thissme < bestsme) { @@ -292,7 +260,7 @@ bestsme = cpi->find_fractional_mv_step(x, b, d, &d->bmi.mv.as_mv, &best_ref_mv1, x->errorperbit, &cpi->fn_ptr[BLOCK_16X16], - cpi->mb.mvcost); + 
mvcost); } #endif @@ -308,7 +276,7 @@ } #endif -static void vp8cx_temp_blur1_c +static void vp8_temporal_filter_iterate_c ( VP8_COMP *cpi, int frame_count, @@ -321,17 +289,17 @@ int mb_col, mb_row; unsigned int filter_weight[MAX_LAG_BUFFERS]; unsigned char *mm_ptr = cpi->fp_motion_map; - int cols = cpi->common.mb_cols; - int rows = cpi->common.mb_rows; + int mb_cols = cpi->common.mb_cols; + int mb_rows = cpi->common.mb_rows; int MBs = cpi->common.MBs; int mb_y_offset = 0; int mb_uv_offset = 0; - unsigned int accumulator[384]; - unsigned int count[384]; + DECLARE_ALIGNED_ARRAY(16, unsigned int, accumulator, 16*16 + 8*8 + 8*8); + DECLARE_ALIGNED_ARRAY(16, unsigned short, count, 16*16 + 8*8 + 8*8); MACROBLOCKD *mbd = &cpi->mb.e_mbd; YV12_BUFFER_CONFIG *f = cpi->frames[alt_ref_index]; unsigned char *dst1, *dst2; - DECLARE_ALIGNED(16, unsigned char, predictor[384]); + DECLARE_ALIGNED_ARRAY(16, unsigned char, predictor, 16*16 + 8*8 + 8*8); // Save input state unsigned char *y_buffer = mbd->pre.y_buffer; @@ -345,7 +313,7 @@ filter_weight[frame] = 1; } - for (mb_row = 0; mb_row < rows; mb_row++) + for (mb_row = 0; mb_row < mb_rows; mb_row++) { #if ALT_REF_MC_ENABLED // Reduced search extent by 3 for 6-tap filter & smaller UMV border @@ -354,14 +322,14 @@ + (VP8BORDERINPIXELS - 19); #endif - for (mb_col = 0; mb_col < cols; mb_col++) + for (mb_col = 0; mb_col < mb_cols; mb_col++) { int i, j, k, w; int weight_cap; int stride; vpx_memset(accumulator, 0, 384*sizeof(unsigned int)); - vpx_memset(count, 0, 384*sizeof(unsigned int)); + vpx_memset(count, 0, 384*sizeof(unsigned short)); #if ALT_REF_MC_ENABLED // Reduced search extent by 3 for 6-tap filter & smaller UMV border @@ -412,11 +380,12 @@ #define THRESH_HIGH 20000 // Correlation has been lost try MC - err = find_matching_mb ( cpi, - cpi->frames[alt_ref_index], - cpi->frames[frame], - mb_y_offset, - THRESH_LOW ); + err = vp8_temporal_filter_find_matching_mb_c + (cpi, + cpi->frames[alt_ref_index], + cpi->frames[frame], + 
mb_y_offset, + THRESH_LOW); if (filter_weight[frame] < 2) { @@ -429,43 +398,46 @@ if (filter_weight[frame] != 0) { // Construct the predictors - build_predictors_mb ( - mbd, - cpi->frames[frame]->y_buffer + mb_y_offset, - cpi->frames[frame]->u_buffer + mb_uv_offset, - cpi->frames[frame]->v_buffer + mb_uv_offset, - cpi->frames[frame]->y_stride, - mbd->block[0].bmi.mv.as_mv.row, - mbd->block[0].bmi.mv.as_mv.col, - predictor ); + vp8_temporal_filter_predictors_mb_c + (mbd, + cpi->frames[frame]->y_buffer + mb_y_offset, + cpi->frames[frame]->u_buffer + mb_uv_offset, + cpi->frames[frame]->v_buffer + mb_uv_offset, + cpi->frames[frame]->y_stride, + mbd->block[0].bmi.mv.as_mv.row, + mbd->block[0].bmi.mv.as_mv.col, + predictor); // Apply the filter (YUV) - apply_temporal_filter ( f->y_buffer + mb_y_offset, - f->y_stride, - predictor, - 16, - strength, - filter_weight[frame], - accumulator, - count ); - - apply_temporal_filter ( f->u_buffer + mb_uv_offset, - f->uv_stride, - predictor + 256, - 8, - strength, - filter_weight[frame], - accumulator + 256, - count + 256 ); - - apply_temporal_filter ( f->v_buffer + mb_uv_offset, - f->uv_stride, - predictor + 320, - 8, - strength, - filter_weight[frame], - accumulator + 320, - count + 320 ); + TEMPORAL_INVOKE(&cpi->rtcd.temporal, apply) + (f->y_buffer + mb_y_offset, + f->y_stride, + predictor, + 16, + strength, + filter_weight[frame], + accumulator, + count); + + TEMPORAL_INVOKE(&cpi->rtcd.temporal, apply) + (f->u_buffer + mb_uv_offset, + f->uv_stride, + predictor + 256, + 8, + strength, + filter_weight[frame], + accumulator + 256, + count + 256); + + TEMPORAL_INVOKE(&cpi->rtcd.temporal, apply) + (f->v_buffer + mb_uv_offset, + f->uv_stride, + predictor + 320, + 8, + strength, + filter_weight[frame], + accumulator + 320, + count + 320); } } @@ -524,8 +496,8 @@ mb_uv_offset += 8; } - mb_y_offset += 16*f->y_stride-f->y_width; - mb_uv_offset += 8*f->uv_stride-f->uv_width; + mb_y_offset += 16*(f->y_stride-mb_cols); + mb_uv_offset += 
8*(f->uv_stride-mb_cols); } // Restore input state @@ -534,7 +506,7 @@ mbd->pre.v_buffer = v_buffer; } -void vp8cx_temp_filter_c +void vp8_temporal_filter_prepare_c ( VP8_COMP *cpi ) @@ -642,7 +614,7 @@ = &cpi->src_buffer[which_buffer].source_buffer; } - vp8cx_temp_blur1_c ( + vp8_temporal_filter_iterate_c ( cpi, frames_to_blur, frames_to_blur_backward, diff -Nru libvpx-0.9.5/vp8/encoder/temporal_filter.h libvpx-0.9.6/vp8/encoder/temporal_filter.h --- libvpx-0.9.5/vp8/encoder/temporal_filter.h 2010-10-28 13:14:14.000000000 +0000 +++ libvpx-0.9.6/vp8/encoder/temporal_filter.h 2011-03-04 20:40:40.000000000 +0000 @@ -12,8 +12,37 @@ #ifndef __INC_VP8_TEMPORAL_FILTER_H #define __INC_VP8_TEMPORAL_FILTER_H -#include "onyx_int.h" +#define prototype_apply(sym)\ + void (sym) \ + ( \ + unsigned char *frame1, \ + unsigned int stride, \ + unsigned char *frame2, \ + unsigned int block_size, \ + int strength, \ + int filter_weight, \ + unsigned int *accumulator, \ + unsigned short *count \ + ) -void vp8cx_temp_filter_c(VP8_COMP *cpi); +#if ARCH_X86 || ARCH_X86_64 +#include "x86/temporal_filter_x86.h" +#endif + +#ifndef vp8_temporal_filter_apply +#define vp8_temporal_filter_apply vp8_temporal_filter_apply_c +#endif +extern prototype_apply(vp8_temporal_filter_apply); + +typedef struct +{ + prototype_apply(*apply); +} vp8_temporal_rtcd_vtable_t; + +#if CONFIG_RUNTIME_CPU_DETECT +#define TEMPORAL_INVOKE(ctx,fn) (ctx)->fn +#else +#define TEMPORAL_INVOKE(ctx,fn) vp8_temporal_filter_##fn +#endif #endif // __INC_VP8_TEMPORAL_FILTER_H diff -Nru libvpx-0.9.5/vp8/encoder/tokenize.c libvpx-0.9.6/vp8/encoder/tokenize.c --- libvpx-0.9.5/vp8/encoder/tokenize.c 2010-10-28 13:14:14.000000000 +0000 +++ libvpx-0.9.6/vp8/encoder/tokenize.c 2011-03-04 20:40:40.000000000 +0000 @@ -132,8 +132,6 @@ t->Token = x; t->context_tree = cpi->common.fc.coef_probs [type] [band] [pt]; - t->section = frametype * BLOCK_TYPES * 2 + 2 * type + (c == 0); - t->skip_eob_node = pt == 0 && ((band > 0 && type > 0) || (band 
> 1 && type == 0)); ++cpi->coef_counts [type] [band] [pt] [x]; @@ -185,7 +183,6 @@ t->Token = x; t->context_tree = cpi->common.fc.coef_probs [type] [band] [pt]; - t->section = frametype * BLOCK_TYPES * 2 + 2 * type + (c == 0); t->skip_eob_node = pt == 0 && ((band > 0 && type > 0) || (band > 1 && type == 0)); ++cpi->coef_counts [type] [band] [pt] [x]; @@ -434,7 +431,6 @@ t->Token = DCT_EOB_TOKEN; t->context_tree = cpi->common.fc.coef_probs [1] [0] [pt]; - t->section = 11; t->skip_eob_node = 0; ++cpi->coef_counts [1] [0] [pt] [DCT_EOB_TOKEN]; ++t; @@ -465,7 +461,6 @@ t->Token = DCT_EOB_TOKEN; t->context_tree = cpi->common.fc.coef_probs [0] [1] [pt]; - t->section = 8; t->skip_eob_node = 0; ++cpi->coef_counts [0] [1] [pt] [DCT_EOB_TOKEN]; ++t; @@ -495,7 +490,6 @@ t->Token = DCT_EOB_TOKEN; t->context_tree = cpi->common.fc.coef_probs [2] [0] [pt]; - t->section = 13; t->skip_eob_node = 0; ++cpi->coef_counts[2] [0] [pt] [DCT_EOB_TOKEN]; ++t; diff -Nru libvpx-0.9.5/vp8/encoder/tokenize.h libvpx-0.9.6/vp8/encoder/tokenize.h --- libvpx-0.9.5/vp8/encoder/tokenize.h 2010-10-28 13:14:14.000000000 +0000 +++ libvpx-0.9.6/vp8/encoder/tokenize.h 2011-03-04 20:40:40.000000000 +0000 @@ -12,7 +12,7 @@ #ifndef tokenize_h #define tokenize_h -#include "entropy.h" +#include "vp8/common/entropy.h" #include "block.h" void vp8_tokenize_initialize(); @@ -25,11 +25,10 @@ typedef struct { - int Token; - int Extra; const vp8_prob *context_tree; - int skip_eob_node; - int section; + short Extra; + unsigned char Token; + unsigned char skip_eob_node; } TOKENEXTRA; int rd_cost_mby(MACROBLOCKD *); diff -Nru libvpx-0.9.5/vp8/encoder/treewriter.h libvpx-0.9.6/vp8/encoder/treewriter.h --- libvpx-0.9.5/vp8/encoder/treewriter.h 2010-10-28 13:14:14.000000000 +0000 +++ libvpx-0.9.6/vp8/encoder/treewriter.h 2011-03-04 20:40:40.000000000 +0000 @@ -15,7 +15,7 @@ /* Trees map alphabets into huffman-like codes suitable for an arithmetic bit coder. 
Timothy S Murphy 11 October 2004 */ -#include "treecoder.h" +#include "vp8/common/treecoder.h" #include "boolhuff.h" /* for now */ diff -Nru libvpx-0.9.5/vp8/encoder/variance.h libvpx-0.9.6/vp8/encoder/variance.h --- libvpx-0.9.5/vp8/encoder/variance.h 2010-10-28 13:14:14.000000000 +0000 +++ libvpx-0.9.6/vp8/encoder/variance.h 2011-03-04 20:40:40.000000000 +0000 @@ -32,6 +32,16 @@ unsigned int *sad_array\ ) +#define prototype_sad_multi_same_address_1(sym)\ + void (sym)\ + (\ + const unsigned char *src_ptr, \ + int source_stride, \ + const unsigned char *ref_ptr, \ + int ref_stride, \ + unsigned short *sad_array\ + ) + #define prototype_sad_multi_dif_address(sym)\ void (sym)\ (\ @@ -138,6 +148,31 @@ #endif extern prototype_sad_multi_same_address(vp8_variance_sad4x4x3); +#ifndef vp8_variance_sad16x16x8 +#define vp8_variance_sad16x16x8 vp8_sad16x16x8_c +#endif +extern prototype_sad_multi_same_address_1(vp8_variance_sad16x16x8); + +#ifndef vp8_variance_sad16x8x8 +#define vp8_variance_sad16x8x8 vp8_sad16x8x8_c +#endif +extern prototype_sad_multi_same_address_1(vp8_variance_sad16x8x8); + +#ifndef vp8_variance_sad8x8x8 +#define vp8_variance_sad8x8x8 vp8_sad8x8x8_c +#endif +extern prototype_sad_multi_same_address_1(vp8_variance_sad8x8x8); + +#ifndef vp8_variance_sad8x16x8 +#define vp8_variance_sad8x16x8 vp8_sad8x16x8_c +#endif +extern prototype_sad_multi_same_address_1(vp8_variance_sad8x16x8); + +#ifndef vp8_variance_sad4x4x8 +#define vp8_variance_sad4x4x8 vp8_sad4x4x8_c +#endif +extern prototype_sad_multi_same_address_1(vp8_variance_sad4x4x8); + //-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=- #ifndef vp8_variance_sad16x16x4d @@ -274,6 +309,7 @@ typedef prototype_sad(*vp8_sad_fn_t); typedef prototype_sad_multi_same_address(*vp8_sad_multi_fn_t); +typedef prototype_sad_multi_same_address_1(*vp8_sad_multi1_fn_t); typedef prototype_sad_multi_dif_address(*vp8_sad_multi_d_fn_t); typedef prototype_variance(*vp8_variance_fn_t); typedef prototype_variance2(*vp8_variance2_fn_t); 
@@ -317,6 +353,12 @@ vp8_sad_multi_fn_t sad8x8x3; vp8_sad_multi_fn_t sad4x4x3; + vp8_sad_multi1_fn_t sad16x16x8; + vp8_sad_multi1_fn_t sad16x8x8; + vp8_sad_multi1_fn_t sad8x16x8; + vp8_sad_multi1_fn_t sad8x8x8; + vp8_sad_multi1_fn_t sad4x4x8; + vp8_sad_multi_d_fn_t sad16x16x4d; vp8_sad_multi_d_fn_t sad16x8x4d; vp8_sad_multi_d_fn_t sad8x16x4d; @@ -334,6 +376,7 @@ vp8_variance_fn_t svf_halfpix_v; vp8_variance_fn_t svf_halfpix_hv; vp8_sad_multi_fn_t sdx3f; + vp8_sad_multi1_fn_t sdx8f; vp8_sad_multi_d_fn_t sdx4df; } vp8_variance_fn_ptr_t; diff -Nru libvpx-0.9.5/vp8/encoder/x86/dct_mmx.asm libvpx-0.9.6/vp8/encoder/x86/dct_mmx.asm --- libvpx-0.9.5/vp8/encoder/x86/dct_mmx.asm 2010-10-28 13:14:14.000000000 +0000 +++ libvpx-0.9.6/vp8/encoder/x86/dct_mmx.asm 2011-03-04 20:40:40.000000000 +0000 @@ -11,511 +11,231 @@ %include "vpx_ports/x86_abi_support.asm" -section .text - global sym(vp8_short_fdct4x4_mmx) - global sym(vp8_short_fdct8x4_wmt) - - -%define DCTCONSTANTSBITS (16) -%define DCTROUNDINGVALUE (1<< (DCTCONSTANTSBITS-1)) -%define x_c1 (60547) ; cos(pi /8) * (1<<15) -%define x_c2 (46341) ; cos(pi*2/8) * (1<<15) -%define x_c3 (25080) ; cos(pi*3/8) * (1<<15) - - ;void vp8_short_fdct4x4_mmx(short *input, short *output, int pitch) +global sym(vp8_short_fdct4x4_mmx) sym(vp8_short_fdct4x4_mmx): push rbp - mov rbp, rsp + mov rbp, rsp SHADOW_ARGS_TO_STACK 3 GET_GOT rbx - push rsi - push rdi + push rsi + push rdi ; end prolog - mov rsi, arg(0) ;input - mov rdi, arg(1) ;output - lea rdx, [GLOBAL(dct_const_mmx)] - movsxd rax, dword ptr arg(2) ;pitch + mov rsi, arg(0) ; input + mov rdi, arg(1) ; output - lea rcx, [rsi + rax*2] - ; read the input data - movq mm0, [rsi] - movq mm1, [rsi + rax ] + movsxd rax, dword ptr arg(2) ;pitch - movq mm2, [rcx] - movq mm3, [rcx + rax] - ; get the constants - ;shift to left by 1 for prescision - psllw mm0, 3 - psllw mm1, 3 + lea rcx, [rsi + rax*2] + ; read the input data + movq mm0, [rsi] + movq mm1, [rsi + rax] - psllw mm2, 3 - psllw mm3, 3 + 
movq mm2, [rcx] + movq mm4, [rcx + rax] - ; transpose for the second stage - movq mm4, mm0 ; 00 01 02 03 - movq mm5, mm2 ; 10 11 12 03 + ; transpose for the first stage + movq mm3, mm0 ; 00 01 02 03 + movq mm5, mm2 ; 20 21 22 23 - punpcklwd mm0, mm1 ; 00 10 01 11 - punpckhwd mm4, mm1 ; 02 12 03 13 + punpcklwd mm0, mm1 ; 00 10 01 11 + punpckhwd mm3, mm1 ; 02 12 03 13 - punpcklwd mm2, mm3 ; 20 30 21 31 - punpckhwd mm5, mm3 ; 22 32 23 33 + punpcklwd mm2, mm4 ; 20 30 21 31 + punpckhwd mm5, mm4 ; 22 32 23 33 + movq mm1, mm0 ; 00 10 01 11 + punpckldq mm0, mm2 ; 00 10 20 30 - movq mm1, mm0 ; 00 10 01 11 - punpckldq mm0, mm2 ; 00 10 20 30 + punpckhdq mm1, mm2 ; 01 11 21 31 - punpckhdq mm1, mm2 ; 01 11 21 31 + movq mm2, mm3 ; 02 12 03 13 + punpckldq mm2, mm5 ; 02 12 22 32 - movq mm2, mm4 ; 02 12 03 13 - punpckldq mm2, mm5 ; 02 12 22 32 - - punpckhdq mm4, mm5 ; 03 13 23 33 - movq mm3, mm4 + punpckhdq mm3, mm5 ; 03 13 23 33 + ; mm0 0 + ; mm1 1 + ; mm2 2 + ; mm3 3 ; first stage - movq mm5, mm0 - movq mm4, mm1 - - paddw mm0, mm3 ; a = 0 + 3 - paddw mm1, mm2 ; b = 1 + 2 - - psubw mm4, mm2 ; c = 1 - 2 - psubw mm5, mm3 ; d = 0 - 3 - - - ; output 0 and 2 - movq mm6, [rdx + 16] ; c2 - movq mm2, mm0 ; a + movq mm5, mm0 + movq mm4, mm1 - paddw mm0, mm1 ; a + b - psubw mm2, mm1 ; a - b + paddw mm0, mm3 ; a1 = 0 + 3 + paddw mm1, mm2 ; b1 = 1 + 2 - movq mm1, mm0 ; a + b - pmulhw mm0, mm6 ; 00 01 02 03 + psubw mm4, mm2 ; c1 = 1 - 2 + psubw mm5, mm3 ; d1 = 0 - 3 - paddw mm0, mm1 ; output 00 01 02 03 - pmulhw mm6, mm2 ; 20 21 22 23 - - paddw mm2, mm6 ; output 20 21 22 23 - - ; output 1 and 3 - movq mm6, [rdx + 8] ; c1 - movq mm7, [rdx + 24] ; c3 - - movq mm1, mm4 ; c - movq mm3, mm5 ; d - - pmulhw mm1, mm7 ; c * c3 - pmulhw mm3, mm6 ; d * c1 - - paddw mm3, mm5 ; d * c1 rounded - paddw mm1, mm3 ; output 10 11 12 13 - - movq mm3, mm4 ; c - pmulhw mm5, mm7 ; d * c3 - - pmulhw mm4, mm6 ; c * c1 - paddw mm3, mm4 ; round c* c1 - - psubw mm5, mm3 ; output 30 31 32 33 - movq mm3, mm5 - - - ; done 
with vertical - ; transpose for the second stage - movq mm4, mm0 ; 00 01 02 03 - movq mm5, mm2 ; 10 11 12 03 - - punpcklwd mm0, mm1 ; 00 10 01 11 - punpckhwd mm4, mm1 ; 02 12 03 13 - - punpcklwd mm2, mm3 ; 20 30 21 31 - punpckhwd mm5, mm3 ; 22 32 23 33 - - - movq mm1, mm0 ; 00 10 01 11 - punpckldq mm0, mm2 ; 00 10 20 30 - - punpckhdq mm1, mm2 ; 01 11 21 31 - - movq mm2, mm4 ; 02 12 03 13 - punpckldq mm2, mm5 ; 02 12 22 32 - - punpckhdq mm4, mm5 ; 03 13 23 33 - movq mm3, mm4 - - - ; first stage - movq mm5, mm0 - movq mm4, mm1 - - paddw mm0, mm3 ; a = 0 + 3 - paddw mm1, mm2 ; b = 1 + 2 - - psubw mm4, mm2 ; c = 1 - 2 - psubw mm5, mm3 ; d = 0 - 3 + psllw mm5, 3 + psllw mm4, 3 + psllw mm0, 3 + psllw mm1, 3 ; output 0 and 2 - movq mm6, [rdx + 16] ; c2 - movq mm2, mm0 ; a - paddw mm0, mm1 ; a + b - - psubw mm2, mm1 ; a - b - - movq mm1, mm0 ; a + b - pmulhw mm0, mm6 ; 00 01 02 03 - - paddw mm0, mm1 ; output 00 01 02 03 - pmulhw mm6, mm2 ; 20 21 22 23 - - paddw mm2, mm6 ; output 20 21 22 23 + movq mm2, mm0 ; a1 + paddw mm0, mm1 ; op[0] = a1 + b1 + psubw mm2, mm1 ; op[2] = a1 - b1 ; output 1 and 3 - movq mm6, [rdx + 8] ; c1 - movq mm7, [rdx + 24] ; c3 - - movq mm1, mm4 ; c - movq mm3, mm5 ; d - - pmulhw mm1, mm7 ; c * c3 - pmulhw mm3, mm6 ; d * c1 - - paddw mm3, mm5 ; d * c1 rounded - paddw mm1, mm3 ; output 10 11 12 13 + ; interleave c1, d1 + movq mm1, mm5 ; d1 + punpcklwd mm1, mm4 ; c1 d1 + punpckhwd mm5, mm4 ; c1 d1 + + movq mm3, mm1 + movq mm4, mm5 + + pmaddwd mm1, MMWORD PTR[GLOBAL (_5352_2217)] ; c1*2217 + d1*5352 + pmaddwd mm4, MMWORD PTR[GLOBAL (_5352_2217)] ; c1*2217 + d1*5352 + + pmaddwd mm3, MMWORD PTR[GLOBAL(_2217_neg5352)] ; d1*2217 - c1*5352 + pmaddwd mm5, MMWORD PTR[GLOBAL(_2217_neg5352)] ; d1*2217 - c1*5352 + + paddd mm1, MMWORD PTR[GLOBAL(_14500)] + paddd mm4, MMWORD PTR[GLOBAL(_14500)] + paddd mm3, MMWORD PTR[GLOBAL(_7500)] + paddd mm5, MMWORD PTR[GLOBAL(_7500)] + + psrad mm1, 12 ; (c1 * 2217 + d1 * 5352 + 14500)>>12 + psrad mm4, 12 ; (c1 * 2217 + d1 * 
5352 + 14500)>>12 + psrad mm3, 12 ; (d1 * 2217 - c1 * 5352 + 7500)>>12 + psrad mm5, 12 ; (d1 * 2217 - c1 * 5352 + 7500)>>12 - movq mm3, mm4 ; c - pmulhw mm5, mm7 ; d * c3 + packssdw mm1, mm4 ; op[1] + packssdw mm3, mm5 ; op[3] - pmulhw mm4, mm6 ; c * c1 - paddw mm3, mm4 ; round c* c1 - - psubw mm5, mm3 ; output 30 31 32 33 - movq mm3, mm5 ; done with vertical - - pcmpeqw mm4, mm4 - pcmpeqw mm5, mm5 - psrlw mm4, 15 - psrlw mm5, 15 - - psllw mm4, 2 - psllw mm5, 2 - - paddw mm0, mm4 - paddw mm1, mm5 - paddw mm2, mm4 - paddw mm3, mm5 - - psraw mm0, 3 - psraw mm1, 3 - psraw mm2, 3 - psraw mm3, 3 - - movq [rdi ], mm0 - movq [rdi+ 8], mm1 - movq [rdi+16], mm2 - movq [rdi+24], mm3 - - ; begin epilog - pop rdi - pop rsi - RESTORE_GOT - UNSHADOW_ARGS - pop rbp - ret - - -;void vp8_short_fdct8x4_wmt(short *input, short *output, int pitch) -sym(vp8_short_fdct8x4_wmt): - push rbp - mov rbp, rsp - SHADOW_ARGS_TO_STACK 3 - GET_GOT rbx - push rsi - push rdi - ; end prolog - mov rsi, arg(0) ;input - mov rdi, arg(1) ;output - - lea rdx, [GLOBAL(dct_const_xmm)] - movsxd rax, dword ptr arg(2) ;pitch - - lea rcx, [rsi + rax*2] - ; read the input data - movdqa xmm0, [rsi] - movdqa xmm2, [rsi + rax] - - movdqa xmm4, [rcx] - movdqa xmm3, [rcx + rax] - ; get the constants - ;shift to left by 1 for prescision - psllw xmm0, 3 - psllw xmm2, 3 - - psllw xmm4, 3 - psllw xmm3, 3 - ; transpose for the second stage - movdqa xmm1, xmm0 ; 00 01 02 03 04 05 06 07 - movdqa xmm5, xmm4 ; 20 21 22 23 24 25 26 27 + movq mm4, mm0 ; 00 10 20 30 + movq mm5, mm2 ; 02 12 22 32 - punpcklwd xmm0, xmm2 ; 00 10 01 11 02 12 03 13 - punpckhwd xmm1, xmm2 ; 04 14 05 15 06 16 07 17 + punpcklwd mm0, mm1 ; 00 01 10 11 + punpckhwd mm4, mm1 ; 20 21 30 31 - punpcklwd xmm4, xmm3 ; 20 30 21 31 22 32 23 33 - punpckhwd xmm5, xmm3 ; 24 34 25 35 26 36 27 37 + punpcklwd mm2, mm3 ; 02 03 12 13 + punpckhwd mm5, mm3 ; 22 23 32 33 - movdqa xmm2, xmm0 ; 00 10 01 11 02 12 03 13 - punpckldq xmm0, xmm4 ; 00 10 20 30 01 11 21 31 + movq 
mm1, mm0 ; 00 01 10 11 + punpckldq mm0, mm2 ; 00 01 02 03 - punpckhdq xmm2, xmm4 ; 02 12 22 32 03 13 23 33 + punpckhdq mm1, mm2 ; 01 22 12 13 + movq mm2, mm4 ; 20 31 30 31 + punpckldq mm2, mm5 ; 20 21 22 23 - movdqa xmm4, xmm1 ; 04 14 05 15 06 16 07 17 - punpckldq xmm4, xmm5 ; 04 14 24 34 05 15 25 35 + punpckhdq mm4, mm5 ; 30 31 32 33 - punpckhdq xmm1, xmm5 ; 06 16 26 36 07 17 27 37 - movdqa xmm3, xmm2 ; 02 12 22 32 03 13 23 33 + ; mm0 0 + ; mm1 1 + ; mm2 2 + ; mm3 4 - punpckhqdq xmm3, xmm1 ; 03 13 23 33 07 17 27 37 - punpcklqdq xmm2, xmm1 ; 02 12 22 32 06 16 26 36 + movq mm5, mm0 + movq mm3, mm1 - movdqa xmm1, xmm0 ; 00 10 20 30 01 11 21 31 - punpcklqdq xmm0, xmm4 ; 00 10 20 30 04 14 24 34 + paddw mm0, mm4 ; a1 = 0 + 3 + paddw mm1, mm2 ; b1 = 1 + 2 - punpckhqdq xmm1, xmm4 ; 01 11 21 32 05 15 25 35 + psubw mm3, mm2 ; c1 = 1 - 2 + psubw mm5, mm4 ; d1 = 0 - 3 - ; xmm0 0 - ; xmm1 1 - ; xmm2 2 - ; xmm3 3 + pxor mm6, mm6 ; zero out for compare - ; first stage - movdqa xmm5, xmm0 - movdqa xmm4, xmm1 - - paddw xmm0, xmm3 ; a = 0 + 3 - paddw xmm1, xmm2 ; b = 1 + 2 - - psubw xmm4, xmm2 ; c = 1 - 2 - psubw xmm5, xmm3 ; d = 0 - 3 + pcmpeqw mm6, mm5 ; d1 != 0 + pandn mm6, MMWORD PTR[GLOBAL(_cmp_mask)] ; clear upper, + ; and keep bit 0 of lower ; output 0 and 2 - movdqa xmm6, [rdx + 32] ; c2 - movdqa xmm2, xmm0 ; a + movq mm2, mm0 ; a1 - paddw xmm0, xmm1 ; a + b - psubw xmm2, xmm1 ; a - b + paddw mm0, mm1 ; a1 + b1 + psubw mm2, mm1 ; a1 - b1 - movdqa xmm1, xmm0 ; a + b - pmulhw xmm0, xmm6 ; 00 01 02 03 + paddw mm0, MMWORD PTR[GLOBAL(_7w)] + paddw mm2, MMWORD PTR[GLOBAL(_7w)] - paddw xmm0, xmm1 ; output 00 01 02 03 - pmulhw xmm6, xmm2 ; 20 21 22 23 + psraw mm0, 4 ; op[0] = (a1 + b1 + 7)>>4 + psraw mm2, 4 ; op[8] = (a1 - b1 + 7)>>4 - paddw xmm2, xmm6 ; output 20 21 22 23 + movq MMWORD PTR[rdi + 0 ], mm0 + movq MMWORD PTR[rdi + 16], mm2 ; output 1 and 3 - movdqa xmm6, [rdx + 16] ; c1 - movdqa xmm7, [rdx + 48] ; c3 - - movdqa xmm1, xmm4 ; c - movdqa xmm3, xmm5 ; d - - pmulhw xmm1, 
xmm7 ; c * c3 - pmulhw xmm3, xmm6 ; d * c1 - - paddw xmm3, xmm5 ; d * c1 rounded - paddw xmm1, xmm3 ; output 10 11 12 13 - - movdqa xmm3, xmm4 ; c - pmulhw xmm5, xmm7 ; d * c3 - - pmulhw xmm4, xmm6 ; c * c1 - paddw xmm3, xmm4 ; round c* c1 - - psubw xmm5, xmm3 ; output 30 31 32 33 - movdqa xmm3, xmm5 - - - ; done with vertical - ; transpose for the second stage - movdqa xmm4, xmm2 ; 02 12 22 32 06 16 26 36 - movdqa xmm2, xmm1 ; 01 11 21 31 05 15 25 35 + ; interleave c1, d1 + movq mm1, mm5 ; d1 + punpcklwd mm1, mm3 ; c1 d1 + punpckhwd mm5, mm3 ; c1 d1 - movdqa xmm1, xmm0 ; 00 10 20 30 04 14 24 34 - movdqa xmm5, xmm4 ; 02 12 22 32 06 16 26 36 + movq mm3, mm1 + movq mm4, mm5 - punpcklwd xmm0, xmm2 ; 00 01 10 11 20 21 30 31 - punpckhwd xmm1, xmm2 ; 04 05 14 15 24 25 34 35 + pmaddwd mm1, MMWORD PTR[GLOBAL (_5352_2217)] ; c1*2217 + d1*5352 + pmaddwd mm4, MMWORD PTR[GLOBAL (_5352_2217)] ; c1*2217 + d1*5352 - punpcklwd xmm4, xmm3 ; 02 03 12 13 22 23 32 33 - punpckhwd xmm5, xmm3 ; 06 07 16 17 26 27 36 37 + pmaddwd mm3, MMWORD PTR[GLOBAL(_2217_neg5352)] ; d1*2217 - c1*5352 + pmaddwd mm5, MMWORD PTR[GLOBAL(_2217_neg5352)] ; d1*2217 - c1*5352 - movdqa xmm2, xmm0 ; 00 01 10 11 20 21 30 31 - punpckldq xmm0, xmm4 ; 00 01 02 03 10 11 12 13 + paddd mm1, MMWORD PTR[GLOBAL(_12000)] + paddd mm4, MMWORD PTR[GLOBAL(_12000)] + paddd mm3, MMWORD PTR[GLOBAL(_51000)] + paddd mm5, MMWORD PTR[GLOBAL(_51000)] - punpckhdq xmm2, xmm4 ; 20 21 22 23 30 31 32 33 + psrad mm1, 16 ; (c1 * 2217 + d1 * 5352 + 14500)>>16 + psrad mm4, 16 ; (c1 * 2217 + d1 * 5352 + 14500)>>16 + psrad mm3, 16 ; (d1 * 2217 - c1 * 5352 + 7500)>>16 + psrad mm5, 16 ; (d1 * 2217 - c1 * 5352 + 7500)>>16 + packssdw mm1, mm4 ; op[4] + packssdw mm3, mm5 ; op[12] - movdqa xmm4, xmm1 ; 04 05 14 15 24 25 34 35 - punpckldq xmm4, xmm5 ; 04 05 06 07 14 15 16 17 + paddw mm1, mm6 ; op[4] += (d1!=0) - punpckhdq xmm1, xmm5 ; 24 25 26 27 34 35 36 37 - movdqa xmm3, xmm2 ; 20 21 22 23 30 31 32 33 + movq MMWORD PTR[rdi + 8 ], mm1 + movq MMWORD 
PTR[rdi + 24], mm3 - punpckhqdq xmm3, xmm1 ; 30 31 32 33 34 35 36 37 - punpcklqdq xmm2, xmm1 ; 20 21 22 23 24 25 26 27 - - movdqa xmm1, xmm0 ; 00 01 02 03 10 11 12 13 - punpcklqdq xmm0, xmm4 ; 00 01 02 03 04 05 06 07 - - punpckhqdq xmm1, xmm4 ; 10 11 12 13 14 15 16 17 - - ; first stage - movdqa xmm5, xmm0 - movdqa xmm4, xmm1 - - paddw xmm0, xmm3 ; a = 0 + 3 - paddw xmm1, xmm2 ; b = 1 + 2 - - psubw xmm4, xmm2 ; c = 1 - 2 - psubw xmm5, xmm3 ; d = 0 - 3 - - - ; output 0 and 2 - movdqa xmm6, [rdx + 32] ; c2 - movdqa xmm2, xmm0 ; a - - paddw xmm0, xmm1 ; a + b - psubw xmm2, xmm1 ; a - b - - movdqa xmm1, xmm0 ; a + b - pmulhw xmm0, xmm6 ; 00 01 02 03 - - paddw xmm0, xmm1 ; output 00 01 02 03 - pmulhw xmm6, xmm2 ; 20 21 22 23 - - paddw xmm2, xmm6 ; output 20 21 22 23 - - ; output 1 and 3 - movdqa xmm6, [rdx + 16] ; c1 - movdqa xmm7, [rdx + 48] ; c3 - - movdqa xmm1, xmm4 ; c - movdqa xmm3, xmm5 ; d - - pmulhw xmm1, xmm7 ; c * c3 - pmulhw xmm3, xmm6 ; d * c1 - - paddw xmm3, xmm5 ; d * c1 rounded - paddw xmm1, xmm3 ; output 10 11 12 13 - - movdqa xmm3, xmm4 ; c - pmulhw xmm5, xmm7 ; d * c3 - - pmulhw xmm4, xmm6 ; c * c1 - paddw xmm3, xmm4 ; round c* c1 - - psubw xmm5, xmm3 ; output 30 31 32 33 - movdqa xmm3, xmm5 - ; done with vertical - - - pcmpeqw xmm4, xmm4 - pcmpeqw xmm5, xmm5; - psrlw xmm4, 15 - psrlw xmm5, 15 - - psllw xmm4, 2 - psllw xmm5, 2 - - paddw xmm0, xmm4 - paddw xmm1, xmm5 - paddw xmm2, xmm4 - paddw xmm3, xmm5 - - psraw xmm0, 3 - psraw xmm1, 3 - psraw xmm2, 3 - psraw xmm3, 3 - - movq QWORD PTR[rdi ], xmm0 - movq QWORD PTR[rdi+ 8], xmm1 - movq QWORD PTR[rdi+16], xmm2 - movq QWORD PTR[rdi+24], xmm3 - - psrldq xmm0, 8 - psrldq xmm1, 8 - psrldq xmm2, 8 - psrldq xmm3, 8 - - movq QWORD PTR[rdi+32], xmm0 - movq QWORD PTR[rdi+40], xmm1 - movq QWORD PTR[rdi+48], xmm2 - movq QWORD PTR[rdi+56], xmm3 - ; begin epilog - pop rdi - pop rsi + ; begin epilog + pop rdi + pop rsi RESTORE_GOT UNSHADOW_ARGS pop rbp ret - SECTION_RODATA -;static const unsigned int 
dct1st_stage_rounding_mmx[2] = -align 16 -dct1st_stage_rounding_mmx: - times 2 dd 8192 - - -;static const unsigned int dct2nd_stage_rounding_mmx[2] = -align 16 -dct2nd_stage_rounding_mmx: - times 2 dd 32768 - - -;static const short dct_matrix[4][4]= -align 16 -dct_matrix: - times 4 dw 23170 - - dw 30274 - dw 12540 - dw -12540 - dw -30274 - - dw 23170 - times 2 dw -23170 - dw 23170 - - dw 12540 - dw -30274 - dw 30274 - dw -12540 - - -;static const unsigned short dct_const_mmx[4 * 4]= -align 16 -dct_const_mmx: - times 4 dw 0 - times 4 dw 60547 - times 4 dw 46341 - times 4 dw 25080 - - -;static const unsigned short dct_const_xmm[8 * 4]= -align 16 -dct_const_xmm: - times 8 dw 0 - times 8 dw 60547 - times 8 dw 46341 - times 8 dw 25080 +align 8 +_5352_2217: + dw 5352 + dw 2217 + dw 5352 + dw 2217 +align 8 +_2217_neg5352: + dw 2217 + dw -5352 + dw 2217 + dw -5352 +align 8 +_cmp_mask: + times 4 dw 1 +align 8 +_7w: + times 4 dw 7 +align 8 +_14500: + times 2 dd 14500 +align 8 +_7500: + times 2 dd 7500 +align 8 +_12000: + times 2 dd 12000 +align 8 +_51000: + times 2 dd 51000 diff -Nru libvpx-0.9.5/vp8/encoder/x86/dct_sse2.asm libvpx-0.9.6/vp8/encoder/x86/dct_sse2.asm --- libvpx-0.9.5/vp8/encoder/x86/dct_sse2.asm 2010-10-28 13:14:14.000000000 +0000 +++ libvpx-0.9.6/vp8/encoder/x86/dct_sse2.asm 2011-03-04 20:40:40.000000000 +0000 @@ -11,32 +11,68 @@ %include "vpx_ports/x86_abi_support.asm" -;void vp8_short_fdct4x4_sse2(short *input, short *output, int pitch) -global sym(vp8_short_fdct4x4_sse2) -sym(vp8_short_fdct4x4_sse2): +%macro STACK_FRAME_CREATE 0 +%if ABI_IS_32BIT + %define input rsi + %define output rdi + %define pitch rax push rbp mov rbp, rsp - SHADOW_ARGS_TO_STACK 3 -;; SAVE_XMM GET_GOT rbx push rsi push rdi ; end prolog mov rsi, arg(0) - movsxd rax, DWORD PTR arg(2) - lea rdi, [rsi + rax*2] + mov rdi, arg(1) + + movsxd rax, dword ptr arg(2) + lea rcx, [rsi + rax*2] +%else + %ifidn __OUTPUT_FORMAT__,x64 + %define input rcx + %define output rdx + %define pitch r8 + 
%else + %define input rdi + %define output rsi + %define pitch rdx + %endif +%endif +%endmacro + +%macro STACK_FRAME_DESTROY 0 + %define input + %define output + %define pitch + +%if ABI_IS_32BIT + pop rdi + pop rsi + RESTORE_GOT + pop rbp +%else + %ifidn __OUTPUT_FORMAT__,x64 + %endif +%endif + ret +%endmacro + +;void vp8_short_fdct4x4_sse2(short *input, short *output, int pitch) +global sym(vp8_short_fdct4x4_sse2) +sym(vp8_short_fdct4x4_sse2): + + STACK_FRAME_CREATE - movq xmm0, MMWORD PTR[rsi ] ;03 02 01 00 - movq xmm2, MMWORD PTR[rsi + rax] ;13 12 11 10 - movq xmm1, MMWORD PTR[rsi + rax*2] ;23 22 21 20 - movq xmm3, MMWORD PTR[rdi + rax] ;33 32 31 30 + movq xmm0, MMWORD PTR[input ] ;03 02 01 00 + movq xmm2, MMWORD PTR[input+ pitch] ;13 12 11 10 + lea input, [input+2*pitch] + movq xmm1, MMWORD PTR[input ] ;23 22 21 20 + movq xmm3, MMWORD PTR[input+ pitch] ;33 32 31 30 punpcklqdq xmm0, xmm2 ;13 12 11 10 03 02 01 00 punpcklqdq xmm1, xmm3 ;33 32 31 30 23 22 21 20 - mov rdi, arg(1) - movdqa xmm2, xmm0 punpckldq xmm0, xmm1 ;23 22 03 02 21 20 01 00 punpckhdq xmm2, xmm1 ;33 32 13 12 31 30 11 10 @@ -51,6 +87,7 @@ psubw xmm3, xmm1 ;c1 d1 c1 d1 c1 d1 c1 d1 psllw xmm0, 3 ;b1 <<= 3 a1 <<= 3 psllw xmm3, 3 ;c1 <<= 3 d1 <<= 3 + movdqa xmm1, xmm0 pmaddwd xmm0, XMMWORD PTR[GLOBAL(_mult_add)] ;a1 + b1 pmaddwd xmm1, XMMWORD PTR[GLOBAL(_mult_sub)] ;a1 - b1 @@ -121,17 +158,216 @@ punpcklqdq xmm0, xmm3 ;op[4] op[0] punpckhqdq xmm1, xmm3 ;op[12] op[8] - movdqa XMMWORD PTR[rdi + 0], xmm0 - movdqa XMMWORD PTR[rdi + 16], xmm1 + movdqa XMMWORD PTR[output + 0], xmm0 + movdqa XMMWORD PTR[output + 16], xmm1 - ; begin epilog - pop rdi - pop rsi - RESTORE_GOT -;; RESTORE_XMM - UNSHADOW_ARGS - pop rbp - ret + STACK_FRAME_DESTROY + +;void vp8_short_fdct8x4_sse2(short *input, short *output, int pitch) +global sym(vp8_short_fdct8x4_sse2) +sym(vp8_short_fdct8x4_sse2): + + STACK_FRAME_CREATE + + ; read the input data + movdqa xmm0, [input ] + movdqa xmm2, [input+ pitch] + lea input, [input+2*pitch] + 
movdqa xmm4, [input ] + movdqa xmm3, [input+ pitch] + + ; transpose for the first stage + movdqa xmm1, xmm0 ; 00 01 02 03 04 05 06 07 + movdqa xmm5, xmm4 ; 20 21 22 23 24 25 26 27 + + punpcklwd xmm0, xmm2 ; 00 10 01 11 02 12 03 13 + punpckhwd xmm1, xmm2 ; 04 14 05 15 06 16 07 17 + + punpcklwd xmm4, xmm3 ; 20 30 21 31 22 32 23 33 + punpckhwd xmm5, xmm3 ; 24 34 25 35 26 36 27 37 + + movdqa xmm2, xmm0 ; 00 10 01 11 02 12 03 13 + punpckldq xmm0, xmm4 ; 00 10 20 30 01 11 21 31 + + punpckhdq xmm2, xmm4 ; 02 12 22 32 03 13 23 33 + + movdqa xmm4, xmm1 ; 04 14 05 15 06 16 07 17 + punpckldq xmm4, xmm5 ; 04 14 24 34 05 15 25 35 + + punpckhdq xmm1, xmm5 ; 06 16 26 36 07 17 27 37 + movdqa xmm3, xmm2 ; 02 12 22 32 03 13 23 33 + + punpckhqdq xmm3, xmm1 ; 03 13 23 33 07 17 27 37 + punpcklqdq xmm2, xmm1 ; 02 12 22 32 06 16 26 36 + + movdqa xmm1, xmm0 ; 00 10 20 30 01 11 21 31 + punpcklqdq xmm0, xmm4 ; 00 10 20 30 04 14 24 34 + + punpckhqdq xmm1, xmm4 ; 01 11 21 32 05 15 25 35 + + ; xmm0 0 + ; xmm1 1 + ; xmm2 2 + ; xmm3 3 + + ; first stage + movdqa xmm5, xmm0 + movdqa xmm4, xmm1 + + paddw xmm0, xmm3 ; a1 = 0 + 3 + paddw xmm1, xmm2 ; b1 = 1 + 2 + + psubw xmm4, xmm2 ; c1 = 1 - 2 + psubw xmm5, xmm3 ; d1 = 0 - 3 + + psllw xmm5, 3 + psllw xmm4, 3 + + psllw xmm0, 3 + psllw xmm1, 3 + + ; output 0 and 2 + movdqa xmm2, xmm0 ; a1 + + paddw xmm0, xmm1 ; op[0] = a1 + b1 + psubw xmm2, xmm1 ; op[2] = a1 - b1 + + ; output 1 and 3 + ; interleave c1, d1 + movdqa xmm1, xmm5 ; d1 + punpcklwd xmm1, xmm4 ; c1 d1 + punpckhwd xmm5, xmm4 ; c1 d1 + + movdqa xmm3, xmm1 + movdqa xmm4, xmm5 + + pmaddwd xmm1, XMMWORD PTR[GLOBAL (_5352_2217)] ; c1*2217 + d1*5352 + pmaddwd xmm4, XMMWORD PTR[GLOBAL (_5352_2217)] ; c1*2217 + d1*5352 + + pmaddwd xmm3, XMMWORD PTR[GLOBAL(_2217_neg5352)] ; d1*2217 - c1*5352 + pmaddwd xmm5, XMMWORD PTR[GLOBAL(_2217_neg5352)] ; d1*2217 - c1*5352 + + paddd xmm1, XMMWORD PTR[GLOBAL(_14500)] + paddd xmm4, XMMWORD PTR[GLOBAL(_14500)] + paddd xmm3, XMMWORD PTR[GLOBAL(_7500)] + paddd xmm5, 
XMMWORD PTR[GLOBAL(_7500)] + + psrad xmm1, 12 ; (c1 * 2217 + d1 * 5352 + 14500)>>12 + psrad xmm4, 12 ; (c1 * 2217 + d1 * 5352 + 14500)>>12 + psrad xmm3, 12 ; (d1 * 2217 - c1 * 5352 + 7500)>>12 + psrad xmm5, 12 ; (d1 * 2217 - c1 * 5352 + 7500)>>12 + + packssdw xmm1, xmm4 ; op[1] + packssdw xmm3, xmm5 ; op[3] + + ; done with vertical + ; transpose for the second stage + movdqa xmm4, xmm0 ; 00 10 20 30 04 14 24 34 + movdqa xmm5, xmm2 ; 02 12 22 32 06 16 26 36 + + punpcklwd xmm0, xmm1 ; 00 01 10 11 20 21 30 31 + punpckhwd xmm4, xmm1 ; 04 05 14 15 24 25 34 35 + + punpcklwd xmm2, xmm3 ; 02 03 12 13 22 23 32 33 + punpckhwd xmm5, xmm3 ; 06 07 16 17 26 27 36 37 + + movdqa xmm1, xmm0 ; 00 01 10 11 20 21 30 31 + punpckldq xmm0, xmm2 ; 00 01 02 03 10 11 12 13 + + punpckhdq xmm1, xmm2 ; 20 21 22 23 30 31 32 33 + + movdqa xmm2, xmm4 ; 04 05 14 15 24 25 34 35 + punpckldq xmm2, xmm5 ; 04 05 06 07 14 15 16 17 + + punpckhdq xmm4, xmm5 ; 24 25 26 27 34 35 36 37 + movdqa xmm3, xmm1 ; 20 21 22 23 30 31 32 33 + + punpckhqdq xmm3, xmm4 ; 30 31 32 33 34 35 36 37 + punpcklqdq xmm1, xmm4 ; 20 21 22 23 24 25 26 27 + + movdqa xmm4, xmm0 ; 00 01 02 03 10 11 12 13 + punpcklqdq xmm0, xmm2 ; 00 01 02 03 04 05 06 07 + + punpckhqdq xmm4, xmm2 ; 10 11 12 13 14 15 16 17 + + ; xmm0 0 + ; xmm1 4 + ; xmm2 1 + ; xmm3 3 + + movdqa xmm5, xmm0 + movdqa xmm2, xmm1 + + paddw xmm0, xmm3 ; a1 = 0 + 3 + paddw xmm1, xmm4 ; b1 = 1 + 2 + + psubw xmm4, xmm2 ; c1 = 1 - 2 + psubw xmm5, xmm3 ; d1 = 0 - 3 + + pxor xmm6, xmm6 ; zero out for compare + + pcmpeqw xmm6, xmm5 ; d1 != 0 + + pandn xmm6, XMMWORD PTR[GLOBAL(_cmp_mask8x4)] ; clear upper, + ; and keep bit 0 of lower + + ; output 0 and 2 + movdqa xmm2, xmm0 ; a1 + + paddw xmm0, xmm1 ; a1 + b1 + psubw xmm2, xmm1 ; a1 - b1 + + paddw xmm0, XMMWORD PTR[GLOBAL(_7w)] + paddw xmm2, XMMWORD PTR[GLOBAL(_7w)] + + psraw xmm0, 4 ; op[0] = (a1 + b1 + 7)>>4 + psraw xmm2, 4 ; op[8] = (a1 - b1 + 7)>>4 + + ; output 1 and 3 + ; interleave c1, d1 + movdqa xmm1, xmm5 ; d1 + punpcklwd 
xmm1, xmm4 ; c1 d1 + punpckhwd xmm5, xmm4 ; c1 d1 + + movdqa xmm3, xmm1 + movdqa xmm4, xmm5 + + pmaddwd xmm1, XMMWORD PTR[GLOBAL (_5352_2217)] ; c1*2217 + d1*5352 + pmaddwd xmm4, XMMWORD PTR[GLOBAL (_5352_2217)] ; c1*2217 + d1*5352 + + pmaddwd xmm3, XMMWORD PTR[GLOBAL(_2217_neg5352)] ; d1*2217 - c1*5352 + pmaddwd xmm5, XMMWORD PTR[GLOBAL(_2217_neg5352)] ; d1*2217 - c1*5352 + + paddd xmm1, XMMWORD PTR[GLOBAL(_12000)] + paddd xmm4, XMMWORD PTR[GLOBAL(_12000)] + paddd xmm3, XMMWORD PTR[GLOBAL(_51000)] + paddd xmm5, XMMWORD PTR[GLOBAL(_51000)] + + psrad xmm1, 16 ; (c1 * 2217 + d1 * 5352 + 14500)>>16 + psrad xmm4, 16 ; (c1 * 2217 + d1 * 5352 + 14500)>>16 + psrad xmm3, 16 ; (d1 * 2217 - c1 * 5352 + 7500)>>16 + psrad xmm5, 16 ; (d1 * 2217 - c1 * 5352 + 7500)>>16 + + packssdw xmm1, xmm4 ; op[4] + packssdw xmm3, xmm5 ; op[12] + + paddw xmm1, xmm6 ; op[4] += (d1!=0) + + movdqa xmm4, xmm0 + movdqa xmm5, xmm2 + + punpcklqdq xmm0, xmm1 + punpckhqdq xmm4, xmm1 + + punpcklqdq xmm2, xmm3 + punpckhqdq xmm5, xmm3 + + movdqa XMMWORD PTR[output + 0 ], xmm0 + movdqa XMMWORD PTR[output + 16], xmm2 + movdqa XMMWORD PTR[output + 32], xmm4 + movdqa XMMWORD PTR[output + 48], xmm5 + + STACK_FRAME_DESTROY SECTION_RODATA align 16 @@ -161,7 +397,9 @@ _cmp_mask: times 4 dw 1 times 4 dw 0 - +align 16 +_cmp_mask8x4: + times 8 dw 1 align 16 _mult_sub: dw 1 @@ -176,6 +414,9 @@ _7: times 4 dd 7 align 16 +_7w: + times 8 dw 7 +align 16 _14500: times 4 dd 14500 align 16 diff -Nru libvpx-0.9.5/vp8/encoder/x86/dct_x86.h libvpx-0.9.6/vp8/encoder/x86/dct_x86.h --- libvpx-0.9.5/vp8/encoder/x86/dct_x86.h 2010-10-28 13:14:14.000000000 +0000 +++ libvpx-0.9.6/vp8/encoder/x86/dct_x86.h 2011-03-04 20:40:40.000000000 +0000 @@ -24,33 +24,31 @@ extern prototype_fdct(vp8_short_fdct8x4_mmx); #if !CONFIG_RUNTIME_CPU_DETECT -#if 0 + #undef vp8_fdct_short4x4 #define vp8_fdct_short4x4 vp8_short_fdct4x4_mmx #undef vp8_fdct_short8x4 #define vp8_fdct_short8x4 vp8_short_fdct8x4_mmx -#endif #endif + #endif #if HAVE_SSE2 -extern 
prototype_fdct(vp8_short_fdct8x4_wmt); +extern prototype_fdct(vp8_short_fdct8x4_sse2); extern prototype_fdct(vp8_short_walsh4x4_sse2); extern prototype_fdct(vp8_short_fdct4x4_sse2); #if !CONFIG_RUNTIME_CPU_DETECT -#if 1 -/* short SSE2 DCT currently disabled, does not match the MMX version */ + #undef vp8_fdct_short4x4 #define vp8_fdct_short4x4 vp8_short_fdct4x4_sse2 #undef vp8_fdct_short8x4 #define vp8_fdct_short8x4 vp8_short_fdct8x4_sse2 -#endif #undef vp8_fdct_fast4x4 #define vp8_fdct_fast4x4 vp8_short_fdct4x4_sse2 @@ -58,7 +56,7 @@ #undef vp8_fdct_fast8x4 #define vp8_fdct_fast8x4 vp8_short_fdct8x4_sse2 -#undef vp8_fdct_walsh_short4x4 +#undef vp8_fdct_walsh_short4x4 #define vp8_fdct_walsh_short4x4 vp8_short_walsh4x4_sse2 #endif diff -Nru libvpx-0.9.5/vp8/encoder/x86/fwalsh_sse2.asm libvpx-0.9.6/vp8/encoder/x86/fwalsh_sse2.asm --- libvpx-0.9.5/vp8/encoder/x86/fwalsh_sse2.asm 2010-10-28 13:14:14.000000000 +0000 +++ libvpx-0.9.6/vp8/encoder/x86/fwalsh_sse2.asm 2011-03-04 20:40:40.000000000 +0000 @@ -17,6 +17,7 @@ push rbp mov rbp, rsp SHADOW_ARGS_TO_STACK 3 + SAVE_XMM GET_GOT rbx push rsi push rdi @@ -143,6 +144,7 @@ pop rdi pop rsi RESTORE_GOT + RESTORE_XMM UNSHADOW_ARGS pop rbp ret diff -Nru libvpx-0.9.5/vp8/encoder/x86/mcomp_x86.h libvpx-0.9.6/vp8/encoder/x86/mcomp_x86.h --- libvpx-0.9.5/vp8/encoder/x86/mcomp_x86.h 2010-10-28 13:14:14.000000000 +0000 +++ libvpx-0.9.6/vp8/encoder/x86/mcomp_x86.h 2011-03-04 20:40:40.000000000 +0000 @@ -24,5 +24,14 @@ #endif #endif +#if HAVE_SSE4_1 +#if !CONFIG_RUNTIME_CPU_DETECT + +#undef vp8_search_full_search +#define vp8_search_full_search vp8_full_search_sadx8 + +#endif +#endif + #endif diff -Nru libvpx-0.9.5/vp8/encoder/x86/preproc_mmx.c libvpx-0.9.6/vp8/encoder/x86/preproc_mmx.c --- libvpx-0.9.5/vp8/encoder/x86/preproc_mmx.c 2010-10-28 13:14:14.000000000 +0000 +++ libvpx-0.9.6/vp8/encoder/x86/preproc_mmx.c 1970-01-01 00:00:00.000000000 +0000 @@ -1,298 +0,0 @@ -/* - * Copyright (c) 2010 The WebM project authors. 
All Rights Reserved. - * - * Use of this source code is governed by a BSD-style license - * that can be found in the LICENSE file in the root of the source - * tree. An additional intellectual property rights grant can be found - * in the file PATENTS. All contributing project authors may - * be found in the AUTHORS file in the root of the source tree. - */ - - -#include "memory.h" -#include "preproc.h" -#include "pragmas.h" - -/**************************************************************************** -* Macros -****************************************************************************/ -#define FRAMECOUNT 7 -#define ROUNDUP32(X) ( ( ( (unsigned long) X ) + 31 )&( 0xFFFFFFE0 ) ) - -/**************************************************************************** -* Imports -****************************************************************************/ -extern void vpx_get_processor_flags(int *mmx_enabled, int *xmm_enabled, int *wmt_enabled); - -/**************************************************************************** -* Exported Global Variables -****************************************************************************/ -void (*temp_filter)(pre_proc_instance *ppi, unsigned char *s, unsigned char *d, int bytes, int strength); - -/**************************************************************************** - * - * ROUTINE : temp_filter_wmt - * - * INPUTS : pre_proc_instance *ppi : Pointer to pre-processor instance. - * unsigned char *s : Pointer to source frame. - * unsigned char *d : Pointer to destination frame. - * int bytes : Number of bytes to filter. - * int strength : Strength of filter to apply. - * - * OUTPUTS : None. - * - * RETURNS : void - * - * FUNCTION : Performs a closesness adjusted temporarl blur - * - * SPECIAL NOTES : Destination frame can be same as source frame. 
- * - ****************************************************************************/ -void temp_filter_wmt -( - pre_proc_instance *ppi, - unsigned char *s, - unsigned char *d, - int bytes, - int strength -) -{ - int byte = 0; - unsigned char *frameptr = ppi->frame_buffer; - - __declspec(align(16)) unsigned short threes[] = { 3, 3, 3, 3, 3, 3, 3, 3}; - __declspec(align(16)) unsigned short sixteens[] = {16, 16, 16, 16, 16, 16, 16, 16}; - - if (ppi->frame == 0) - { - do - { - int i; - int frame = 0; - - do - { - for (i = 0; i < 8; i++) - { - *frameptr = s[byte+i]; - ++frameptr; - } - - ++frame; - } - while (frame < FRAMECOUNT); - - for (i = 0; i < 8; i++) - d[byte+i] = s[byte+i]; - - byte += 8; - - } - while (byte < bytes); - } - else - { - int i; - int offset2 = (ppi->frame % FRAMECOUNT); - - do - { - __declspec(align(16)) unsigned short counts[8]; - __declspec(align(16)) unsigned short sums[8]; - __asm - { - mov eax, offset2 - mov edi, s // source pixels - pxor xmm1, xmm1 // accumulator - - pxor xmm7, xmm7 - - mov esi, frameptr // accumulator - pxor xmm2, xmm2 // count - - movq xmm3, QWORD PTR [edi] - - movq QWORD PTR [esi+8*eax], xmm3 - - punpcklbw xmm3, xmm2 // xmm3 source pixels - mov ecx, FRAMECOUNT - - next_frame: - movq xmm4, QWORD PTR [esi] // get frame buffer values - punpcklbw xmm4, xmm7 // xmm4 frame buffer pixels - movdqa xmm6, xmm4 // save the pixel values - psubsw xmm4, xmm3 // subtracted pixel values - pmullw xmm4, xmm4 // square xmm4 - movd xmm5, strength - psrlw xmm4, xmm5 // should be strength - pmullw xmm4, threes // 3 * modifier - movdqa xmm5, sixteens // 16s - psubusw xmm5, xmm4 // 16 - modifiers - movdqa xmm4, xmm5 // save the modifiers - pmullw xmm4, xmm6 // multiplier values - paddusw xmm1, xmm4 // accumulator - paddusw xmm2, xmm5 // count - add esi, 8 // next frame - dec ecx // next set of eight pixels - jnz next_frame - - movdqa counts, xmm2 - psrlw xmm2, 1 // divide count by 2 for rounding - paddusw xmm1, xmm2 // rounding added in - - mov 
frameptr, esi - - movdqa sums, xmm1 - } - - for (i = 0; i < 8; i++) - { - int blurvalue = sums[i] * ppi->fixed_divide[counts[i]]; - blurvalue >>= 16; - d[i] = blurvalue; - } - - s += 8; - d += 8; - byte += 8; - } - while (byte < bytes); - } - - ++ppi->frame; - __asm emms -} - -/**************************************************************************** - * - * ROUTINE : temp_filter_mmx - * - * INPUTS : pre_proc_instance *ppi : Pointer to pre-processor instance. - * unsigned char *s : Pointer to source frame. - * unsigned char *d : Pointer to destination frame. - * int bytes : Number of bytes to filter. - * int strength : Strength of filter to apply. - * - * OUTPUTS : None. - * - * RETURNS : void - * - * FUNCTION : Performs a closesness adjusted temporarl blur - * - * SPECIAL NOTES : Destination frame can be same as source frame. - * - ****************************************************************************/ -void temp_filter_mmx -( - pre_proc_instance *ppi, - unsigned char *s, - unsigned char *d, - int bytes, - int strength -) -{ - int byte = 0; - unsigned char *frameptr = ppi->frame_buffer; - - __declspec(align(16)) unsigned short threes[] = { 3, 3, 3, 3}; - __declspec(align(16)) unsigned short sixteens[] = {16, 16, 16, 16}; - - if (ppi->frame == 0) - { - do - { - int i; - int frame = 0; - - do - { - for (i = 0; i < 4; i++) - { - *frameptr = s[byte+i]; - ++frameptr; - } - - ++frame; - } - while (frame < FRAMECOUNT); - - for (i = 0; i < 4; i++) - d[byte+i] = s[byte+i]; - - byte += 4; - - } - while (byte < bytes); - } - else - { - int i; - int offset2 = (ppi->frame % FRAMECOUNT); - - do - { - __declspec(align(16)) unsigned short counts[8]; - __declspec(align(16)) unsigned short sums[8]; - __asm - { - - mov eax, offset2 - mov edi, s // source pixels - pxor mm1, mm1 // accumulator - pxor mm7, mm7 - - mov esi, frameptr // accumulator - pxor mm2, mm2 // count - - movd mm3, DWORD PTR [edi] - movd DWORD PTR [esi+4*eax], mm3 - - punpcklbw mm3, mm2 // mm3 source pixels 
- mov ecx, FRAMECOUNT - - next_frame: - movd mm4, DWORD PTR [esi] // get frame buffer values - punpcklbw mm4, mm7 // mm4 frame buffer pixels - movq mm6, mm4 // save the pixel values - psubsw mm4, mm3 // subtracted pixel values - pmullw mm4, mm4 // square mm4 - movd mm5, strength - psrlw mm4, mm5 // should be strength - pmullw mm4, threes // 3 * modifier - movq mm5, sixteens // 16s - psubusw mm5, mm4 // 16 - modifiers - movq mm4, mm5 // save the modifiers - pmullw mm4, mm6 // multiplier values - paddusw mm1, mm4 // accumulator - paddusw mm2, mm5 // count - add esi, 4 // next frame - dec ecx // next set of eight pixels - jnz next_frame - - movq counts, mm2 - psrlw mm2, 1 // divide count by 2 for rounding - paddusw mm1, mm2 // rounding added in - - mov frameptr, esi - - movq sums, mm1 - - } - - for (i = 0; i < 4; i++) - { - int blurvalue = sums[i] * ppi->fixed_divide[counts[i]]; - blurvalue >>= 16; - d[i] = blurvalue; - } - - s += 4; - d += 4; - byte += 4; - } - while (byte < bytes); - } - - ++ppi->frame; - __asm emms -} diff -Nru libvpx-0.9.5/vp8/encoder/x86/quantize_sse2.asm libvpx-0.9.6/vp8/encoder/x86/quantize_sse2.asm --- libvpx-0.9.5/vp8/encoder/x86/quantize_sse2.asm 2010-10-28 13:14:14.000000000 +0000 +++ libvpx-0.9.6/vp8/encoder/x86/quantize_sse2.asm 2011-03-04 20:40:40.000000000 +0000 @@ -11,220 +11,169 @@ %include "vpx_ports/x86_abi_support.asm" -;int vp8_regular_quantize_b_impl_sse2(short *coeff_ptr, short *zbin_ptr, -; short *qcoeff_ptr,short *dequant_ptr, -; const int *default_zig_zag, short *round_ptr, -; short *quant_ptr, short *dqcoeff_ptr, +;int vp8_regular_quantize_b_impl_sse2( +; short *coeff_ptr, +; short *zbin_ptr, +; short *qcoeff_ptr, +; short *dequant_ptr, +; const int *default_zig_zag, +; short *round_ptr, +; short *quant_ptr, +; short *dqcoeff_ptr, ; unsigned short zbin_oq_value, -; short *zbin_boost_ptr); +; short *zbin_boost_ptr, +; short *quant_shift); ; global sym(vp8_regular_quantize_b_impl_sse2) sym(vp8_regular_quantize_b_impl_sse2): 
push rbp mov rbp, rsp - SHADOW_ARGS_TO_STACK 10 + SHADOW_ARGS_TO_STACK 11 + SAVE_XMM push rsi push rdi push rbx - ; end prolog - ALIGN_STACK 16, rax + %define abs_minus_zbin 0 + %define temp_qcoeff 32 + %define qcoeff 64 + %define eob_tmp 96 + %define stack_size 112 + sub rsp, stack_size + ; end prolog - %define abs_minus_zbin_lo 0 - %define abs_minus_zbin_hi 16 - %define temp_qcoeff_lo 32 - %define temp_qcoeff_hi 48 - %define save_xmm6 64 - %define save_xmm7 80 - %define eob 96 - - %define vp8_regularquantizeb_stack_size eob + 16 - - sub rsp, vp8_regularquantizeb_stack_size - - movdqa OWORD PTR[rsp + save_xmm6], xmm6 - movdqa OWORD PTR[rsp + save_xmm7], xmm7 - - mov rdx, arg(0) ;coeff_ptr - mov eax, arg(8) ;zbin_oq_value - - mov rcx, arg(1) ;zbin_ptr - movd xmm7, eax + mov rdx, arg(0) ; coeff_ptr + mov rcx, arg(1) ; zbin_ptr + movd xmm7, arg(8) ; zbin_oq_value + mov rdi, arg(5) ; round_ptr + mov rsi, arg(6) ; quant_ptr + ; z movdqa xmm0, OWORD PTR[rdx] movdqa xmm4, OWORD PTR[rdx + 16] + pshuflw xmm7, xmm7, 0 + punpcklwd xmm7, xmm7 ; duplicated zbin_oq_value + movdqa xmm1, xmm0 movdqa xmm5, xmm4 - psraw xmm0, 15 ;sign of z (aka sz) - psraw xmm4, 15 ;sign of z (aka sz) + ; sz + psraw xmm0, 15 + psraw xmm4, 15 + ; (z ^ sz) pxor xmm1, xmm0 pxor xmm5, xmm4 - movdqa xmm2, OWORD PTR[rcx] ;load zbin_ptr - movdqa xmm3, OWORD PTR[rcx + 16] ;load zbin_ptr - - pshuflw xmm7, xmm7, 0 - psubw xmm1, xmm0 ;x = abs(z) + ; x = abs(z) + psubw xmm1, xmm0 + psubw xmm5, xmm4 - punpcklwd xmm7, xmm7 ;duplicated zbin_oq_value - psubw xmm5, xmm4 ;x = abs(z) + movdqa xmm2, OWORD PTR[rcx] + movdqa xmm3, OWORD PTR[rcx + 16] + ; *zbin_ptr + zbin_oq_value paddw xmm2, xmm7 paddw xmm3, xmm7 - psubw xmm1, xmm2 ;sub (zbin_ptr + zbin_oq_value) - psubw xmm5, xmm3 ;sub (zbin_ptr + zbin_oq_value) - - mov rdi, arg(5) ;round_ptr - mov rsi, arg(6) ;quant_ptr - - movdqa OWORD PTR[rsp + abs_minus_zbin_lo], xmm1 - movdqa OWORD PTR[rsp + abs_minus_zbin_hi], xmm5 + ; x - (*zbin_ptr + zbin_oq_value) + psubw 
xmm1, xmm2 + psubw xmm5, xmm3 + movdqa OWORD PTR[rsp + abs_minus_zbin], xmm1 + movdqa OWORD PTR[rsp + abs_minus_zbin + 16], xmm5 - paddw xmm1, xmm2 ;add (zbin_ptr + zbin_oq_value) back - paddw xmm5, xmm3 ;add (zbin_ptr + zbin_oq_value) back + ; add (zbin_ptr + zbin_oq_value) back + paddw xmm1, xmm2 + paddw xmm5, xmm3 movdqa xmm2, OWORD PTR[rdi] - movdqa xmm3, OWORD PTR[rsi] - movdqa xmm6, OWORD PTR[rdi + 16] + + movdqa xmm3, OWORD PTR[rsi] movdqa xmm7, OWORD PTR[rsi + 16] + ; x + round paddw xmm1, xmm2 paddw xmm5, xmm6 - pmulhw xmm1, xmm3 - pmulhw xmm5, xmm7 - - mov rsi, arg(2) ;qcoeff_ptr - pxor xmm6, xmm6 - - pxor xmm1, xmm0 - pxor xmm5, xmm4 - - psubw xmm1, xmm0 - psubw xmm5, xmm4 - - movdqa OWORD PTR[rsp + temp_qcoeff_lo], xmm1 - movdqa OWORD PTR[rsp + temp_qcoeff_hi], xmm5 - - movdqa OWORD PTR[rsi], xmm6 ;zero qcoeff - movdqa OWORD PTR[rsi + 16], xmm6 ;zero qcoeff - - xor rax, rax - mov rcx, -1 - - mov [rsp + eob], rcx - mov rsi, arg(9) ;zbin_boost_ptr - - mov rbx, arg(4) ;default_zig_zag - -rq_zigzag_loop: - movsxd rcx, DWORD PTR[rbx + rax*4] ;now we have rc - movsx edi, WORD PTR [rsi] ;*zbin_boost_ptr aka zbin - lea rsi, [rsi + 2] ;zbin_boost_ptr++ - - movsx edx, WORD PTR[rsp + abs_minus_zbin_lo + rcx *2] - - sub edx, edi ;x - zbin - jl rq_zigzag_1 - - mov rdi, arg(2) ;qcoeff_ptr - - movsx edx, WORD PTR[rsp + temp_qcoeff_lo + rcx *2] - - cmp edx, 0 - je rq_zigzag_1 - - mov WORD PTR[rdi + rcx * 2], dx ;qcoeff_ptr[rc] = temp_qcoeff[rc] - - mov rsi, arg(9) ;zbin_boost_ptr - mov [rsp + eob], rax ;eob = i - -rq_zigzag_1: - movsxd rcx, DWORD PTR[rbx + rax*4 + 4] - movsx edi, WORD PTR [rsi] ;*zbin_boost_ptr aka zbin - lea rsi, [rsi + 2] ;zbin_boost_ptr++ + ; y = x * quant_ptr >> 16 + pmulhw xmm3, xmm1 + pmulhw xmm7, xmm5 - movsx edx, WORD PTR[rsp + abs_minus_zbin_lo + rcx *2] - lea rax, [rax + 1] - - sub edx, edi ;x - zbin - jl rq_zigzag_1a - - mov rdi, arg(2) ;qcoeff_ptr - - movsx edx, WORD PTR[rsp + temp_qcoeff_lo + rcx *2] - - cmp edx, 0 - je rq_zigzag_1a - - 
mov WORD PTR[rdi + rcx * 2], dx ;qcoeff_ptr[rc] = temp_qcoeff[rc] - - mov rsi, arg(9) ;zbin_boost_ptr - mov [rsp + eob], rax ;eob = i - -rq_zigzag_1a: - movsxd rcx, DWORD PTR[rbx + rax*4 + 4] - movsx edi, WORD PTR [rsi] ;*zbin_boost_ptr aka zbin - lea rsi, [rsi + 2] ;zbin_boost_ptr++ - - movsx edx, WORD PTR[rsp + abs_minus_zbin_lo + rcx *2] - lea rax, [rax + 1] - - sub edx, edi ;x - zbin - jl rq_zigzag_1b - - mov rdi, arg(2) ;qcoeff_ptr - - movsx edx, WORD PTR[rsp + temp_qcoeff_lo + rcx *2] - - cmp edx, 0 - je rq_zigzag_1b - - mov WORD PTR[rdi + rcx * 2], dx ;qcoeff_ptr[rc] = temp_qcoeff[rc] - - mov rsi, arg(9) ;zbin_boost_ptr - mov [rsp + eob], rax ;eob = i - -rq_zigzag_1b: - movsxd rcx, DWORD PTR[rbx + rax*4 + 4] - movsx edi, WORD PTR [rsi] ;*zbin_boost_ptr aka zbin - lea rsi, [rsi + 2] ;zbin_boost_ptr++ - - movsx edx, WORD PTR[rsp + abs_minus_zbin_lo + rcx *2] - lea rax, [rax + 1] - - sub edx, edi ;x - zbin - jl rq_zigzag_1c - - mov rdi, arg(2) ;qcoeff_ptr - - movsx edx, WORD PTR[rsp + temp_qcoeff_lo + rcx *2] - - cmp edx, 0 - je rq_zigzag_1c - - mov WORD PTR[rdi + rcx * 2], dx ;qcoeff_ptr[rc] = temp_qcoeff[rc] - - mov rsi, arg(9) ;zbin_boost_ptr - mov [rsp + eob], rax ;eob = i - -rq_zigzag_1c: - lea rax, [rax + 1] - - cmp rax, 16 - jl rq_zigzag_loop + ; y += x + paddw xmm1, xmm3 + paddw xmm5, xmm7 - mov rdi, arg(2) ;qcoeff_ptr - mov rcx, arg(3) ;dequant_ptr - mov rsi, arg(7) ;dqcoeff_ptr + movdqa OWORD PTR[rsp + temp_qcoeff], xmm1 + movdqa OWORD PTR[rsp + temp_qcoeff + 16], xmm5 - movdqa xmm2, OWORD PTR[rdi] - movdqa xmm3, OWORD PTR[rdi + 16] + pxor xmm6, xmm6 + ; zero qcoeff + movdqa OWORD PTR[rsp + qcoeff], xmm6 + movdqa OWORD PTR[rsp + qcoeff + 16], xmm6 + + mov [rsp + eob_tmp], DWORD -1 ; eob + mov rsi, arg(9) ; zbin_boost_ptr + mov rdi, arg(4) ; default_zig_zag + mov rax, arg(10) ; quant_shift_ptr + +%macro ZIGZAG_LOOP 2 +rq_zigzag_loop_%1: + movsxd rdx, DWORD PTR[rdi + (%1 * 4)] ; rc + movsx ebx, WORD PTR [rsi] ; *zbin_boost_ptr + lea rsi, [rsi + 2] ; 
zbin_boost_ptr++ + + ; x + movsx ecx, WORD PTR[rsp + abs_minus_zbin + rdx *2] + + ; if (x >= zbin) + sub ecx, ebx ; x - zbin + jl rq_zigzag_loop_%2 ; x < zbin + + movsx ebx, WORD PTR[rsp + temp_qcoeff + rdx *2] + + ; downshift by quant_shift[rdx] + movsx ecx, WORD PTR[rax + rdx*2] ; quant_shift_ptr[rc] + sar ebx, cl ; also sets Z bit + je rq_zigzag_loop_%2 ; !y + mov WORD PTR[rsp + qcoeff + rdx * 2], bx ;qcoeff_ptr[rc] = temp_qcoeff[rc] + + mov rsi, arg(9) ; reset to b->zrun_zbin_boost + mov [rsp + eob_tmp], DWORD %1 ; eob = i +%endmacro +ZIGZAG_LOOP 0, 1 +ZIGZAG_LOOP 1, 2 +ZIGZAG_LOOP 2, 3 +ZIGZAG_LOOP 3, 4 +ZIGZAG_LOOP 4, 5 +ZIGZAG_LOOP 5, 6 +ZIGZAG_LOOP 6, 7 +ZIGZAG_LOOP 7, 8 +ZIGZAG_LOOP 8, 9 +ZIGZAG_LOOP 9, 10 +ZIGZAG_LOOP 10, 11 +ZIGZAG_LOOP 11, 12 +ZIGZAG_LOOP 12, 13 +ZIGZAG_LOOP 13, 14 +ZIGZAG_LOOP 14, 15 +ZIGZAG_LOOP 15, end +rq_zigzag_loop_end: + + mov rbx, arg(2) ; qcoeff_ptr + mov rcx, arg(3) ; dequant_ptr + mov rsi, arg(7) ; dqcoeff_ptr + mov rax, [rsp + eob_tmp] ; eob + + movdqa xmm2, OWORD PTR[rsp + qcoeff] + movdqa xmm3, OWORD PTR[rsp + qcoeff + 16] + + ; y ^ sz + pxor xmm2, xmm0 + pxor xmm3, xmm4 + ; x = (y ^ sz) - sz + psubw xmm2, xmm0 + psubw xmm3, xmm4 movdqa xmm0, OWORD PTR[rcx] movdqa xmm1, OWORD PTR[rcx + 16] @@ -232,31 +181,27 @@ pmullw xmm0, xmm2 pmullw xmm1, xmm3 - movdqa OWORD PTR[rsi], xmm0 ;store dqcoeff - movdqa OWORD PTR[rsi + 16], xmm1 ;store dqcoeff - - mov rax, [rsp + eob] - - movdqa xmm6, OWORD PTR[rsp + save_xmm6] - movdqa xmm7, OWORD PTR[rsp + save_xmm7] + movdqa OWORD PTR[rbx], xmm2 + movdqa OWORD PTR[rbx + 16], xmm3 + movdqa OWORD PTR[rsi], xmm0 ; store dqcoeff + movdqa OWORD PTR[rsi + 16], xmm1 ; store dqcoeff add rax, 1 - add rsp, vp8_regularquantizeb_stack_size - pop rsp - ; begin epilog + add rsp, stack_size + pop rsp pop rbx pop rdi pop rsi + RESTORE_XMM UNSHADOW_ARGS pop rbp ret - ;int vp8_fast_quantize_b_impl_sse2(short *coeff_ptr, ; short *qcoeff_ptr,short *dequant_ptr, -; short *scan_mask, short *round_ptr, +; short 
*inv_scan_order, short *round_ptr, ; short *quant_ptr, short *dqcoeff_ptr); global sym(vp8_fast_quantize_b_impl_sse2) sym(vp8_fast_quantize_b_impl_sse2): @@ -265,32 +210,18 @@ SHADOW_ARGS_TO_STACK 7 push rsi push rdi - push rbx ; end prolog - ALIGN_STACK 16, rax - - %define save_xmm6 0 - %define save_xmm7 16 - - %define vp8_fastquantizeb_stack_size save_xmm7 + 16 - - sub rsp, vp8_fastquantizeb_stack_size - - movdqa XMMWORD PTR[rsp + save_xmm6], xmm6 - movdqa XMMWORD PTR[rsp + save_xmm7], xmm7 - mov rdx, arg(0) ;coeff_ptr mov rcx, arg(2) ;dequant_ptr - mov rax, arg(3) ;scan_mask mov rdi, arg(4) ;round_ptr mov rsi, arg(5) ;quant_ptr movdqa xmm0, XMMWORD PTR[rdx] movdqa xmm4, XMMWORD PTR[rdx + 16] - movdqa xmm6, XMMWORD PTR[rdi] ;round lo - movdqa xmm7, XMMWORD PTR[rdi + 16] ;round hi + movdqa xmm2, XMMWORD PTR[rdi] ;round lo + movdqa xmm3, XMMWORD PTR[rdi + 16] ;round hi movdqa xmm1, xmm0 movdqa xmm5, xmm4 @@ -303,8 +234,8 @@ psubw xmm1, xmm0 ;x = abs(z) psubw xmm5, xmm4 ;x = abs(z) - paddw xmm1, xmm6 - paddw xmm5, xmm7 + paddw xmm1, xmm2 + paddw xmm5, xmm3 pmulhw xmm1, XMMWORD PTR[rsi] pmulhw xmm5, XMMWORD PTR[rsi + 16] @@ -312,8 +243,8 @@ mov rdi, arg(1) ;qcoeff_ptr mov rsi, arg(6) ;dqcoeff_ptr - movdqa xmm6, XMMWORD PTR[rcx] - movdqa xmm7, XMMWORD PTR[rcx + 16] + movdqa xmm2, XMMWORD PTR[rcx] + movdqa xmm3, XMMWORD PTR[rcx + 16] pxor xmm1, xmm0 pxor xmm5, xmm4 @@ -323,64 +254,47 @@ movdqa XMMWORD PTR[rdi], xmm1 movdqa XMMWORD PTR[rdi + 16], xmm5 - pmullw xmm6, xmm1 - pmullw xmm7, xmm5 + pmullw xmm2, xmm1 + pmullw xmm3, xmm5 - movdqa xmm2, XMMWORD PTR[rax] - movdqa xmm3, XMMWORD PTR[rax+16]; + mov rdi, arg(3) ;inv_scan_order - pxor xmm4, xmm4 ;clear all bits + ; Start with 16 + pxor xmm4, xmm4 ;clear all bits pcmpeqw xmm1, xmm4 pcmpeqw xmm5, xmm4 - pcmpeqw xmm4, xmm4 ;set all bits + pcmpeqw xmm4, xmm4 ;set all bits pxor xmm1, xmm4 pxor xmm5, xmm4 - psrlw xmm1, 15 - psrlw xmm5, 15 + pand xmm1, XMMWORD PTR[rdi] + pand xmm5, XMMWORD PTR[rdi+16] - pmaddwd xmm1, xmm2 - 
pmaddwd xmm5, xmm3 + pmaxsw xmm1, xmm5 - movq xmm2, xmm1 - movq xmm3, xmm5 + ; now down to 8 + pshufd xmm5, xmm1, 00001110b - psrldq xmm1, 8 - psrldq xmm5, 8 + pmaxsw xmm1, xmm5 - paddd xmm1, xmm5 - paddd xmm2, xmm3 + ; only 4 left + pshuflw xmm5, xmm1, 00001110b - paddd xmm1, xmm2 - movq xmm5, xmm1 + pmaxsw xmm1, xmm5 - psrldq xmm1, 4 - paddd xmm5, xmm1 + ; okay, just 2! + pshuflw xmm5, xmm1, 00000001b - movq rcx, xmm5 - and rcx, 0xffff + pmaxsw xmm1, xmm5 - xor rdx, rdx - sub rdx, rcx + movd rax, xmm1 + and rax, 0xff - bsr rax, rcx - inc rax - - sar rdx, 31 - and rax, rdx - - movdqa XMMWORD PTR[rsi], xmm6 ;store dqcoeff - movdqa XMMWORD PTR[rsi + 16], xmm7 ;store dqcoeff - - movdqa xmm6, XMMWORD PTR[rsp + save_xmm6] - movdqa xmm7, XMMWORD PTR[rsp + save_xmm7] - - add rsp, vp8_fastquantizeb_stack_size - pop rsp + movdqa XMMWORD PTR[rsi], xmm2 ;store dqcoeff + movdqa XMMWORD PTR[rsi + 16], xmm3 ;store dqcoeff ; begin epilog - pop rbx pop rdi pop rsi UNSHADOW_ARGS diff -Nru libvpx-0.9.5/vp8/encoder/x86/quantize_ssse3.asm libvpx-0.9.6/vp8/encoder/x86/quantize_ssse3.asm --- libvpx-0.9.5/vp8/encoder/x86/quantize_ssse3.asm 1970-01-01 00:00:00.000000000 +0000 +++ libvpx-0.9.6/vp8/encoder/x86/quantize_ssse3.asm 2011-03-04 20:40:40.000000000 +0000 @@ -0,0 +1,114 @@ +; +; Copyright (c) 2010 The WebM project authors. All Rights Reserved. +; +; Use of this source code is governed by a BSD-style license and patent +; grant that can be found in the LICENSE file in the root of the source +; tree. All contributing project authors may be found in the AUTHORS +; file in the root of the source tree. 
+; + + +%include "vpx_ports/x86_abi_support.asm" + + +;int vp8_fast_quantize_b_impl_ssse3(short *coeff_ptr +; short *qcoeff_ptr,short *dequant_ptr, +; short *round_ptr, +; short *quant_ptr, short *dqcoeff_ptr); +; +global sym(vp8_fast_quantize_b_impl_ssse3) +sym(vp8_fast_quantize_b_impl_ssse3): + push rbp + mov rbp, rsp + SHADOW_ARGS_TO_STACK 6 + GET_GOT rbx + push rsi + push rdi + ; end prolog + + mov rdx, arg(0) ;coeff_ptr + mov rdi, arg(3) ;round_ptr + mov rsi, arg(4) ;quant_ptr + + movdqa xmm0, [rdx] + movdqa xmm4, [rdx + 16] + + movdqa xmm2, [rdi] ;round lo + movdqa xmm3, [rdi + 16] ;round hi + + movdqa xmm1, xmm0 + movdqa xmm5, xmm4 + + psraw xmm0, 15 ;sign of z (aka sz) + psraw xmm4, 15 ;sign of z (aka sz) + + pabsw xmm1, xmm1 + pabsw xmm5, xmm5 + + paddw xmm1, xmm2 + paddw xmm5, xmm3 + + pmulhw xmm1, [rsi] + pmulhw xmm5, [rsi + 16] + + mov rdi, arg(1) ;qcoeff_ptr + mov rcx, arg(2) ;dequant_ptr + mov rsi, arg(5) ;dqcoeff_ptr + + pxor xmm1, xmm0 + pxor xmm5, xmm4 + psubw xmm1, xmm0 + psubw xmm5, xmm4 + + movdqa [rdi], xmm1 + movdqa [rdi + 16], xmm5 + + movdqa xmm2, [rcx] + movdqa xmm3, [rcx + 16] + + pxor xmm4, xmm4 + pmullw xmm2, xmm1 + pmullw xmm3, xmm5 + + pcmpeqw xmm1, xmm4 ;non zero mask + pcmpeqw xmm5, xmm4 ;non zero mask + packsswb xmm1, xmm5 + pshufb xmm1, [ GLOBAL(zz_shuf)] + + pmovmskb edx, xmm1 + +; xor ecx, ecx +; mov eax, -1 +;find_eob_loop: +; shr edx, 1 +; jc fq_skip +; mov eax, ecx +;fq_skip: +; inc ecx +; cmp ecx, 16 +; jne find_eob_loop + xor rdi, rdi + mov eax, -1 + xor dx, ax ;flip the bits for bsr + bsr eax, edx + + movdqa [rsi], xmm2 ;store dqcoeff + movdqa [rsi + 16], xmm3 ;store dqcoeff + + sub edi, edx ;check for all zeros in bit mask + sar edi, 31 ;0 or -1 + add eax, 1 + and eax, edi ;if the bit mask was all zero, + ;then eob = 0 + ; begin epilog + pop rdi + pop rsi + RESTORE_GOT + UNSHADOW_ARGS + pop rbp + ret + +SECTION_RODATA +align 16 +zz_shuf: + db 0, 1, 4, 8, 5, 2, 3, 6, 9, 12, 13, 10, 7, 11, 14, 15 diff -Nru 
libvpx-0.9.5/vp8/encoder/x86/quantize_x86.h libvpx-0.9.6/vp8/encoder/x86/quantize_x86.h --- libvpx-0.9.5/vp8/encoder/x86/quantize_x86.h 2010-10-28 13:14:14.000000000 +0000 +++ libvpx-0.9.6/vp8/encoder/x86/quantize_x86.h 2011-03-04 20:40:40.000000000 +0000 @@ -27,11 +27,11 @@ #if !CONFIG_RUNTIME_CPU_DETECT -/* The sse2 quantizer has not been updated to match the new exact - * quantizer introduced in commit e04e2935 - *#undef vp8_quantize_quantb - *#define vp8_quantize_quantb vp8_regular_quantize_b_sse2 - */ +// Currently, this function realizes a gain on x86 and a loss on x86_64 +#if ARCH_X86 +#undef vp8_quantize_quantb +#define vp8_quantize_quantb vp8_regular_quantize_b_sse2 +#endif #endif diff -Nru libvpx-0.9.5/vp8/encoder/x86/sad_sse3.asm libvpx-0.9.6/vp8/encoder/x86/sad_sse3.asm --- libvpx-0.9.5/vp8/encoder/x86/sad_sse3.asm 2010-10-28 13:14:14.000000000 +0000 +++ libvpx-0.9.6/vp8/encoder/x86/sad_sse3.asm 2011-03-04 20:40:40.000000000 +0000 @@ -8,24 +8,169 @@ ; be found in the AUTHORS file in the root of the source tree. 
; - %include "vpx_ports/x86_abi_support.asm" -%macro PROCESS_16X2X3 1 -%if %1 - movdqa xmm0, XMMWORD PTR [rsi] - lddqu xmm5, XMMWORD PTR [rdi] - lddqu xmm6, XMMWORD PTR [rdi+1] - lddqu xmm7, XMMWORD PTR [rdi+2] +%macro STACK_FRAME_CREATE_X3 0 +%if ABI_IS_32BIT + %define src_ptr rsi + %define src_stride rax + %define ref_ptr rdi + %define ref_stride rdx + %define end_ptr rcx + %define ret_var rbx + %define result_ptr arg(4) + %define max_err arg(4) + push rbp + mov rbp, rsp + push rsi + push rdi + push rbx + + mov rsi, arg(0) ; src_ptr + mov rdi, arg(2) ; ref_ptr + + movsxd rax, dword ptr arg(1) ; src_stride + movsxd rdx, dword ptr arg(3) ; ref_stride +%else + %ifidn __OUTPUT_FORMAT__,x64 + %define src_ptr rcx + %define src_stride rdx + %define ref_ptr r8 + %define ref_stride r9 + %define end_ptr r10 + %define ret_var r11 + %define result_ptr [rsp+8+4*8] + %define max_err [rsp+8+4*8] + %else + %define src_ptr rdi + %define src_stride rsi + %define ref_ptr rdx + %define ref_stride rcx + %define end_ptr r9 + %define ret_var r10 + %define result_ptr r8 + %define max_err r8 + %endif +%endif + +%endmacro + +%macro STACK_FRAME_DESTROY_X3 0 + %define src_ptr + %define src_stride + %define ref_ptr + %define ref_stride + %define end_ptr + %define ret_var + %define result_ptr + %define max_err + +%if ABI_IS_32BIT + pop rbx + pop rdi + pop rsi + pop rbp +%else + %ifidn __OUTPUT_FORMAT__,x64 + %endif +%endif + ret +%endmacro + +%macro STACK_FRAME_CREATE_X4 0 +%if ABI_IS_32BIT + %define src_ptr rsi + %define src_stride rax + %define r0_ptr rcx + %define r1_ptr rdx + %define r2_ptr rbx + %define r3_ptr rdi + %define ref_stride rbp + %define result_ptr arg(4) + push rbp + mov rbp, rsp + push rsi + push rdi + push rbx + + push rbp + mov rdi, arg(2) ; ref_ptr_base + + LOAD_X4_ADDRESSES rdi, rcx, rdx, rax, rdi + + mov rsi, arg(0) ; src_ptr + + movsxd rbx, dword ptr arg(1) ; src_stride + movsxd rbp, dword ptr arg(3) ; ref_stride + + xchg rbx, rax +%else + %ifidn __OUTPUT_FORMAT__,x64 
+ %define src_ptr rcx + %define src_stride rdx + %define r0_ptr rsi + %define r1_ptr r10 + %define r2_ptr r11 + %define r3_ptr r8 + %define ref_stride r9 + %define result_ptr [rsp+16+4*8] + push rsi + + LOAD_X4_ADDRESSES r8, r0_ptr, r1_ptr, r2_ptr, r3_ptr + %else + %define src_ptr rdi + %define src_stride rsi + %define r0_ptr r9 + %define r1_ptr r10 + %define r2_ptr r11 + %define r3_ptr rdx + %define ref_stride rcx + %define result_ptr r8 + + LOAD_X4_ADDRESSES rdx, r0_ptr, r1_ptr, r2_ptr, r3_ptr + + %endif +%endif +%endmacro + +%macro STACK_FRAME_DESTROY_X4 0 + %define src_ptr + %define src_stride + %define r0_ptr + %define r1_ptr + %define r2_ptr + %define r3_ptr + %define ref_stride + %define result_ptr + +%if ABI_IS_32BIT + pop rbx + pop rdi + pop rsi + pop rbp +%else + %ifidn __OUTPUT_FORMAT__,x64 + pop rsi + %endif +%endif + ret +%endmacro + +%macro PROCESS_16X2X3 5 +%if %1==0 + movdqa xmm0, XMMWORD PTR [%2] + lddqu xmm5, XMMWORD PTR [%3] + lddqu xmm6, XMMWORD PTR [%3+1] + lddqu xmm7, XMMWORD PTR [%3+2] psadbw xmm5, xmm0 psadbw xmm6, xmm0 psadbw xmm7, xmm0 %else - movdqa xmm0, XMMWORD PTR [rsi] - lddqu xmm1, XMMWORD PTR [rdi] - lddqu xmm2, XMMWORD PTR [rdi+1] - lddqu xmm3, XMMWORD PTR [rdi+2] + movdqa xmm0, XMMWORD PTR [%2] + lddqu xmm1, XMMWORD PTR [%3] + lddqu xmm2, XMMWORD PTR [%3+1] + lddqu xmm3, XMMWORD PTR [%3+2] psadbw xmm1, xmm0 psadbw xmm2, xmm0 @@ -35,13 +180,15 @@ paddw xmm6, xmm2 paddw xmm7, xmm3 %endif - movdqa xmm0, XMMWORD PTR [rsi+rax] - lddqu xmm1, XMMWORD PTR [rdi+rdx] - lddqu xmm2, XMMWORD PTR [rdi+rdx+1] - lddqu xmm3, XMMWORD PTR [rdi+rdx+2] - - lea rsi, [rsi+rax*2] - lea rdi, [rdi+rdx*2] + movdqa xmm0, XMMWORD PTR [%2+%4] + lddqu xmm1, XMMWORD PTR [%3+%5] + lddqu xmm2, XMMWORD PTR [%3+%5+1] + lddqu xmm3, XMMWORD PTR [%3+%5+2] + +%if %1==0 || %1==1 + lea %2, [%2+%4*2] + lea %3, [%3+%5*2] +%endif psadbw xmm1, xmm0 psadbw xmm2, xmm0 @@ -52,21 +199,21 @@ paddw xmm7, xmm3 %endmacro -%macro PROCESS_8X2X3 1 -%if %1 - movq mm0, QWORD PTR [rsi] - 
movq mm5, QWORD PTR [rdi] - movq mm6, QWORD PTR [rdi+1] - movq mm7, QWORD PTR [rdi+2] +%macro PROCESS_8X2X3 5 +%if %1==0 + movq mm0, QWORD PTR [%2] + movq mm5, QWORD PTR [%3] + movq mm6, QWORD PTR [%3+1] + movq mm7, QWORD PTR [%3+2] psadbw mm5, mm0 psadbw mm6, mm0 psadbw mm7, mm0 %else - movq mm0, QWORD PTR [rsi] - movq mm1, QWORD PTR [rdi] - movq mm2, QWORD PTR [rdi+1] - movq mm3, QWORD PTR [rdi+2] + movq mm0, QWORD PTR [%2] + movq mm1, QWORD PTR [%3] + movq mm2, QWORD PTR [%3+1] + movq mm3, QWORD PTR [%3+2] psadbw mm1, mm0 psadbw mm2, mm0 @@ -76,13 +223,15 @@ paddw mm6, mm2 paddw mm7, mm3 %endif - movq mm0, QWORD PTR [rsi+rax] - movq mm1, QWORD PTR [rdi+rdx] - movq mm2, QWORD PTR [rdi+rdx+1] - movq mm3, QWORD PTR [rdi+rdx+2] - - lea rsi, [rsi+rax*2] - lea rdi, [rdi+rdx*2] + movq mm0, QWORD PTR [%2+%4] + movq mm1, QWORD PTR [%3+%5] + movq mm2, QWORD PTR [%3+%5+1] + movq mm3, QWORD PTR [%3+%5+2] + +%if %1==0 || %1==1 + lea %2, [%2+%4*2] + lea %3, [%3+%5*2] +%endif psadbw mm1, mm0 psadbw mm2, mm0 @@ -101,115 +250,117 @@ mov %5, [%1+REG_SZ_BYTES*3] %endmacro -%macro PROCESS_16X2X4 1 -%if %1 - movdqa xmm0, XMMWORD PTR [rsi] - lddqu xmm4, XMMWORD PTR [rcx] - lddqu xmm5, XMMWORD PTR [rdx] - lddqu xmm6, XMMWORD PTR [rbx] - lddqu xmm7, XMMWORD PTR [rdi] +%macro PROCESS_16X2X4 8 +%if %1==0 + movdqa xmm0, XMMWORD PTR [%2] + lddqu xmm4, XMMWORD PTR [%3] + lddqu xmm5, XMMWORD PTR [%4] + lddqu xmm6, XMMWORD PTR [%5] + lddqu xmm7, XMMWORD PTR [%6] psadbw xmm4, xmm0 psadbw xmm5, xmm0 psadbw xmm6, xmm0 psadbw xmm7, xmm0 %else - movdqa xmm0, XMMWORD PTR [rsi] - lddqu xmm1, XMMWORD PTR [rcx] - lddqu xmm2, XMMWORD PTR [rdx] - lddqu xmm3, XMMWORD PTR [rbx] + movdqa xmm0, XMMWORD PTR [%2] + lddqu xmm1, XMMWORD PTR [%3] + lddqu xmm2, XMMWORD PTR [%4] + lddqu xmm3, XMMWORD PTR [%5] psadbw xmm1, xmm0 psadbw xmm2, xmm0 psadbw xmm3, xmm0 paddw xmm4, xmm1 - lddqu xmm1, XMMWORD PTR [rdi] + lddqu xmm1, XMMWORD PTR [%6] paddw xmm5, xmm2 paddw xmm6, xmm3 psadbw xmm1, xmm0 paddw xmm7, xmm1 
%endif - movdqa xmm0, XMMWORD PTR [rsi+rax] - lddqu xmm1, XMMWORD PTR [rcx+rbp] - lddqu xmm2, XMMWORD PTR [rdx+rbp] - lddqu xmm3, XMMWORD PTR [rbx+rbp] + movdqa xmm0, XMMWORD PTR [%2+%7] + lddqu xmm1, XMMWORD PTR [%3+%8] + lddqu xmm2, XMMWORD PTR [%4+%8] + lddqu xmm3, XMMWORD PTR [%5+%8] psadbw xmm1, xmm0 psadbw xmm2, xmm0 psadbw xmm3, xmm0 paddw xmm4, xmm1 - lddqu xmm1, XMMWORD PTR [rdi+rbp] + lddqu xmm1, XMMWORD PTR [%6+%8] paddw xmm5, xmm2 paddw xmm6, xmm3 - lea rsi, [rsi+rax*2] - lea rcx, [rcx+rbp*2] +%if %1==0 || %1==1 + lea %2, [%2+%7*2] + lea %3, [%3+%8*2] - lea rdx, [rdx+rbp*2] - lea rbx, [rbx+rbp*2] - - lea rdi, [rdi+rbp*2] + lea %4, [%4+%8*2] + lea %5, [%5+%8*2] + lea %6, [%6+%8*2] +%endif psadbw xmm1, xmm0 paddw xmm7, xmm1 %endmacro -%macro PROCESS_8X2X4 1 -%if %1 - movq mm0, QWORD PTR [rsi] - movq mm4, QWORD PTR [rcx] - movq mm5, QWORD PTR [rdx] - movq mm6, QWORD PTR [rbx] - movq mm7, QWORD PTR [rdi] +%macro PROCESS_8X2X4 8 +%if %1==0 + movq mm0, QWORD PTR [%2] + movq mm4, QWORD PTR [%3] + movq mm5, QWORD PTR [%4] + movq mm6, QWORD PTR [%5] + movq mm7, QWORD PTR [%6] psadbw mm4, mm0 psadbw mm5, mm0 psadbw mm6, mm0 psadbw mm7, mm0 %else - movq mm0, QWORD PTR [rsi] - movq mm1, QWORD PTR [rcx] - movq mm2, QWORD PTR [rdx] - movq mm3, QWORD PTR [rbx] + movq mm0, QWORD PTR [%2] + movq mm1, QWORD PTR [%3] + movq mm2, QWORD PTR [%4] + movq mm3, QWORD PTR [%5] psadbw mm1, mm0 psadbw mm2, mm0 psadbw mm3, mm0 paddw mm4, mm1 - movq mm1, QWORD PTR [rdi] + movq mm1, QWORD PTR [%6] paddw mm5, mm2 paddw mm6, mm3 psadbw mm1, mm0 paddw mm7, mm1 %endif - movq mm0, QWORD PTR [rsi+rax] - movq mm1, QWORD PTR [rcx+rbp] - movq mm2, QWORD PTR [rdx+rbp] - movq mm3, QWORD PTR [rbx+rbp] + movq mm0, QWORD PTR [%2+%7] + movq mm1, QWORD PTR [%3+%8] + movq mm2, QWORD PTR [%4+%8] + movq mm3, QWORD PTR [%5+%8] psadbw mm1, mm0 psadbw mm2, mm0 psadbw mm3, mm0 paddw mm4, mm1 - movq mm1, QWORD PTR [rdi+rbp] + movq mm1, QWORD PTR [%6+%8] paddw mm5, mm2 paddw mm6, mm3 - lea rsi, [rsi+rax*2] - 
lea rcx, [rcx+rbp*2] - - lea rdx, [rdx+rbp*2] - lea rbx, [rbx+rbp*2] +%if %1==0 || %1==1 + lea %2, [%2+%7*2] + lea %3, [%3+%8*2] - lea rdi, [rdi+rbp*2] + lea %4, [%4+%8*2] + lea %5, [%5+%8*2] + lea %6, [%6+%8*2] +%endif psadbw mm1, mm0 paddw mm7, mm1 @@ -223,54 +374,39 @@ ; int *results) global sym(vp8_sad16x16x3_sse3) sym(vp8_sad16x16x3_sse3): - push rbp - mov rbp, rsp - SHADOW_ARGS_TO_STACK 5 - push rsi - push rdi - ; end prolog - - mov rsi, arg(0) ;src_ptr - mov rdi, arg(2) ;ref_ptr - movsxd rax, dword ptr arg(1) ;src_stride - movsxd rdx, dword ptr arg(3) ;ref_stride + STACK_FRAME_CREATE_X3 - PROCESS_16X2X3 1 - PROCESS_16X2X3 0 - PROCESS_16X2X3 0 - PROCESS_16X2X3 0 - PROCESS_16X2X3 0 - PROCESS_16X2X3 0 - PROCESS_16X2X3 0 - PROCESS_16X2X3 0 + PROCESS_16X2X3 0, src_ptr, ref_ptr, src_stride, ref_stride + PROCESS_16X2X3 1, src_ptr, ref_ptr, src_stride, ref_stride + PROCESS_16X2X3 1, src_ptr, ref_ptr, src_stride, ref_stride + PROCESS_16X2X3 1, src_ptr, ref_ptr, src_stride, ref_stride + PROCESS_16X2X3 1, src_ptr, ref_ptr, src_stride, ref_stride + PROCESS_16X2X3 1, src_ptr, ref_ptr, src_stride, ref_stride + PROCESS_16X2X3 1, src_ptr, ref_ptr, src_stride, ref_stride + PROCESS_16X2X3 2, src_ptr, ref_ptr, src_stride, ref_stride - mov rdi, arg(4) ;Results + mov rcx, result_ptr movq xmm0, xmm5 psrldq xmm5, 8 paddw xmm0, xmm5 - movd [rdi], xmm0 + movd [rcx], xmm0 ;- movq xmm0, xmm6 psrldq xmm6, 8 paddw xmm0, xmm6 - movd [rdi+4], xmm0 + movd [rcx+4], xmm0 ;- movq xmm0, xmm7 psrldq xmm7, 8 paddw xmm0, xmm7 - movd [rdi+8], xmm0 + movd [rcx+8], xmm0 - ; begin epilog - pop rdi - pop rsi - UNSHADOW_ARGS - pop rbp - ret + STACK_FRAME_DESTROY_X3 ;void int vp8_sad16x8x3_sse3( ; unsigned char *src_ptr, @@ -280,50 +416,35 @@ ; int *results) global sym(vp8_sad16x8x3_sse3) sym(vp8_sad16x8x3_sse3): - push rbp - mov rbp, rsp - SHADOW_ARGS_TO_STACK 5 - push rsi - push rdi - ; end prolog - mov rsi, arg(0) ;src_ptr - mov rdi, arg(2) ;ref_ptr + STACK_FRAME_CREATE_X3 - movsxd rax, dword ptr 
arg(1) ;src_stride - movsxd rdx, dword ptr arg(3) ;ref_stride + PROCESS_16X2X3 0, src_ptr, ref_ptr, src_stride, ref_stride + PROCESS_16X2X3 1, src_ptr, ref_ptr, src_stride, ref_stride + PROCESS_16X2X3 1, src_ptr, ref_ptr, src_stride, ref_stride + PROCESS_16X2X3 2, src_ptr, ref_ptr, src_stride, ref_stride - PROCESS_16X2X3 1 - PROCESS_16X2X3 0 - PROCESS_16X2X3 0 - PROCESS_16X2X3 0 - - mov rdi, arg(4) ;Results + mov rcx, result_ptr movq xmm0, xmm5 psrldq xmm5, 8 paddw xmm0, xmm5 - movd [rdi], xmm0 + movd [rcx], xmm0 ;- movq xmm0, xmm6 psrldq xmm6, 8 paddw xmm0, xmm6 - movd [rdi+4], xmm0 + movd [rcx+4], xmm0 ;- movq xmm0, xmm7 psrldq xmm7, 8 paddw xmm0, xmm7 - movd [rdi+8], xmm0 + movd [rcx+8], xmm0 - ; begin epilog - pop rdi - pop rsi - UNSHADOW_ARGS - pop rbp - ret + STACK_FRAME_DESTROY_X3 ;void int vp8_sad8x16x3_sse3( ; unsigned char *src_ptr, @@ -333,40 +454,26 @@ ; int *results) global sym(vp8_sad8x16x3_sse3) sym(vp8_sad8x16x3_sse3): - push rbp - mov rbp, rsp - SHADOW_ARGS_TO_STACK 5 - push rsi - push rdi - ; end prolog - mov rsi, arg(0) ;src_ptr - mov rdi, arg(2) ;ref_ptr + STACK_FRAME_CREATE_X3 - movsxd rax, dword ptr arg(1) ;src_stride - movsxd rdx, dword ptr arg(3) ;ref_stride + PROCESS_8X2X3 0, src_ptr, ref_ptr, src_stride, ref_stride + PROCESS_8X2X3 1, src_ptr, ref_ptr, src_stride, ref_stride + PROCESS_8X2X3 1, src_ptr, ref_ptr, src_stride, ref_stride + PROCESS_8X2X3 1, src_ptr, ref_ptr, src_stride, ref_stride + PROCESS_8X2X3 1, src_ptr, ref_ptr, src_stride, ref_stride + PROCESS_8X2X3 1, src_ptr, ref_ptr, src_stride, ref_stride + PROCESS_8X2X3 1, src_ptr, ref_ptr, src_stride, ref_stride + PROCESS_8X2X3 2, src_ptr, ref_ptr, src_stride, ref_stride - PROCESS_8X2X3 1 - PROCESS_8X2X3 0 - PROCESS_8X2X3 0 - PROCESS_8X2X3 0 - PROCESS_8X2X3 0 - PROCESS_8X2X3 0 - PROCESS_8X2X3 0 - PROCESS_8X2X3 0 - - mov rdi, arg(4) ;Results - - movd [rdi], mm5 - movd [rdi+4], mm6 - movd [rdi+8], mm7 + mov rcx, result_ptr - ; begin epilog - pop rdi - pop rsi - UNSHADOW_ARGS - pop rbp 
- ret + punpckldq mm5, mm6 + + movq [rcx], mm5 + movd [rcx+8], mm7 + + STACK_FRAME_DESTROY_X3 ;void int vp8_sad8x8x3_sse3( ; unsigned char *src_ptr, @@ -376,36 +483,22 @@ ; int *results) global sym(vp8_sad8x8x3_sse3) sym(vp8_sad8x8x3_sse3): - push rbp - mov rbp, rsp - SHADOW_ARGS_TO_STACK 5 - push rsi - push rdi - ; end prolog - mov rsi, arg(0) ;src_ptr - mov rdi, arg(2) ;ref_ptr + STACK_FRAME_CREATE_X3 - movsxd rax, dword ptr arg(1) ;src_stride - movsxd rdx, dword ptr arg(3) ;ref_stride + PROCESS_8X2X3 0, src_ptr, ref_ptr, src_stride, ref_stride + PROCESS_8X2X3 1, src_ptr, ref_ptr, src_stride, ref_stride + PROCESS_8X2X3 1, src_ptr, ref_ptr, src_stride, ref_stride + PROCESS_8X2X3 2, src_ptr, ref_ptr, src_stride, ref_stride - PROCESS_8X2X3 1 - PROCESS_8X2X3 0 - PROCESS_8X2X3 0 - PROCESS_8X2X3 0 + mov rcx, result_ptr - mov rdi, arg(4) ;Results + punpckldq mm5, mm6 - movd [rdi], mm5 - movd [rdi+4], mm6 - movd [rdi+8], mm7 + movq [rcx], mm5 + movd [rcx+8], mm7 - ; begin epilog - pop rdi - pop rsi - UNSHADOW_ARGS - pop rbp - ret + STACK_FRAME_DESTROY_X3 ;void int vp8_sad4x4x3_sse3( ; unsigned char *src_ptr, @@ -415,33 +508,23 @@ ; int *results) global sym(vp8_sad4x4x3_sse3) sym(vp8_sad4x4x3_sse3): - push rbp - mov rbp, rsp - SHADOW_ARGS_TO_STACK 5 - push rsi - push rdi - ; end prolog - mov rsi, arg(0) ;src_ptr - mov rdi, arg(2) ;ref_ptr + STACK_FRAME_CREATE_X3 - movsxd rax, dword ptr arg(1) ;src_stride - movsxd rdx, dword ptr arg(3) ;ref_stride + movd mm0, DWORD PTR [src_ptr] + movd mm1, DWORD PTR [ref_ptr] - movd mm0, DWORD PTR [rsi] - movd mm1, DWORD PTR [rdi] - - movd mm2, DWORD PTR [rsi+rax] - movd mm3, DWORD PTR [rdi+rdx] + movd mm2, DWORD PTR [src_ptr+src_stride] + movd mm3, DWORD PTR [ref_ptr+ref_stride] punpcklbw mm0, mm2 punpcklbw mm1, mm3 - movd mm4, DWORD PTR [rdi+1] - movd mm5, DWORD PTR [rdi+2] + movd mm4, DWORD PTR [ref_ptr+1] + movd mm5, DWORD PTR [ref_ptr+2] - movd mm2, DWORD PTR [rdi+rdx+1] - movd mm3, DWORD PTR [rdi+rdx+2] + movd mm2, DWORD PTR 
[ref_ptr+ref_stride+1] + movd mm3, DWORD PTR [ref_ptr+ref_stride+2] psadbw mm1, mm0 @@ -451,29 +534,27 @@ psadbw mm4, mm0 psadbw mm5, mm0 + lea src_ptr, [src_ptr+src_stride*2] + lea ref_ptr, [ref_ptr+ref_stride*2] + movd mm0, DWORD PTR [src_ptr] + movd mm2, DWORD PTR [ref_ptr] - lea rsi, [rsi+rax*2] - lea rdi, [rdi+rdx*2] - - movd mm0, DWORD PTR [rsi] - movd mm2, DWORD PTR [rdi] - - movd mm3, DWORD PTR [rsi+rax] - movd mm6, DWORD PTR [rdi+rdx] + movd mm3, DWORD PTR [src_ptr+src_stride] + movd mm6, DWORD PTR [ref_ptr+ref_stride] punpcklbw mm0, mm3 punpcklbw mm2, mm6 - movd mm3, DWORD PTR [rdi+1] - movd mm7, DWORD PTR [rdi+2] + movd mm3, DWORD PTR [ref_ptr+1] + movd mm7, DWORD PTR [ref_ptr+2] psadbw mm2, mm0 paddw mm1, mm2 - movd mm2, DWORD PTR [rdi+rdx+1] - movd mm6, DWORD PTR [rdi+rdx+2] + movd mm2, DWORD PTR [ref_ptr+ref_stride+1] + movd mm6, DWORD PTR [ref_ptr+ref_stride+2] punpcklbw mm3, mm2 punpcklbw mm7, mm6 @@ -484,19 +565,14 @@ paddw mm3, mm4 paddw mm7, mm5 - mov rdi, arg(4) ;Results - movd [rdi], mm1 + mov rcx, result_ptr - movd [rdi+4], mm3 - movd [rdi+8], mm7 + punpckldq mm1, mm3 + movq [rcx], mm1 + movd [rcx+8], mm7 - ; begin epilog - pop rdi - pop rsi - UNSHADOW_ARGS - pop rbp - ret + STACK_FRAME_DESTROY_X3 ;unsigned int vp8_sad16x16_sse3( ; unsigned char *src_ptr, @@ -507,72 +583,50 @@ ;%define lddqu movdqu global sym(vp8_sad16x16_sse3) sym(vp8_sad16x16_sse3): - push rbp - mov rbp, rsp - SHADOW_ARGS_TO_STACK 5 - push rbx - push rsi - push rdi - ; end prolog - - mov rsi, arg(0) ;src_ptr - mov rdi, arg(2) ;ref_ptr - - movsxd rbx, dword ptr arg(1) ;src_stride - movsxd rdx, dword ptr arg(3) ;ref_stride - - lea rcx, [rsi+rbx*8] - - lea rcx, [rcx+rbx*8] - pxor mm7, mm7 - -vp8_sad16x16_sse3_loop: - movq rax, mm7 - cmp rax, arg(4) - jg vp8_sad16x16_early_exit + STACK_FRAME_CREATE_X3 - movq mm0, QWORD PTR [rsi] - movq mm2, QWORD PTR [rsi+8] + mov end_ptr, 4 + pxor xmm7, xmm7 - movq mm1, QWORD PTR [rdi] - movq mm3, QWORD PTR [rdi+8] +.vp8_sad16x16_sse3_loop: + 
movdqa xmm0, XMMWORD PTR [src_ptr] + movdqu xmm1, XMMWORD PTR [ref_ptr] + movdqa xmm2, XMMWORD PTR [src_ptr+src_stride] + movdqu xmm3, XMMWORD PTR [ref_ptr+ref_stride] - movq mm4, QWORD PTR [rsi+rbx] - movq mm5, QWORD PTR [rdi+rdx] + lea src_ptr, [src_ptr+src_stride*2] + lea ref_ptr, [ref_ptr+ref_stride*2] - psadbw mm0, mm1 - psadbw mm2, mm3 + movdqa xmm4, XMMWORD PTR [src_ptr] + movdqu xmm5, XMMWORD PTR [ref_ptr] + movdqa xmm6, XMMWORD PTR [src_ptr+src_stride] - movq mm1, QWORD PTR [rsi+rbx+8] - movq mm3, QWORD PTR [rdi+rdx+8] + psadbw xmm0, xmm1 - psadbw mm4, mm5 - psadbw mm1, mm3 + movdqu xmm1, XMMWORD PTR [ref_ptr+ref_stride] - lea rsi, [rsi+rbx*2] - lea rdi, [rdi+rdx*2] + psadbw xmm2, xmm3 + psadbw xmm4, xmm5 + psadbw xmm6, xmm1 - paddw mm0, mm2 - paddw mm4, mm1 - - paddw mm7, mm0 - paddw mm7, mm4 + lea src_ptr, [src_ptr+src_stride*2] + lea ref_ptr, [ref_ptr+ref_stride*2] - cmp rsi, rcx - jne vp8_sad16x16_sse3_loop + paddw xmm7, xmm0 + paddw xmm7, xmm2 + paddw xmm7, xmm4 + paddw xmm7, xmm6 - movq rax, mm7 + sub end_ptr, 1 + jne .vp8_sad16x16_sse3_loop -vp8_sad16x16_early_exit: + movq xmm0, xmm7 + psrldq xmm7, 8 + paddw xmm0, xmm7 + movq rax, xmm0 - ; begin epilog - pop rdi - pop rsi - pop rbx - UNSHADOW_ARGS - pop rbp - ret + STACK_FRAME_DESTROY_X3 ;void vp8_sad16x16x4d_sse3( ; unsigned char *src_ptr, @@ -582,69 +636,48 @@ ; int *results) global sym(vp8_sad16x16x4d_sse3) sym(vp8_sad16x16x4d_sse3): - push rbp - mov rbp, rsp - SHADOW_ARGS_TO_STACK 5 - push rsi - push rdi - push rbx - ; end prolog - - push rbp - mov rdi, arg(2) ; ref_ptr_base - - LOAD_X4_ADDRESSES rdi, rcx, rdx, rax, rdi - - mov rsi, arg(0) ;src_ptr - - movsxd rbx, dword ptr arg(1) ;src_stride - movsxd rbp, dword ptr arg(3) ;ref_stride - xchg rbx, rax + STACK_FRAME_CREATE_X4 - PROCESS_16X2X4 1 - PROCESS_16X2X4 0 - PROCESS_16X2X4 0 - PROCESS_16X2X4 0 - PROCESS_16X2X4 0 - PROCESS_16X2X4 0 - PROCESS_16X2X4 0 - PROCESS_16X2X4 0 + PROCESS_16X2X4 0, src_ptr, r0_ptr, r1_ptr, r2_ptr, r3_ptr, src_stride, 
ref_stride + PROCESS_16X2X4 1, src_ptr, r0_ptr, r1_ptr, r2_ptr, r3_ptr, src_stride, ref_stride + PROCESS_16X2X4 1, src_ptr, r0_ptr, r1_ptr, r2_ptr, r3_ptr, src_stride, ref_stride + PROCESS_16X2X4 1, src_ptr, r0_ptr, r1_ptr, r2_ptr, r3_ptr, src_stride, ref_stride + PROCESS_16X2X4 1, src_ptr, r0_ptr, r1_ptr, r2_ptr, r3_ptr, src_stride, ref_stride + PROCESS_16X2X4 1, src_ptr, r0_ptr, r1_ptr, r2_ptr, r3_ptr, src_stride, ref_stride + PROCESS_16X2X4 1, src_ptr, r0_ptr, r1_ptr, r2_ptr, r3_ptr, src_stride, ref_stride + PROCESS_16X2X4 2, src_ptr, r0_ptr, r1_ptr, r2_ptr, r3_ptr, src_stride, ref_stride +%if ABI_IS_32BIT pop rbp - mov rdi, arg(4) ;Results +%endif + mov rcx, result_ptr movq xmm0, xmm4 psrldq xmm4, 8 paddw xmm0, xmm4 - movd [rdi], xmm0 + movd [rcx], xmm0 ;- movq xmm0, xmm5 psrldq xmm5, 8 paddw xmm0, xmm5 - movd [rdi+4], xmm0 + movd [rcx+4], xmm0 ;- movq xmm0, xmm6 psrldq xmm6, 8 paddw xmm0, xmm6 - movd [rdi+8], xmm0 + movd [rcx+8], xmm0 ;- movq xmm0, xmm7 psrldq xmm7, 8 paddw xmm0, xmm7 - movd [rdi+12], xmm0 + movd [rcx+12], xmm0 - ; begin epilog - pop rbx - pop rdi - pop rsi - UNSHADOW_ARGS - pop rbp - ret + STACK_FRAME_DESTROY_X4 ;void vp8_sad16x8x4d_sse3( ; unsigned char *src_ptr, @@ -654,65 +687,44 @@ ; int *results) global sym(vp8_sad16x8x4d_sse3) sym(vp8_sad16x8x4d_sse3): - push rbp - mov rbp, rsp - SHADOW_ARGS_TO_STACK 5 - push rsi - push rdi - push rbx - ; end prolog - - push rbp - mov rdi, arg(2) ; ref_ptr_base - - LOAD_X4_ADDRESSES rdi, rcx, rdx, rax, rdi - mov rsi, arg(0) ;src_ptr + STACK_FRAME_CREATE_X4 - movsxd rbx, dword ptr arg(1) ;src_stride - movsxd rbp, dword ptr arg(3) ;ref_stride - - xchg rbx, rax - - PROCESS_16X2X4 1 - PROCESS_16X2X4 0 - PROCESS_16X2X4 0 - PROCESS_16X2X4 0 + PROCESS_16X2X4 0, src_ptr, r0_ptr, r1_ptr, r2_ptr, r3_ptr, src_stride, ref_stride + PROCESS_16X2X4 1, src_ptr, r0_ptr, r1_ptr, r2_ptr, r3_ptr, src_stride, ref_stride + PROCESS_16X2X4 1, src_ptr, r0_ptr, r1_ptr, r2_ptr, r3_ptr, src_stride, ref_stride + PROCESS_16X2X4 2, 
src_ptr, r0_ptr, r1_ptr, r2_ptr, r3_ptr, src_stride, ref_stride +%if ABI_IS_32BIT pop rbp - mov rdi, arg(4) ;Results +%endif + mov rcx, result_ptr movq xmm0, xmm4 psrldq xmm4, 8 paddw xmm0, xmm4 - movd [rdi], xmm0 + movd [rcx], xmm0 ;- movq xmm0, xmm5 psrldq xmm5, 8 paddw xmm0, xmm5 - movd [rdi+4], xmm0 + movd [rcx+4], xmm0 ;- movq xmm0, xmm6 psrldq xmm6, 8 paddw xmm0, xmm6 - movd [rdi+8], xmm0 + movd [rcx+8], xmm0 ;- movq xmm0, xmm7 psrldq xmm7, 8 paddw xmm0, xmm7 - movd [rdi+12], xmm0 + movd [rcx+12], xmm0 - ; begin epilog - pop rbx - pop rdi - pop rsi - UNSHADOW_ARGS - pop rbp - ret + STACK_FRAME_DESTROY_X4 ;void int vp8_sad8x16x4d_sse3( ; unsigned char *src_ptr, @@ -722,50 +734,30 @@ ; int *results) global sym(vp8_sad8x16x4d_sse3) sym(vp8_sad8x16x4d_sse3): - push rbp - mov rbp, rsp - SHADOW_ARGS_TO_STACK 5 - push rsi - push rdi - push rbx - ; end prolog - - push rbp - mov rdi, arg(2) ; ref_ptr_base - - LOAD_X4_ADDRESSES rdi, rcx, rdx, rax, rdi - mov rsi, arg(0) ;src_ptr + STACK_FRAME_CREATE_X4 - movsxd rbx, dword ptr arg(1) ;src_stride - movsxd rbp, dword ptr arg(3) ;ref_stride - - xchg rbx, rax - - PROCESS_8X2X4 1 - PROCESS_8X2X4 0 - PROCESS_8X2X4 0 - PROCESS_8X2X4 0 - PROCESS_8X2X4 0 - PROCESS_8X2X4 0 - PROCESS_8X2X4 0 - PROCESS_8X2X4 0 + PROCESS_8X2X4 0, src_ptr, r0_ptr, r1_ptr, r2_ptr, r3_ptr, src_stride, ref_stride + PROCESS_8X2X4 1, src_ptr, r0_ptr, r1_ptr, r2_ptr, r3_ptr, src_stride, ref_stride + PROCESS_8X2X4 1, src_ptr, r0_ptr, r1_ptr, r2_ptr, r3_ptr, src_stride, ref_stride + PROCESS_8X2X4 1, src_ptr, r0_ptr, r1_ptr, r2_ptr, r3_ptr, src_stride, ref_stride + PROCESS_8X2X4 1, src_ptr, r0_ptr, r1_ptr, r2_ptr, r3_ptr, src_stride, ref_stride + PROCESS_8X2X4 1, src_ptr, r0_ptr, r1_ptr, r2_ptr, r3_ptr, src_stride, ref_stride + PROCESS_8X2X4 1, src_ptr, r0_ptr, r1_ptr, r2_ptr, r3_ptr, src_stride, ref_stride + PROCESS_8X2X4 2, src_ptr, r0_ptr, r1_ptr, r2_ptr, r3_ptr, src_stride, ref_stride +%if ABI_IS_32BIT pop rbp - mov rdi, arg(4) ;Results +%endif + mov rcx, 
result_ptr - movd [rdi], mm4 - movd [rdi+4], mm5 - movd [rdi+8], mm6 - movd [rdi+12], mm7 + punpckldq mm4, mm5 + punpckldq mm6, mm7 - ; begin epilog - pop rbx - pop rdi - pop rsi - UNSHADOW_ARGS - pop rbp - ret + movq [rcx], mm4 + movq [rcx+8], mm6 + + STACK_FRAME_DESTROY_X4 ;void int vp8_sad8x8x4d_sse3( ; unsigned char *src_ptr, @@ -775,46 +767,26 @@ ; int *results) global sym(vp8_sad8x8x4d_sse3) sym(vp8_sad8x8x4d_sse3): - push rbp - mov rbp, rsp - SHADOW_ARGS_TO_STACK 5 - push rsi - push rdi - push rbx - ; end prolog - - push rbp - mov rdi, arg(2) ; ref_ptr_base - - LOAD_X4_ADDRESSES rdi, rcx, rdx, rax, rdi - mov rsi, arg(0) ;src_ptr + STACK_FRAME_CREATE_X4 - movsxd rbx, dword ptr arg(1) ;src_stride - movsxd rbp, dword ptr arg(3) ;ref_stride - - xchg rbx, rax - - PROCESS_8X2X4 1 - PROCESS_8X2X4 0 - PROCESS_8X2X4 0 - PROCESS_8X2X4 0 + PROCESS_8X2X4 0, src_ptr, r0_ptr, r1_ptr, r2_ptr, r3_ptr, src_stride, ref_stride + PROCESS_8X2X4 1, src_ptr, r0_ptr, r1_ptr, r2_ptr, r3_ptr, src_stride, ref_stride + PROCESS_8X2X4 1, src_ptr, r0_ptr, r1_ptr, r2_ptr, r3_ptr, src_stride, ref_stride + PROCESS_8X2X4 2, src_ptr, r0_ptr, r1_ptr, r2_ptr, r3_ptr, src_stride, ref_stride +%if ABI_IS_32BIT pop rbp - mov rdi, arg(4) ;Results +%endif + mov rcx, result_ptr - movd [rdi], mm4 - movd [rdi+4], mm5 - movd [rdi+8], mm6 - movd [rdi+12], mm7 + punpckldq mm4, mm5 + punpckldq mm6, mm7 - ; begin epilog - pop rbx - pop rdi - pop rsi - UNSHADOW_ARGS - pop rbp - ret + movq [rcx], mm4 + movq [rcx+8], mm6 + + STACK_FRAME_DESTROY_X4 ;void int vp8_sad4x4x4d_sse3( ; unsigned char *src_ptr, @@ -824,43 +796,26 @@ ; int *results) global sym(vp8_sad4x4x4d_sse3) sym(vp8_sad4x4x4d_sse3): - push rbp - mov rbp, rsp - SHADOW_ARGS_TO_STACK 5 - push rsi - push rdi - push rbx - ; end prolog - - push rbp - mov rdi, arg(2) ; ref_ptr_base - - LOAD_X4_ADDRESSES rdi, rcx, rdx, rax, rdi - mov rsi, arg(0) ;src_ptr + STACK_FRAME_CREATE_X4 - movsxd rbx, dword ptr arg(1) ;src_stride - movsxd rbp, dword ptr arg(3) 
;ref_stride + movd mm0, DWORD PTR [src_ptr] + movd mm1, DWORD PTR [r0_ptr] - xchg rbx, rax - - movd mm0, DWORD PTR [rsi] - movd mm1, DWORD PTR [rcx] - - movd mm2, DWORD PTR [rsi+rax] - movd mm3, DWORD PTR [rcx+rbp] + movd mm2, DWORD PTR [src_ptr+src_stride] + movd mm3, DWORD PTR [r0_ptr+ref_stride] punpcklbw mm0, mm2 punpcklbw mm1, mm3 - movd mm4, DWORD PTR [rdx] - movd mm5, DWORD PTR [rbx] + movd mm4, DWORD PTR [r1_ptr] + movd mm5, DWORD PTR [r2_ptr] - movd mm6, DWORD PTR [rdi] - movd mm2, DWORD PTR [rdx+rbp] + movd mm6, DWORD PTR [r3_ptr] + movd mm2, DWORD PTR [r1_ptr+ref_stride] - movd mm3, DWORD PTR [rbx+rbp] - movd mm7, DWORD PTR [rdi+rbp] + movd mm3, DWORD PTR [r2_ptr+ref_stride] + movd mm7, DWORD PTR [r3_ptr+ref_stride] psadbw mm1, mm0 @@ -875,37 +830,40 @@ - lea rsi, [rsi+rax*2] - lea rcx, [rcx+rbp*2] + lea src_ptr, [src_ptr+src_stride*2] + lea r0_ptr, [r0_ptr+ref_stride*2] - lea rdx, [rdx+rbp*2] - lea rbx, [rbx+rbp*2] + lea r1_ptr, [r1_ptr+ref_stride*2] + lea r2_ptr, [r2_ptr+ref_stride*2] - lea rdi, [rdi+rbp*2] + lea r3_ptr, [r3_ptr+ref_stride*2] - movd mm0, DWORD PTR [rsi] - movd mm2, DWORD PTR [rcx] + movd mm0, DWORD PTR [src_ptr] + movd mm2, DWORD PTR [r0_ptr] - movd mm3, DWORD PTR [rsi+rax] - movd mm7, DWORD PTR [rcx+rbp] + movd mm3, DWORD PTR [src_ptr+src_stride] + movd mm7, DWORD PTR [r0_ptr+ref_stride] punpcklbw mm0, mm3 punpcklbw mm2, mm7 - movd mm3, DWORD PTR [rdx] - movd mm7, DWORD PTR [rbx] + movd mm3, DWORD PTR [r1_ptr] + movd mm7, DWORD PTR [r2_ptr] psadbw mm2, mm0 +%if ABI_IS_32BIT mov rax, rbp pop rbp - mov rsi, arg(4) ;Results +%define ref_stride rax +%endif + mov rsi, result_ptr paddw mm1, mm2 movd [rsi], mm1 - movd mm2, DWORD PTR [rdx+rax] - movd mm1, DWORD PTR [rbx+rax] + movd mm2, DWORD PTR [r1_ptr+ref_stride] + movd mm1, DWORD PTR [r2_ptr+ref_stride] punpcklbw mm3, mm2 punpcklbw mm7, mm1 @@ -913,8 +871,8 @@ psadbw mm3, mm0 psadbw mm7, mm0 - movd mm2, DWORD PTR [rdi] - movd mm1, DWORD PTR [rdi+rax] + movd mm2, DWORD PTR [r3_ptr] + movd 
mm1, DWORD PTR [r3_ptr+ref_stride] paddw mm3, mm4 paddw mm7, mm5 @@ -929,10 +887,4 @@ movd [rsi+12], mm2 - ; begin epilog - pop rbx - pop rdi - pop rsi - UNSHADOW_ARGS - pop rbp - ret + STACK_FRAME_DESTROY_X4 diff -Nru libvpx-0.9.5/vp8/encoder/x86/sad_sse4.asm libvpx-0.9.6/vp8/encoder/x86/sad_sse4.asm --- libvpx-0.9.5/vp8/encoder/x86/sad_sse4.asm 1970-01-01 00:00:00.000000000 +0000 +++ libvpx-0.9.6/vp8/encoder/x86/sad_sse4.asm 2011-03-04 20:40:40.000000000 +0000 @@ -0,0 +1,353 @@ +; +; Copyright (c) 2010 The WebM project authors. All Rights Reserved. +; +; Use of this source code is governed by a BSD-style license +; that can be found in the LICENSE file in the root of the source +; tree. An additional intellectual property rights grant can be found +; in the file PATENTS. All contributing project authors may +; be found in the AUTHORS file in the root of the source tree. +; + + +%include "vpx_ports/x86_abi_support.asm" + +%macro PROCESS_16X2X8 1 +%if %1 + movdqa xmm0, XMMWORD PTR [rsi] + movq xmm1, MMWORD PTR [rdi] + movq xmm3, MMWORD PTR [rdi+8] + movq xmm2, MMWORD PTR [rdi+16] + punpcklqdq xmm1, xmm3 + punpcklqdq xmm3, xmm2 + + movdqa xmm2, xmm1 + mpsadbw xmm1, xmm0, 0x0 + mpsadbw xmm2, xmm0, 0x5 + + psrldq xmm0, 8 + + movdqa xmm4, xmm3 + mpsadbw xmm3, xmm0, 0x0 + mpsadbw xmm4, xmm0, 0x5 + + paddw xmm1, xmm2 + paddw xmm1, xmm3 + paddw xmm1, xmm4 +%else + movdqa xmm0, XMMWORD PTR [rsi] + movq xmm5, MMWORD PTR [rdi] + movq xmm3, MMWORD PTR [rdi+8] + movq xmm2, MMWORD PTR [rdi+16] + punpcklqdq xmm5, xmm3 + punpcklqdq xmm3, xmm2 + + movdqa xmm2, xmm5 + mpsadbw xmm5, xmm0, 0x0 + mpsadbw xmm2, xmm0, 0x5 + + psrldq xmm0, 8 + + movdqa xmm4, xmm3 + mpsadbw xmm3, xmm0, 0x0 + mpsadbw xmm4, xmm0, 0x5 + + paddw xmm5, xmm2 + paddw xmm5, xmm3 + paddw xmm5, xmm4 + + paddw xmm1, xmm5 +%endif + movdqa xmm0, XMMWORD PTR [rsi + rax] + movq xmm5, MMWORD PTR [rdi+ rdx] + movq xmm3, MMWORD PTR [rdi+ rdx+8] + movq xmm2, MMWORD PTR [rdi+ rdx+16] + punpcklqdq xmm5, xmm3 + punpcklqdq 
xmm3, xmm2 + + lea rsi, [rsi+rax*2] + lea rdi, [rdi+rdx*2] + + movdqa xmm2, xmm5 + mpsadbw xmm5, xmm0, 0x0 + mpsadbw xmm2, xmm0, 0x5 + + psrldq xmm0, 8 + movdqa xmm4, xmm3 + mpsadbw xmm3, xmm0, 0x0 + mpsadbw xmm4, xmm0, 0x5 + + paddw xmm5, xmm2 + paddw xmm5, xmm3 + paddw xmm5, xmm4 + + paddw xmm1, xmm5 +%endmacro + +%macro PROCESS_8X2X8 1 +%if %1 + movq xmm0, MMWORD PTR [rsi] + movq xmm1, MMWORD PTR [rdi] + movq xmm3, MMWORD PTR [rdi+8] + punpcklqdq xmm1, xmm3 + + movdqa xmm2, xmm1 + mpsadbw xmm1, xmm0, 0x0 + mpsadbw xmm2, xmm0, 0x5 + paddw xmm1, xmm2 +%else + movq xmm0, MMWORD PTR [rsi] + movq xmm5, MMWORD PTR [rdi] + movq xmm3, MMWORD PTR [rdi+8] + punpcklqdq xmm5, xmm3 + + movdqa xmm2, xmm5 + mpsadbw xmm5, xmm0, 0x0 + mpsadbw xmm2, xmm0, 0x5 + paddw xmm5, xmm2 + + paddw xmm1, xmm5 +%endif + movq xmm0, MMWORD PTR [rsi + rax] + movq xmm5, MMWORD PTR [rdi+ rdx] + movq xmm3, MMWORD PTR [rdi+ rdx+8] + punpcklqdq xmm5, xmm3 + + lea rsi, [rsi+rax*2] + lea rdi, [rdi+rdx*2] + + movdqa xmm2, xmm5 + mpsadbw xmm5, xmm0, 0x0 + mpsadbw xmm2, xmm0, 0x5 + paddw xmm5, xmm2 + + paddw xmm1, xmm5 +%endmacro + +%macro PROCESS_4X2X8 1 +%if %1 + movd xmm0, [rsi] + movq xmm1, MMWORD PTR [rdi] + movq xmm3, MMWORD PTR [rdi+8] + punpcklqdq xmm1, xmm3 + + mpsadbw xmm1, xmm0, 0x0 +%else + movd xmm0, [rsi] + movq xmm5, MMWORD PTR [rdi] + movq xmm3, MMWORD PTR [rdi+8] + punpcklqdq xmm5, xmm3 + + mpsadbw xmm5, xmm0, 0x0 + + paddw xmm1, xmm5 +%endif + movd xmm0, [rsi + rax] + movq xmm5, MMWORD PTR [rdi+ rdx] + movq xmm3, MMWORD PTR [rdi+ rdx+8] + punpcklqdq xmm5, xmm3 + + lea rsi, [rsi+rax*2] + lea rdi, [rdi+rdx*2] + + mpsadbw xmm5, xmm0, 0x0 + + paddw xmm1, xmm5 +%endmacro + + +;void vp8_sad16x16x8_sse4( +; const unsigned char *src_ptr, +; int src_stride, +; const unsigned char *ref_ptr, +; int ref_stride, +; unsigned short *sad_array); +global sym(vp8_sad16x16x8_sse4) +sym(vp8_sad16x16x8_sse4): + push rbp + mov rbp, rsp + SHADOW_ARGS_TO_STACK 5 + push rsi + push rdi + ; end prolog + + mov 
rsi, arg(0) ;src_ptr + mov rdi, arg(2) ;ref_ptr + + movsxd rax, dword ptr arg(1) ;src_stride + movsxd rdx, dword ptr arg(3) ;ref_stride + + PROCESS_16X2X8 1 + PROCESS_16X2X8 0 + PROCESS_16X2X8 0 + PROCESS_16X2X8 0 + PROCESS_16X2X8 0 + PROCESS_16X2X8 0 + PROCESS_16X2X8 0 + PROCESS_16X2X8 0 + + mov rdi, arg(4) ;Results + movdqu XMMWORD PTR [rdi], xmm1 + + ; begin epilog + pop rdi + pop rsi + UNSHADOW_ARGS + pop rbp + ret + + +;void vp8_sad16x8x8_sse4( +; const unsigned char *src_ptr, +; int src_stride, +; const unsigned char *ref_ptr, +; int ref_stride, +; unsigned short *sad_array +;); +global sym(vp8_sad16x8x8_sse4) +sym(vp8_sad16x8x8_sse4): + push rbp + mov rbp, rsp + SHADOW_ARGS_TO_STACK 5 + push rsi + push rdi + ; end prolog + + mov rsi, arg(0) ;src_ptr + mov rdi, arg(2) ;ref_ptr + + movsxd rax, dword ptr arg(1) ;src_stride + movsxd rdx, dword ptr arg(3) ;ref_stride + + PROCESS_16X2X8 1 + PROCESS_16X2X8 0 + PROCESS_16X2X8 0 + PROCESS_16X2X8 0 + + mov rdi, arg(4) ;Results + movdqu XMMWORD PTR [rdi], xmm1 + + ; begin epilog + pop rdi + pop rsi + UNSHADOW_ARGS + pop rbp + ret + + +;void vp8_sad8x8x8_sse4( +; const unsigned char *src_ptr, +; int src_stride, +; const unsigned char *ref_ptr, +; int ref_stride, +; unsigned short *sad_array +;); +global sym(vp8_sad8x8x8_sse4) +sym(vp8_sad8x8x8_sse4): + push rbp + mov rbp, rsp + SHADOW_ARGS_TO_STACK 5 + push rsi + push rdi + ; end prolog + + mov rsi, arg(0) ;src_ptr + mov rdi, arg(2) ;ref_ptr + + movsxd rax, dword ptr arg(1) ;src_stride + movsxd rdx, dword ptr arg(3) ;ref_stride + + PROCESS_8X2X8 1 + PROCESS_8X2X8 0 + PROCESS_8X2X8 0 + PROCESS_8X2X8 0 + + mov rdi, arg(4) ;Results + movdqu XMMWORD PTR [rdi], xmm1 + + ; begin epilog + pop rdi + pop rsi + UNSHADOW_ARGS + pop rbp + ret + + +;void vp8_sad8x16x8_sse4( +; const unsigned char *src_ptr, +; int src_stride, +; const unsigned char *ref_ptr, +; int ref_stride, +; unsigned short *sad_array +;); +global sym(vp8_sad8x16x8_sse4) +sym(vp8_sad8x16x8_sse4): + push rbp + mov 
rbp, rsp + SHADOW_ARGS_TO_STACK 5 + push rsi + push rdi + ; end prolog + + mov rsi, arg(0) ;src_ptr + mov rdi, arg(2) ;ref_ptr + + movsxd rax, dword ptr arg(1) ;src_stride + movsxd rdx, dword ptr arg(3) ;ref_stride + + PROCESS_8X2X8 1 + PROCESS_8X2X8 0 + PROCESS_8X2X8 0 + PROCESS_8X2X8 0 + PROCESS_8X2X8 0 + PROCESS_8X2X8 0 + PROCESS_8X2X8 0 + PROCESS_8X2X8 0 + mov rdi, arg(4) ;Results + movdqu XMMWORD PTR [rdi], xmm1 + + ; begin epilog + pop rdi + pop rsi + UNSHADOW_ARGS + pop rbp + ret + + +;void vp8_sad4x4x8_c( +; const unsigned char *src_ptr, +; int src_stride, +; const unsigned char *ref_ptr, +; int ref_stride, +; unsigned short *sad_array +;); +global sym(vp8_sad4x4x8_sse4) +sym(vp8_sad4x4x8_sse4): + push rbp + mov rbp, rsp + SHADOW_ARGS_TO_STACK 5 + push rsi + push rdi + ; end prolog + + mov rsi, arg(0) ;src_ptr + mov rdi, arg(2) ;ref_ptr + + movsxd rax, dword ptr arg(1) ;src_stride + movsxd rdx, dword ptr arg(3) ;ref_stride + + PROCESS_4X2X8 1 + PROCESS_4X2X8 0 + + mov rdi, arg(4) ;Results + movdqu XMMWORD PTR [rdi], xmm1 + + ; begin epilog + pop rdi + pop rsi + UNSHADOW_ARGS + pop rbp + ret + + + + diff -Nru libvpx-0.9.5/vp8/encoder/x86/subtract_sse2.asm libvpx-0.9.6/vp8/encoder/x86/subtract_sse2.asm --- libvpx-0.9.5/vp8/encoder/x86/subtract_sse2.asm 2010-10-28 13:14:14.000000000 +0000 +++ libvpx-0.9.6/vp8/encoder/x86/subtract_sse2.asm 2011-03-04 20:40:40.000000000 +0000 @@ -77,6 +77,7 @@ push rbp mov rbp, rsp SHADOW_ARGS_TO_STACK 4 + SAVE_XMM GET_GOT rbx push rsi push rdi @@ -138,6 +139,7 @@ pop rsi ; begin epilog RESTORE_GOT + RESTORE_XMM UNSHADOW_ARGS pop rbp ret diff -Nru libvpx-0.9.5/vp8/encoder/x86/temporal_filter_apply_sse2.asm libvpx-0.9.6/vp8/encoder/x86/temporal_filter_apply_sse2.asm --- libvpx-0.9.5/vp8/encoder/x86/temporal_filter_apply_sse2.asm 1970-01-01 00:00:00.000000000 +0000 +++ libvpx-0.9.6/vp8/encoder/x86/temporal_filter_apply_sse2.asm 2011-03-04 20:40:40.000000000 +0000 @@ -0,0 +1,207 @@ +; +; Copyright (c) 2010 The WebM project authors. 
All Rights Reserved. +; +; Use of this source code is governed by a BSD-style license +; that can be found in the LICENSE file in the root of the source +; tree. An additional intellectual property rights grant can be found +; in the file PATENTS. All contributing project authors may +; be found in the AUTHORS file in the root of the source tree. +; + + +%include "vpx_ports/x86_abi_support.asm" + +; void vp8_temporal_filter_apply_sse2 | arg +; (unsigned char *frame1, | 0 +; unsigned int stride, | 1 +; unsigned char *frame2, | 2 +; unsigned int block_size, | 3 +; int strength, | 4 +; int filter_weight, | 5 +; unsigned int *accumulator, | 6 +; unsigned short *count) | 7 +global sym(vp8_temporal_filter_apply_sse2) +sym(vp8_temporal_filter_apply_sse2): + + push rbp + mov rbp, rsp + SHADOW_ARGS_TO_STACK 8 + SAVE_XMM + GET_GOT rbx + push rsi + push rdi + ALIGN_STACK 16, rax + %define block_size 0 + %define strength 16 + %define filter_weight 32 + %define rounding_bit 48 + %define rbp_backup 64 + %define stack_size 80 + sub rsp, stack_size + mov [rsp + rbp_backup], rbp + ; end prolog + + mov rdx, arg(3) + mov [rsp + block_size], rdx + movd xmm6, arg(4) + movdqa [rsp + strength], xmm6 ; where strength is used, all 16 bytes are read + + ; calculate the rounding bit outside the loop + ; 0x8000 >> (16 - strength) + mov rdx, 16 + sub rdx, arg(4) ; 16 - strength + movd xmm4, rdx ; can't use rdx w/ shift + movdqa xmm5, [GLOBAL(_const_top_bit)] + psrlw xmm5, xmm4 + movdqa [rsp + rounding_bit], xmm5 + + mov rsi, arg(0) ; src/frame1 + mov rdx, arg(2) ; predictor frame + mov rdi, arg(6) ; accumulator + mov rax, arg(7) ; count + + ; dup the filter weight and store for later + movd xmm0, arg(5) ; filter_weight + pshuflw xmm0, xmm0, 0 + punpcklwd xmm0, xmm0 + movdqa [rsp + filter_weight], xmm0 + + mov rbp, arg(1) ; stride + pxor xmm7, xmm7 ; zero for extraction + + lea rcx, [rdx + 16*16*1] + cmp dword ptr [rsp + block_size], 8 + jne temporal_filter_apply_load_16 + lea rcx, [rdx + 
8*8*1] + +temporal_filter_apply_load_8: + movq xmm0, [rsi] ; first row + lea rsi, [rsi + rbp] ; += stride + punpcklbw xmm0, xmm7 ; src[ 0- 7] + movq xmm1, [rsi] ; second row + lea rsi, [rsi + rbp] ; += stride + punpcklbw xmm1, xmm7 ; src[ 8-15] + jmp temporal_filter_apply_load_finished + +temporal_filter_apply_load_16: + movdqa xmm0, [rsi] ; src (frame1) + lea rsi, [rsi + rbp] ; += stride + movdqa xmm1, xmm0 + punpcklbw xmm0, xmm7 ; src[ 0- 7] + punpckhbw xmm1, xmm7 ; src[ 8-15] + +temporal_filter_apply_load_finished: + movdqa xmm2, [rdx] ; predictor (frame2) + movdqa xmm3, xmm2 + punpcklbw xmm2, xmm7 ; pred[ 0- 7] + punpckhbw xmm3, xmm7 ; pred[ 8-15] + + ; modifier = src_byte - pixel_value + psubw xmm0, xmm2 ; src - pred[ 0- 7] + psubw xmm1, xmm3 ; src - pred[ 8-15] + + ; modifier *= modifier + pmullw xmm0, xmm0 ; modifer[ 0- 7]^2 + pmullw xmm1, xmm1 ; modifer[ 8-15]^2 + + ; modifier *= 3 + pmullw xmm0, [GLOBAL(_const_3w)] + pmullw xmm1, [GLOBAL(_const_3w)] + + ; modifer += 0x8000 >> (16 - strength) + paddw xmm0, [rsp + rounding_bit] + paddw xmm1, [rsp + rounding_bit] + + ; modifier >>= strength + psrlw xmm0, [rsp + strength] + psrlw xmm1, [rsp + strength] + + ; modifier = 16 - modifier + ; saturation takes care of modifier > 16 + movdqa xmm3, [GLOBAL(_const_16w)] + movdqa xmm2, [GLOBAL(_const_16w)] + psubusw xmm3, xmm1 + psubusw xmm2, xmm0 + + ; modifier *= filter_weight + pmullw xmm2, [rsp + filter_weight] + pmullw xmm3, [rsp + filter_weight] + + ; count + movdqa xmm4, [rax] + movdqa xmm5, [rax+16] + ; += modifier + paddw xmm4, xmm2 + paddw xmm5, xmm3 + ; write back + movdqa [rax], xmm4 + movdqa [rax+16], xmm5 + lea rax, [rax + 16*2] ; count += 16*(sizeof(short)) + + ; load and extract the predictor up to shorts + pxor xmm7, xmm7 + movdqa xmm0, [rdx] + lea rdx, [rdx + 16*1] ; pred += 16*(sizeof(char)) + movdqa xmm1, xmm0 + punpcklbw xmm0, xmm7 ; pred[ 0- 7] + punpckhbw xmm1, xmm7 ; pred[ 8-15] + + ; modifier *= pixel_value + pmullw xmm0, xmm2 + pmullw xmm1, xmm3 
+ + ; expand to double words + movdqa xmm2, xmm0 + punpcklwd xmm0, xmm7 ; [ 0- 3] + punpckhwd xmm2, xmm7 ; [ 4- 7] + movdqa xmm3, xmm1 + punpcklwd xmm1, xmm7 ; [ 8-11] + punpckhwd xmm3, xmm7 ; [12-15] + + ; accumulator + movdqa xmm4, [rdi] + movdqa xmm5, [rdi+16] + movdqa xmm6, [rdi+32] + movdqa xmm7, [rdi+48] + ; += modifier + paddw xmm4, xmm0 + paddw xmm5, xmm2 + paddw xmm6, xmm1 + paddw xmm7, xmm3 + ; write back + movdqa [rdi], xmm4 + movdqa [rdi+16], xmm5 + movdqa [rdi+32], xmm6 + movdqa [rdi+48], xmm7 + lea rdi, [rdi + 16*4] ; accumulator += 16*(sizeof(int)) + + cmp rdx, rcx + je temporal_filter_apply_epilog + pxor xmm7, xmm7 ; zero for extraction + cmp dword ptr [rsp + block_size], 16 + je temporal_filter_apply_load_16 + jmp temporal_filter_apply_load_8 + +temporal_filter_apply_epilog: + ; begin epilog + mov rbp, [rsp + rbp_backup] + add rsp, stack_size + pop rsp + pop rdi + pop rsi + RESTORE_GOT + RESTORE_XMM + UNSHADOW_ARGS + pop rbp + ret + +SECTION_RODATA +align 16 +_const_3w: + times 8 dw 3 +align 16 +_const_top_bit: + times 8 dw 1<<15 +align 16 +_const_16w + times 8 dw 16 diff -Nru libvpx-0.9.5/vp8/encoder/x86/temporal_filter_x86.h libvpx-0.9.6/vp8/encoder/x86/temporal_filter_x86.h --- libvpx-0.9.5/vp8/encoder/x86/temporal_filter_x86.h 1970-01-01 00:00:00.000000000 +0000 +++ libvpx-0.9.6/vp8/encoder/x86/temporal_filter_x86.h 2011-03-04 20:40:40.000000000 +0000 @@ -0,0 +1,27 @@ +/* + * Copyright (c) 2010 The WebM project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
+ */ + + +#ifndef __INC_VP8_TEMPORAL_FILTER_X86_H +#define __INC_VP8_TEMPORAL_FILTER_X86_H + +#if HAVE_SSE2 +extern prototype_apply(vp8_temporal_filter_apply_sse2); + +#if !CONFIG_RUNTIME_CPU_DETECT + +#undef vp8_temporal_filter_apply +#define vp8_temporal_filter_apply vp8_temporal_filter_apply_sse2 + +#endif + +#endif + +#endif // __INC_VP8_TEMPORAL_FILTER_X86_H diff -Nru libvpx-0.9.5/vp8/encoder/x86/variance_impl_sse2.asm libvpx-0.9.6/vp8/encoder/x86/variance_impl_sse2.asm --- libvpx-0.9.5/vp8/encoder/x86/variance_impl_sse2.asm 2010-10-28 13:14:14.000000000 +0000 +++ libvpx-0.9.6/vp8/encoder/x86/variance_impl_sse2.asm 2011-03-04 20:40:40.000000000 +0000 @@ -85,10 +85,9 @@ push rbp mov rbp, rsp SHADOW_ARGS_TO_STACK 6 - GET_GOT rbx + push rbx push rsi push rdi - sub rsp, 16 ; end prolog mov rsi, arg(0) ;[src_ptr] @@ -97,6 +96,29 @@ movsxd rax, DWORD PTR arg(1) ;[source_stride] movsxd rdx, DWORD PTR arg(3) ;[recon_stride] + ; Prefetch data + lea rcx, [rax+rax*2] + prefetcht0 [rsi] + prefetcht0 [rsi+rax] + prefetcht0 [rsi+rax*2] + prefetcht0 [rsi+rcx] + lea rbx, [rsi+rax*4] + prefetcht0 [rbx] + prefetcht0 [rbx+rax] + prefetcht0 [rbx+rax*2] + prefetcht0 [rbx+rcx] + + lea rcx, [rdx+rdx*2] + prefetcht0 [rdi] + prefetcht0 [rdi+rdx] + prefetcht0 [rdi+rdx*2] + prefetcht0 [rdi+rcx] + lea rbx, [rdi+rdx*4] + prefetcht0 [rbx] + prefetcht0 [rbx+rdx] + prefetcht0 [rbx+rdx*2] + prefetcht0 [rbx+rcx] + pxor xmm0, xmm0 ; clear xmm0 for unpack pxor xmm7, xmm7 ; clear xmm7 for accumulating diffs @@ -107,6 +129,9 @@ movdqu xmm1, XMMWORD PTR [rsi] movdqu xmm2, XMMWORD PTR [rdi] + prefetcht0 [rsi+rax*8] + prefetcht0 [rdi+rdx*8] + movdqa xmm3, xmm1 movdqa xmm4, xmm2 @@ -178,10 +203,9 @@ ; begin epilog - add rsp, 16 pop rdi pop rsi - RESTORE_GOT + pop rbx UNSHADOW_ARGS pop rbp ret @@ -493,8 +517,8 @@ ; unsigned char *src_ptr, ; int src_pixels_per_line, ; unsigned int Height, -; unsigned short *HFilter, -; unsigned short *VFilter, +; int xoffset, +; int yoffset, ; int *sum, ; unsigned int 
*sumsquared;; ; @@ -504,68 +528,80 @@ push rbp mov rbp, rsp SHADOW_ARGS_TO_STACK 9 + SAVE_XMM GET_GOT rbx push rsi push rdi - sub rsp, 16 + push rbx ; end prolog pxor xmm6, xmm6 ; pxor xmm7, xmm7 ; - mov rax, arg(5) ;HFilter ; - mov rdx, arg(6) ;VFilter ; - mov rsi, arg(0) ;ref_ptr ; + lea rsi, [GLOBAL(xmm_bi_rd)] ; rounding + movdqa xmm4, XMMWORD PTR [rsi] - mov rdi, arg(2) ;src_ptr ; - movsxd rcx, dword ptr arg(4) ;Height ; + lea rcx, [GLOBAL(vp8_bilinear_filters_sse2)] + movsxd rax, dword ptr arg(5) ; xoffset + + cmp rax, 0 ; skip first_pass filter if xoffset=0 + je filter_block2d_bil_var_sse2_sp_only + + shl rax, 5 ; point to filter coeff with xoffset + lea rax, [rax + rcx] ; HFilter + + movsxd rdx, dword ptr arg(6) ; yoffset + + cmp rdx, 0 ; skip second_pass filter if yoffset=0 + je filter_block2d_bil_var_sse2_fp_only + + shl rdx, 5 + lea rdx, [rdx + rcx] ; VFilter + + mov rsi, arg(0) ;ref_ptr + mov rdi, arg(2) ;src_ptr + movsxd rcx, dword ptr arg(4) ;Height pxor xmm0, xmm0 ; - movq xmm1, QWORD PTR [rsi] ; + movq xmm1, QWORD PTR [rsi] ; + movq xmm3, QWORD PTR [rsi+1] ; - movq xmm3, QWORD PTR [rsi+1] ; punpcklbw xmm1, xmm0 ; - - pmullw xmm1, [rax] ; + pmullw xmm1, [rax] ; punpcklbw xmm3, xmm0 - ; pmullw xmm3, [rax+16] ; - paddw xmm1, xmm3 ; - - paddw xmm1, [GLOBAL(xmm_bi_rd)] ; - psraw xmm1, xmm_filter_shift ; + paddw xmm1, xmm3 ; + paddw xmm1, xmm4 ; + psraw xmm1, xmm_filter_shift ; movdqa xmm5, xmm1 -%if ABI_IS_32BIT - add rsi, dword ptr arg(1) ;ref_pixels_per_line ; -%else - movsxd r8, dword ptr arg(1) ;ref_pixels_per_line ; - add rsi, r8 + + movsxd rbx, dword ptr arg(1) ;ref_pixels_per_line + lea rsi, [rsi + rbx] +%if ABI_IS_32BIT=0 + movsxd r9, dword ptr arg(3) ;src_pixels_per_line %endif -filter_block2d_bil_var_sse2_loop: +filter_block2d_bil_var_sse2_loop: movq xmm1, QWORD PTR [rsi] ; movq xmm3, QWORD PTR [rsi+1] ; punpcklbw xmm1, xmm0 ; pmullw xmm1, [rax] ; - punpcklbw xmm3, xmm0 ; pmullw xmm3, [rax+16] ; paddw xmm1, xmm3 ; - paddw xmm1, 
[GLOBAL(xmm_bi_rd)] ; - + paddw xmm1, xmm4 ; psraw xmm1, xmm_filter_shift ; - movdqa xmm3, xmm5 ; + movdqa xmm3, xmm5 ; movdqa xmm5, xmm1 ; - pmullw xmm3, [rdx] ; + pmullw xmm3, [rdx] ; pmullw xmm1, [rdx+16] ; paddw xmm1, xmm3 ; - - paddw xmm1, [GLOBAL(xmm_bi_rd)] ; + paddw xmm1, xmm4 ; psraw xmm1, xmm_filter_shift ; movq xmm3, QWORD PTR [rdi] ; @@ -577,20 +613,103 @@ pmaddwd xmm1, xmm1 ; paddd xmm7, xmm1 ; + lea rsi, [rsi + rbx] ;ref_pixels_per_line %if ABI_IS_32BIT - add rsi, dword ptr arg(1) ;ref_pixels_per_line ; - add rdi, dword ptr arg(3) ;src_pixels_per_line ; + add rdi, dword ptr arg(3) ;src_pixels_per_line %else - movsxd r8, dword ptr arg(1) ;ref_pixels_per_line ; - movsxd r9, dword ptr arg(3) ;src_pixels_per_line ; - add rsi, r8 - add rdi, r9 + lea rdi, [rdi + r9] %endif sub rcx, 1 ; jnz filter_block2d_bil_var_sse2_loop ; + jmp filter_block2d_bil_variance + +filter_block2d_bil_var_sse2_sp_only: + movsxd rdx, dword ptr arg(6) ; yoffset + shl rdx, 5 + lea rdx, [rdx + rcx] ; VFilter + + mov rsi, arg(0) ;ref_ptr + mov rdi, arg(2) ;src_ptr + movsxd rcx, dword ptr arg(4) ;Height + movsxd rax, dword ptr arg(1) ;ref_pixels_per_line + + pxor xmm0, xmm0 ; + movq xmm1, QWORD PTR [rsi] ; + punpcklbw xmm1, xmm0 ; + + movsxd rbx, dword ptr arg(3) ;src_pixels_per_line + lea rsi, [rsi + rax] + +filter_block2d_bil_sp_only_loop: + movq xmm3, QWORD PTR [rsi] ; + punpcklbw xmm3, xmm0 ; + movdqa xmm5, xmm3 + pmullw xmm1, [rdx] ; + pmullw xmm3, [rdx+16] ; + paddw xmm1, xmm3 ; + paddw xmm1, xmm4 ; + psraw xmm1, xmm_filter_shift ; + + movq xmm3, QWORD PTR [rdi] ; + punpcklbw xmm3, xmm0 ; + + psubw xmm1, xmm3 ; + paddw xmm6, xmm1 ; + + pmaddwd xmm1, xmm1 ; + paddd xmm7, xmm1 ; + + movdqa xmm1, xmm5 ; + lea rsi, [rsi + rax] ;ref_pixels_per_line + lea rdi, [rdi + rbx] ;src_pixels_per_line + + sub rcx, 1 ; + jnz filter_block2d_bil_sp_only_loop ; + + jmp filter_block2d_bil_variance + +filter_block2d_bil_var_sse2_fp_only: + mov rsi, arg(0) ;ref_ptr + mov rdi, arg(2) ;src_ptr + movsxd 
rcx, dword ptr arg(4) ;Height + movsxd rdx, dword ptr arg(1) ;ref_pixels_per_line + + pxor xmm0, xmm0 ; + movsxd rbx, dword ptr arg(3) ;src_pixels_per_line + +filter_block2d_bil_fp_only_loop: + movq xmm1, QWORD PTR [rsi] ; + movq xmm3, QWORD PTR [rsi+1] ; + + punpcklbw xmm1, xmm0 ; + pmullw xmm1, [rax] ; + punpcklbw xmm3, xmm0 ; + pmullw xmm3, [rax+16] ; + + paddw xmm1, xmm3 ; + paddw xmm1, xmm4 ; + psraw xmm1, xmm_filter_shift ; + + movq xmm3, QWORD PTR [rdi] ; + punpcklbw xmm3, xmm0 ; + + psubw xmm1, xmm3 ; + paddw xmm6, xmm1 ; + + pmaddwd xmm1, xmm1 ; + paddd xmm7, xmm1 ; + lea rsi, [rsi + rdx] + lea rdi, [rdi + rbx] ;src_pixels_per_line + + sub rcx, 1 ; + jnz filter_block2d_bil_fp_only_loop ; + + jmp filter_block2d_bil_variance + +filter_block2d_bil_variance: movdq2q mm6, xmm6 ; movdq2q mm7, xmm7 ; @@ -627,12 +746,12 @@ movd [rsi], mm2 ; xsum movd [rdi], mm4 ; xxsum - ; begin epilog - add rsp, 16 + pop rbx pop rdi pop rsi RESTORE_GOT + RESTORE_XMM UNSHADOW_ARGS pop rbp ret @@ -974,3 +1093,13 @@ align 16 xmm_bi_rd: times 8 dw 64 +align 16 +vp8_bilinear_filters_sse2: + dw 128, 128, 128, 128, 128, 128, 128, 128, 0, 0, 0, 0, 0, 0, 0, 0 + dw 112, 112, 112, 112, 112, 112, 112, 112, 16, 16, 16, 16, 16, 16, 16, 16 + dw 96, 96, 96, 96, 96, 96, 96, 96, 32, 32, 32, 32, 32, 32, 32, 32 + dw 80, 80, 80, 80, 80, 80, 80, 80, 48, 48, 48, 48, 48, 48, 48, 48 + dw 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64 + dw 48, 48, 48, 48, 48, 48, 48, 48, 80, 80, 80, 80, 80, 80, 80, 80 + dw 32, 32, 32, 32, 32, 32, 32, 32, 96, 96, 96, 96, 96, 96, 96, 96 + dw 16, 16, 16, 16, 16, 16, 16, 16, 112, 112, 112, 112, 112, 112, 112, 112 diff -Nru libvpx-0.9.5/vp8/encoder/x86/variance_mmx.c libvpx-0.9.6/vp8/encoder/x86/variance_mmx.c --- libvpx-0.9.5/vp8/encoder/x86/variance_mmx.c 2010-10-28 13:14:14.000000000 +0000 +++ libvpx-0.9.6/vp8/encoder/x86/variance_mmx.c 2011-03-04 20:40:40.000000000 +0000 @@ -9,8 +9,8 @@ */ -#include "variance.h" -#include "pragmas.h" +#include 
"vp8/encoder/variance.h" +#include "vp8/common/pragmas.h" #include "vpx_ports/mem.h" extern void filter_block1d_h6_mmx diff -Nru libvpx-0.9.5/vp8/encoder/x86/variance_sse2.c libvpx-0.9.6/vp8/encoder/x86/variance_sse2.c --- libvpx-0.9.5/vp8/encoder/x86/variance_sse2.c 2010-10-28 13:14:14.000000000 +0000 +++ libvpx-0.9.6/vp8/encoder/x86/variance_sse2.c 2011-03-04 20:40:40.000000000 +0000 @@ -9,8 +9,8 @@ */ -#include "variance.h" -#include "pragmas.h" +#include "vp8/encoder/variance.h" +#include "vp8/common/pragmas.h" #include "vpx_ports/mem.h" extern void filter_block1d_h6_mmx(const unsigned char *src_ptr, unsigned short *output_ptr, unsigned int src_pixels_per_line, unsigned int pixel_step, unsigned int output_height, unsigned int output_width, short *vp7_filter); @@ -76,8 +76,8 @@ const unsigned char *src_ptr, int src_pixels_per_line, unsigned int Height, - const short *HFilter, - const short *VFilter, + int xoffset, + int yoffset, int *sum, unsigned int *sumsquared ); @@ -222,21 +222,6 @@ } -/////////////////////////////////////////////////////////////////////////// -// the mmx function that does the bilinear filtering and var calculation // -// int one pass // -/////////////////////////////////////////////////////////////////////////// -DECLARE_ALIGNED(16, const short, vp8_bilinear_filters_xmm[8][16]) = -{ - { 128, 128, 128, 128, 128, 128, 128, 128, 0, 0, 0, 0, 0, 0, 0, 0 }, - { 112, 112, 112, 112, 112, 112, 112, 112, 16, 16, 16, 16, 16, 16, 16, 16 }, - { 96, 96, 96, 96, 96, 96, 96, 96, 32, 32, 32, 32, 32, 32, 32, 32 }, - { 80, 80, 80, 80, 80, 80, 80, 80, 48, 48, 48, 48, 48, 48, 48, 48 }, - { 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64 }, - { 48, 48, 48, 48, 48, 48, 48, 48, 80, 80, 80, 80, 80, 80, 80, 80 }, - { 32, 32, 32, 32, 32, 32, 32, 32, 96, 96, 96, 96, 96, 96, 96, 96 }, - { 16, 16, 16, 16, 16, 16, 16, 16, 112, 112, 112, 112, 112, 112, 112, 112 } -}; unsigned int vp8_sub_pixel_variance4x4_wmt ( const unsigned char *src_ptr, @@ -272,15 
+257,38 @@ unsigned int *sse ) { - int xsum; unsigned int xxsum; - vp8_filter_block2d_bil_var_sse2( - src_ptr, src_pixels_per_line, - dst_ptr, dst_pixels_per_line, 8, - vp8_bilinear_filters_xmm[xoffset], vp8_bilinear_filters_xmm[yoffset], - &xsum, &xxsum - ); + + if (xoffset == 4 && yoffset == 0) + { + vp8_half_horiz_variance16x_h_sse2( + src_ptr, src_pixels_per_line, + dst_ptr, dst_pixels_per_line, 8, + &xsum, &xxsum); + } + else if (xoffset == 0 && yoffset == 4) + { + vp8_half_vert_variance16x_h_sse2( + src_ptr, src_pixels_per_line, + dst_ptr, dst_pixels_per_line, 8, + &xsum, &xxsum); + } + else if (xoffset == 4 && yoffset == 4) + { + vp8_half_horiz_vert_variance16x_h_sse2( + src_ptr, src_pixels_per_line, + dst_ptr, dst_pixels_per_line, 8, + &xsum, &xxsum); + } + else + { + vp8_filter_block2d_bil_var_sse2( + src_ptr, src_pixels_per_line, + dst_ptr, dst_pixels_per_line, 8, + xoffset, yoffset, + &xsum, &xxsum); + } *sse = xxsum; return (xxsum - ((xsum * xsum) >> 6)); @@ -344,7 +352,7 @@ vp8_filter_block2d_bil_var_sse2( src_ptr, src_pixels_per_line, dst_ptr, dst_pixels_per_line, 16, - vp8_bilinear_filters_xmm[xoffset], vp8_bilinear_filters_xmm[yoffset], + xoffset, yoffset, &xsum0, &xxsum0 ); @@ -352,7 +360,7 @@ vp8_filter_block2d_bil_var_sse2( src_ptr + 8, src_pixels_per_line, dst_ptr + 8, dst_pixels_per_line, 16, - vp8_bilinear_filters_xmm[xoffset], vp8_bilinear_filters_xmm[yoffset], + xoffset, yoffset, &xsum1, &xxsum1 ); } @@ -392,21 +400,56 @@ int xsum0, xsum1; unsigned int xxsum0, xxsum1; + if (xoffset == 4 && yoffset == 0) + { + vp8_half_horiz_variance16x_h_sse2( + src_ptr, src_pixels_per_line, + dst_ptr, dst_pixels_per_line, 8, + &xsum0, &xxsum0); - vp8_filter_block2d_bil_var_sse2( - src_ptr, src_pixels_per_line, - dst_ptr, dst_pixels_per_line, 8, - vp8_bilinear_filters_xmm[xoffset], vp8_bilinear_filters_xmm[yoffset], - &xsum0, &xxsum0 - ); + vp8_half_horiz_variance16x_h_sse2( + src_ptr + 8, src_pixels_per_line, + dst_ptr + 8, dst_pixels_per_line, 8, + &xsum1, 
&xxsum1); + } + else if (xoffset == 0 && yoffset == 4) + { + vp8_half_vert_variance16x_h_sse2( + src_ptr, src_pixels_per_line, + dst_ptr, dst_pixels_per_line, 8, + &xsum0, &xxsum0); + vp8_half_vert_variance16x_h_sse2( + src_ptr + 8, src_pixels_per_line, + dst_ptr + 8, dst_pixels_per_line, 8, + &xsum1, &xxsum1); + } + else if (xoffset == 4 && yoffset == 4) + { + vp8_half_horiz_vert_variance16x_h_sse2( + src_ptr, src_pixels_per_line, + dst_ptr, dst_pixels_per_line, 8, + &xsum0, &xxsum0); - vp8_filter_block2d_bil_var_sse2( - src_ptr + 8, src_pixels_per_line, - dst_ptr + 8, dst_pixels_per_line, 8, - vp8_bilinear_filters_xmm[xoffset], vp8_bilinear_filters_xmm[yoffset], - &xsum1, &xxsum1 - ); + vp8_half_horiz_vert_variance16x_h_sse2( + src_ptr + 8, src_pixels_per_line, + dst_ptr + 8, dst_pixels_per_line, 8, + &xsum1, &xxsum1); + } + else + { + vp8_filter_block2d_bil_var_sse2( + src_ptr, src_pixels_per_line, + dst_ptr, dst_pixels_per_line, 8, + xoffset, yoffset, + &xsum0, &xxsum0); + + vp8_filter_block2d_bil_var_sse2( + src_ptr + 8, src_pixels_per_line, + dst_ptr + 8, dst_pixels_per_line, 8, + xoffset, yoffset, + &xsum1, &xxsum1); + } xsum0 += xsum1; xxsum0 += xxsum1; @@ -428,12 +471,36 @@ { int xsum; unsigned int xxsum; - vp8_filter_block2d_bil_var_sse2( - src_ptr, src_pixels_per_line, - dst_ptr, dst_pixels_per_line, 16, - vp8_bilinear_filters_xmm[xoffset], vp8_bilinear_filters_xmm[yoffset], - &xsum, &xxsum - ); + + if (xoffset == 4 && yoffset == 0) + { + vp8_half_horiz_variance16x_h_sse2( + src_ptr, src_pixels_per_line, + dst_ptr, dst_pixels_per_line, 16, + &xsum, &xxsum); + } + else if (xoffset == 0 && yoffset == 4) + { + vp8_half_vert_variance16x_h_sse2( + src_ptr, src_pixels_per_line, + dst_ptr, dst_pixels_per_line, 16, + &xsum, &xxsum); + } + else if (xoffset == 4 && yoffset == 4) + { + vp8_half_horiz_vert_variance16x_h_sse2( + src_ptr, src_pixels_per_line, + dst_ptr, dst_pixels_per_line, 16, + &xsum, &xxsum); + } + else + { + vp8_filter_block2d_bil_var_sse2( + 
src_ptr, src_pixels_per_line, + dst_ptr, dst_pixels_per_line, 16, + xoffset, yoffset, + &xsum, &xxsum); + } *sse = xxsum; return (xxsum - ((xsum * xsum) >> 7)); diff -Nru libvpx-0.9.5/vp8/encoder/x86/variance_x86.h libvpx-0.9.6/vp8/encoder/x86/variance_x86.h --- libvpx-0.9.5/vp8/encoder/x86/variance_x86.h 2010-10-28 13:14:14.000000000 +0000 +++ libvpx-0.9.6/vp8/encoder/x86/variance_x86.h 2011-03-04 20:40:40.000000000 +0000 @@ -297,4 +297,31 @@ #endif #endif + +#if HAVE_SSE4_1 +extern prototype_sad_multi_same_address_1(vp8_sad16x16x8_sse4); +extern prototype_sad_multi_same_address_1(vp8_sad16x8x8_sse4); +extern prototype_sad_multi_same_address_1(vp8_sad8x16x8_sse4); +extern prototype_sad_multi_same_address_1(vp8_sad8x8x8_sse4); +extern prototype_sad_multi_same_address_1(vp8_sad4x4x8_sse4); + +#if !CONFIG_RUNTIME_CPU_DETECT +#undef vp8_variance_sad16x16x8 +#define vp8_variance_sad16x16x8 vp8_sad16x16x8_sse4 + +#undef vp8_variance_sad16x8x8 +#define vp8_variance_sad16x8x8 vp8_sad16x8x8_sse4 + +#undef vp8_variance_sad8x16x8 +#define vp8_variance_sad8x16x8 vp8_sad8x16x8_sse4 + +#undef vp8_variance_sad8x8x8 +#define vp8_variance_sad8x8x8 vp8_sad8x8x8_sse4 + +#undef vp8_variance_sad4x4x8 +#define vp8_variance_sad4x4x8 vp8_sad4x4x8_sse4 + +#endif +#endif + #endif diff -Nru libvpx-0.9.5/vp8/encoder/x86/x86_csystemdependent.c libvpx-0.9.6/vp8/encoder/x86/x86_csystemdependent.c --- libvpx-0.9.5/vp8/encoder/x86/x86_csystemdependent.c 2010-10-28 13:14:14.000000000 +0000 +++ libvpx-0.9.6/vp8/encoder/x86/x86_csystemdependent.c 2011-03-04 20:40:40.000000000 +0000 @@ -11,18 +11,17 @@ #include "vpx_ports/config.h" #include "vpx_ports/x86.h" -#include "variance.h" -#include "onyx_int.h" +#include "vp8/encoder/variance.h" +#include "vp8/encoder/onyx_int.h" #if HAVE_MMX void vp8_short_fdct8x4_mmx(short *input, short *output, int pitch) { - vp8_short_fdct4x4_c(input, output, pitch); - vp8_short_fdct4x4_c(input + 4, output + 16, pitch); + vp8_short_fdct4x4_mmx(input, output, pitch); + 
vp8_short_fdct4x4_mmx(input + 4, output + 16, pitch); } - int vp8_fast_quantize_b_impl_mmx(short *coeff_ptr, short *zbin_ptr, short *qcoeff_ptr, short *dequant_ptr, short *scan_mask, short *round_ptr, @@ -33,7 +32,7 @@ short *coeff_ptr = b->coeff; short *zbin_ptr = b->zbin; short *round_ptr = b->round; - short *quant_ptr = b->quant; + short *quant_ptr = b->quant_fast; short *qcoeff_ptr = d->qcoeff; short *dqcoeff_ptr = d->dqcoeff; short *dequant_ptr = d->dequant; @@ -82,22 +81,16 @@ #endif #if HAVE_SSE2 -void vp8_short_fdct8x4_sse2(short *input, short *output, int pitch) -{ - vp8_short_fdct4x4_sse2(input, output, pitch); - vp8_short_fdct4x4_sse2(input + 4, output + 16, pitch); -} - int vp8_fast_quantize_b_impl_sse2(short *coeff_ptr, short *qcoeff_ptr, short *dequant_ptr, - short *scan_mask, short *round_ptr, + const short *inv_scan_order, short *round_ptr, short *quant_ptr, short *dqcoeff_ptr); void vp8_fast_quantize_b_sse2(BLOCK *b, BLOCKD *d) { short *scan_mask = vp8_default_zig_zag_mask;//d->scan_order_mask_ptr; short *coeff_ptr = b->coeff; short *round_ptr = b->round; - short *quant_ptr = b->quant; + short *quant_ptr = b->quant_fast; short *qcoeff_ptr = d->qcoeff; short *dqcoeff_ptr = d->dqcoeff; short *dequant_ptr = d->dequant; @@ -106,8 +99,7 @@ coeff_ptr, qcoeff_ptr, dequant_ptr, - scan_mask, - + vp8_default_inv_zig_zag, round_ptr, quant_ptr, dqcoeff_ptr @@ -116,37 +108,26 @@ int vp8_regular_quantize_b_impl_sse2(short *coeff_ptr, short *zbin_ptr, - short *qcoeff_ptr,short *dequant_ptr, - const int *default_zig_zag, short *round_ptr, - short *quant_ptr, short *dqcoeff_ptr, - unsigned short zbin_oq_value, - short *zbin_boost_ptr); + short *qcoeff_ptr,short *dequant_ptr, + const int *default_zig_zag, short *round_ptr, + short *quant_ptr, short *dqcoeff_ptr, + unsigned short zbin_oq_value, + short *zbin_boost_ptr, + short *quant_shift_ptr); void vp8_regular_quantize_b_sse2(BLOCK *b,BLOCKD *d) { - short *zbin_boost_ptr = b->zrun_zbin_boost; - short *coeff_ptr = 
b->coeff; - short *zbin_ptr = b->zbin; - short *round_ptr = b->round; - short *quant_ptr = b->quant; - short *qcoeff_ptr = d->qcoeff; - short *dqcoeff_ptr = d->dqcoeff; - short *dequant_ptr = d->dequant; - short zbin_oq_value = b->zbin_extra; - - d->eob = vp8_regular_quantize_b_impl_sse2( - coeff_ptr, - zbin_ptr, - qcoeff_ptr, - dequant_ptr, - vp8_default_zig_zag1d, - - round_ptr, - quant_ptr, - dqcoeff_ptr, - zbin_oq_value, - zbin_boost_ptr - ); + d->eob = vp8_regular_quantize_b_impl_sse2(b->coeff, + b->zbin, + d->qcoeff, + d->dequant, + vp8_default_zig_zag1d, + b->round, + b->quant, + d->dqcoeff, + b->zbin_extra, + b->zrun_zbin_boost, + b->quant_shift); } int vp8_mbblock_error_xmm_impl(short *coeff_ptr, short *dcoef_ptr, int dc); @@ -179,6 +160,25 @@ #endif +#if HAVE_SSSE3 +int vp8_fast_quantize_b_impl_ssse3(short *coeff_ptr, + short *qcoeff_ptr, short *dequant_ptr, + short *round_ptr, + short *quant_ptr, short *dqcoeff_ptr); +void vp8_fast_quantize_b_ssse3(BLOCK *b, BLOCKD *d) +{ + d->eob = vp8_fast_quantize_b_impl_ssse3( + b->coeff, + d->qcoeff, + d->dequant, + b->round, + b->quant_fast, + d->dqcoeff + ); +} +#endif + + void vp8_arch_x86_encoder_init(VP8_COMP *cpi) { #if CONFIG_RUNTIME_CPU_DETECT @@ -188,6 +188,7 @@ int wmt_enabled = flags & HAS_SSE2; int SSE3Enabled = flags & HAS_SSE3; int SSSE3Enabled = flags & HAS_SSSE3; + int SSE4_1Enabled = flags & HAS_SSE4_1; /* Note: * @@ -198,7 +199,6 @@ /* Override default functions with fastest ones for this CPU. 
*/ #if HAVE_MMX - if (mmx_enabled) { cpi->rtcd.variance.sad16x16 = vp8_sad16x16_mmx; @@ -230,18 +230,11 @@ cpi->rtcd.variance.get8x8var = vp8_get8x8var_mmx; cpi->rtcd.variance.get16x16var = vp8_get16x16var_mmx; cpi->rtcd.variance.get4x4sse_cs = vp8_get4x4sse_cs_mmx; -#if 0 // new fdct + cpi->rtcd.fdct.short4x4 = vp8_short_fdct4x4_mmx; cpi->rtcd.fdct.short8x4 = vp8_short_fdct8x4_mmx; cpi->rtcd.fdct.fast4x4 = vp8_short_fdct4x4_mmx; cpi->rtcd.fdct.fast8x4 = vp8_short_fdct8x4_mmx; -#else - cpi->rtcd.fdct.short4x4 = vp8_short_fdct4x4_c; - cpi->rtcd.fdct.short8x4 = vp8_short_fdct8x4_c; - cpi->rtcd.fdct.fast4x4 = vp8_short_fdct4x4_c; - cpi->rtcd.fdct.fast8x4 = vp8_short_fdct8x4_c; - -#endif cpi->rtcd.fdct.walsh_short4x4 = vp8_short_walsh4x4_c; @@ -254,10 +247,9 @@ /*cpi->rtcd.quantize.fastquantb = vp8_fast_quantize_b_mmx;*/ } - #endif -#if HAVE_SSE2 +#if HAVE_SSE2 if (wmt_enabled) { cpi->rtcd.variance.sad16x16 = vp8_sad16x16_wmt; @@ -304,13 +296,18 @@ cpi->rtcd.encodemb.submby = vp8_subtract_mby_sse2; cpi->rtcd.encodemb.submbuv = vp8_subtract_mbuv_sse2; - /*cpi->rtcd.quantize.quantb = vp8_regular_quantize_b_sse2;*/ +#if ARCH_X86 + cpi->rtcd.quantize.quantb = vp8_regular_quantize_b_sse2; +#endif cpi->rtcd.quantize.fastquantb = vp8_fast_quantize_b_sse2; - } +#if !(CONFIG_REALTIME_ONLY) + cpi->rtcd.temporal.apply = vp8_temporal_filter_apply_sse2; +#endif + } #endif -#if HAVE_SSE3 +#if HAVE_SSE3 if (SSE3Enabled) { cpi->rtcd.variance.sad16x16 = vp8_sad16x16_sse3; @@ -319,8 +316,9 @@ cpi->rtcd.variance.sad8x16x3 = vp8_sad8x16x3_sse3; cpi->rtcd.variance.sad8x8x3 = vp8_sad8x8x3_sse3; cpi->rtcd.variance.sad4x4x3 = vp8_sad4x4x3_sse3; +#if !(CONFIG_REALTIME_ONLY) cpi->rtcd.search.full_search = vp8_full_search_sadx3; - +#endif cpi->rtcd.variance.sad16x16x4d = vp8_sad16x16x4d_sse3; cpi->rtcd.variance.sad16x8x4d = vp8_sad16x8x4d_sse3; cpi->rtcd.variance.sad8x16x4d = vp8_sad8x16x4d_sse3; @@ -328,16 +326,32 @@ cpi->rtcd.variance.sad4x4x4d = vp8_sad4x4x4d_sse3; 
cpi->rtcd.search.diamond_search = vp8_diamond_search_sadx4; } - #endif -#if HAVE_SSSE3 +#if HAVE_SSSE3 if (SSSE3Enabled) { cpi->rtcd.variance.sad16x16x3 = vp8_sad16x16x3_ssse3; cpi->rtcd.variance.sad16x8x3 = vp8_sad16x8x3_ssse3; + + cpi->rtcd.quantize.fastquantb = vp8_fast_quantize_b_ssse3; + } +#endif +#if HAVE_SSE4_1 + if (SSE4_1Enabled) + { + cpi->rtcd.variance.sad16x16x8 = vp8_sad16x16x8_sse4; + cpi->rtcd.variance.sad16x8x8 = vp8_sad16x8x8_sse4; + cpi->rtcd.variance.sad8x16x8 = vp8_sad8x16x8_sse4; + cpi->rtcd.variance.sad8x8x8 = vp8_sad8x8x8_sse4; + cpi->rtcd.variance.sad4x4x8 = vp8_sad4x4x8_sse4; +#if !(CONFIG_REALTIME_ONLY) + cpi->rtcd.search.full_search = vp8_full_search_sadx8; #endif + } +#endif + #endif } diff -Nru libvpx-0.9.5/vp8/vp8_common.mk libvpx-0.9.6/vp8/vp8_common.mk --- libvpx-0.9.5/vp8/vp8_common.mk 2010-10-28 13:14:14.000000000 +0000 +++ libvpx-0.9.6/vp8/vp8_common.mk 2011-03-04 20:40:40.000000000 +0000 @@ -8,25 +8,12 @@ ## be found in the AUTHORS file in the root of the source tree. 
## - -#add this file to the installed sources list VP8_COMMON_SRCS-yes += vp8_common.mk - -CFLAGS+=-I$(SRC_PATH_BARE)/$(VP8_PREFIX)common VP8_COMMON_SRCS-yes += common/type_aliases.h VP8_COMMON_SRCS-yes += common/pragmas.h - -CFLAGS+=-I$(SRC_PATH_BARE)/$(VP8_PREFIX)common -VP8_COMMON_SRCS-yes += common/preproc.h -VP8_COMMON_SRCS-yes += common/vpxerrors.h - -CFLAGS+=-I$(SRC_PATH_BARE)/$(VP8_PREFIX)common VP8_COMMON_SRCS-yes += common/ppflags.h VP8_COMMON_SRCS-yes += common/onyx.h VP8_COMMON_SRCS-yes += common/onyxd.h - -CFLAGS+=-I$(SRC_PATH_BARE)/$(VP8_PREFIX)common - VP8_COMMON_SRCS-yes += common/alloccommon.c VP8_COMMON_SRCS-yes += common/blockd.c VP8_COMMON_SRCS-yes += common/coefupdateprobs.h @@ -36,7 +23,7 @@ VP8_COMMON_SRCS-yes += common/entropymode.c VP8_COMMON_SRCS-yes += common/entropymv.c VP8_COMMON_SRCS-yes += common/extend.c -VP8_COMMON_SRCS-yes += common/filter_c.c +VP8_COMMON_SRCS-yes += common/filter.c VP8_COMMON_SRCS-yes += common/findnearmv.c VP8_COMMON_SRCS-yes += common/generic/systemdependent.c VP8_COMMON_SRCS-yes += common/idctllm.c @@ -57,7 +44,6 @@ VP8_COMMON_SRCS-yes += common/modecont.h VP8_COMMON_SRCS-yes += common/mv.h VP8_COMMON_SRCS-yes += common/onyxc_int.h -VP8_COMMON_SRCS-yes += common/predictdc.h VP8_COMMON_SRCS-yes += common/quant_common.h VP8_COMMON_SRCS-yes += common/recon.h VP8_COMMON_SRCS-yes += common/reconinter.h @@ -75,7 +61,6 @@ VP8_COMMON_SRCS-yes += common/mbpitch.c VP8_COMMON_SRCS-yes += common/modecont.c VP8_COMMON_SRCS-yes += common/modecontext.c -VP8_COMMON_SRCS-yes += common/predictdc.c VP8_COMMON_SRCS-yes += common/quant_common.c VP8_COMMON_SRCS-yes += common/recon.c VP8_COMMON_SRCS-yes += common/reconinter.c @@ -112,14 +97,15 @@ VP8_COMMON_SRCS-$(HAVE_SSE2) += common/x86/postproc_sse2.asm endif +VP8_COMMON_SRCS-$(ARCH_ARM) += common/asm_com_offsets.c VP8_COMMON_SRCS-$(ARCH_ARM) += common/arm/arm_systemdependent.c # common (c) VP8_COMMON_SRCS-$(HAVE_ARMV6) += common/arm/bilinearfilter_arm.c 
+VP8_COMMON_SRCS-$(HAVE_ARMV6) += common/arm/bilinearfilter_arm.h VP8_COMMON_SRCS-$(HAVE_ARMV6) += common/arm/filter_arm.c VP8_COMMON_SRCS-$(HAVE_ARMV6) += common/arm/loopfilter_arm.c VP8_COMMON_SRCS-$(HAVE_ARMV6) += common/arm/reconintra_arm.c -VP8_COMMON_SRCS-$(HAVE_ARMV6) += common/arm/vpx_asm_offsets.c # common (armv6) VP8_COMMON_SRCS-$(HAVE_ARMV6) += common/arm/armv6/bilinearfilter_v6$(ASM) @@ -162,16 +148,3 @@ VP8_COMMON_SRCS-$(HAVE_ARMV7) += common/arm/neon/buildintrapredictorsmby_neon$(ASM) VP8_COMMON_SRCS-$(HAVE_ARMV7) += common/arm/neon/save_neon_reg$(ASM) VP8_COMMON_SRCS-$(HAVE_ARMV7) += common/arm/neon/recon_neon.c - - -# -# Rule to extract assembly constants from C sources -# -ifeq ($(ARCH_ARM),yes) -vpx_asm_offsets.asm: obj_int_extract -vpx_asm_offsets.asm: $(VP8_PREFIX)common/arm/vpx_asm_offsets.c.o - ./obj_int_extract rvds $< $(ADS2GAS) > $@ -OBJS-yes += $(VP8_PREFIX)common/arm/vpx_asm_offsets.c.o -CLEAN-OBJS += vpx_asm_offsets.asm -$(filter %$(ASM).o,$(OBJS-yes)): vpx_asm_offsets.asm -endif diff -Nru libvpx-0.9.5/vp8/vp8cx_arm.mk libvpx-0.9.6/vp8/vp8cx_arm.mk --- libvpx-0.9.5/vp8/vp8cx_arm.mk 2010-10-28 13:14:14.000000000 +0000 +++ libvpx-0.9.6/vp8/vp8cx_arm.mk 2011-03-04 20:40:40.000000000 +0000 @@ -14,10 +14,13 @@ #File list for arm # encoder VP8_CX_SRCS-$(ARCH_ARM) += encoder/arm/arm_csystemdependent.c +VP8_CX_SRCS-$(ARCH_ARM) += encoder/asm_enc_offsets.c VP8_CX_SRCS-$(HAVE_ARMV7) += encoder/arm/encodemb_arm.c VP8_CX_SRCS-$(HAVE_ARMV7) += encoder/arm/quantize_arm.c VP8_CX_SRCS-$(HAVE_ARMV7) += encoder/arm/picklpf_arm.c +VP8_CX_SRCS-$(HAVE_ARMV6) += encoder/arm/variance_arm.c +VP8_CX_SRCS-$(HAVE_ARMV6) += encoder/arm/variance_arm.h VP8_CX_SRCS-$(HAVE_ARMV5TE) += encoder/arm/boolhuff_arm.c VP8_CX_SRCS_REMOVE-$(HAVE_ARMV5TE) += encoder/boolhuff.c @@ -31,6 +34,8 @@ #File list for armv6 # encoder +VP8_CX_SRCS-$(HAVE_ARMV6) += encoder/arm/armv6/vp8_sad16x16_armv6$(ASM) +VP8_CX_SRCS-$(HAVE_ARMV6) += encoder/arm/armv6/vp8_variance16x16_armv6$(ASM) 
VP8_CX_SRCS-$(HAVE_ARMV6) += encoder/arm/armv6/walsh_v6$(ASM) #File list for neon @@ -49,17 +54,3 @@ VP8_CX_SRCS-$(HAVE_ARMV7) += encoder/arm/neon/vp8_subpixelvariance16x16s_neon$(ASM) VP8_CX_SRCS-$(HAVE_ARMV7) += encoder/arm/neon/vp8_memcpy_neon$(ASM) VP8_CX_SRCS-$(HAVE_ARMV7) += encoder/arm/neon/vp8_shortwalsh4x4_neon$(ASM) - -VP8_CX_SRCS-$(HAVE_ARMV7) += encoder/arm/vpx_vp8_enc_asm_offsets.c - -# -# Rule to extract assembly constants from C sources -# -ifeq ($(ARCH_ARM),yes) -vpx_vp8_enc_asm_offsets.asm: obj_int_extract -vpx_vp8_enc_asm_offsets.asm: $(VP8_PREFIX)encoder/arm/vpx_vp8_enc_asm_offsets.c.o - ./obj_int_extract rvds $< $(ADS2GAS) > $@ -OBJS-yes += $(VP8_PREFIX)encoder/arm/vpx_vp7_enc_asm_offsets.c.o -CLEAN-OBJS += vpx_vp8_enc_asm_offsets.asm -$(filter %$(ASM).o,$(OBJS-yes)): vpx_vp8_enc_asm_offsets.asm -endif diff -Nru libvpx-0.9.5/vp8/vp8_cx_iface.c libvpx-0.9.6/vp8/vp8_cx_iface.c --- libvpx-0.9.5/vp8/vp8_cx_iface.c 2010-10-28 13:14:14.000000000 +0000 +++ libvpx-0.9.6/vp8/vp8_cx_iface.c 2011-03-04 20:40:40.000000000 +0000 @@ -12,10 +12,10 @@ #include "vpx/vpx_codec.h" #include "vpx/internal/vpx_codec_internal.h" #include "vpx_version.h" -#include "onyx_int.h" +#include "vp8/encoder/onyx_int.h" #include "vpx/vp8e.h" #include "vp8/encoder/firstpass.h" -#include "onyx.h" +#include "vp8/common/onyx.h" #include #include @@ -37,6 +37,8 @@ unsigned int arnr_max_frames; /* alt_ref Noise Reduction Max Frame Count */ unsigned int arnr_strength; /* alt_ref Noise Reduction Strength */ unsigned int arnr_type; /* alt_ref filter type */ + vp8e_tuning tuning; + unsigned int cq_level; /* constrained quality level */ }; @@ -67,6 +69,8 @@ 0, /* arnr_max_frames */ 3, /* arnr_strength */ 3, /* arnr_type*/ + 0, /* tuning*/ + 10, /* cq_level */ } } }; @@ -104,6 +108,7 @@ } +#undef ERROR #define ERROR(str) do {\ ctx->base.err_detail = str;\ return VPX_CODEC_INVALID_PARAM;\ @@ -132,20 +137,20 @@ const vpx_codec_enc_cfg_t *cfg, const struct vp8_extracfg *vp8_cfg) { - 
RANGE_CHECK(cfg, g_w, 2, 16384); - RANGE_CHECK(cfg, g_h, 2, 16384); + RANGE_CHECK(cfg, g_w, 1, 16384); + RANGE_CHECK(cfg, g_h, 1, 16384); RANGE_CHECK(cfg, g_timebase.den, 1, 1000000000); RANGE_CHECK(cfg, g_timebase.num, 1, cfg->g_timebase.den); RANGE_CHECK_HI(cfg, g_profile, 3); - RANGE_CHECK_HI(cfg, rc_min_quantizer, 63); RANGE_CHECK_HI(cfg, rc_max_quantizer, 63); + RANGE_CHECK_HI(cfg, rc_min_quantizer, cfg->rc_max_quantizer); RANGE_CHECK_HI(cfg, g_threads, 64); #if !(CONFIG_REALTIME_ONLY) RANGE_CHECK_HI(cfg, g_lag_in_frames, 25); #else RANGE_CHECK_HI(cfg, g_lag_in_frames, 0); #endif - RANGE_CHECK(cfg, rc_end_usage, VPX_VBR, VPX_CBR); + RANGE_CHECK(cfg, rc_end_usage, VPX_VBR, VPX_CQ); RANGE_CHECK_HI(cfg, rc_undershoot_pct, 100); RANGE_CHECK_HI(cfg, rc_2pass_vbr_bias_pct, 100); RANGE_CHECK(cfg, kf_mode, VPX_KF_DISABLED, VPX_KF_AUTO); @@ -187,7 +192,9 @@ RANGE_CHECK(vp8_cfg, arnr_max_frames, 0, 15); RANGE_CHECK_HI(vp8_cfg, arnr_strength, 6); RANGE_CHECK(vp8_cfg, arnr_type, 1, 3); + RANGE_CHECK(vp8_cfg, cq_level, 0, 63); +#if !(CONFIG_REALTIME_ONLY) if (cfg->g_pass == VPX_RC_LAST_PASS) { int mb_r = (cfg->g_h + 15) / 16; @@ -211,6 +218,7 @@ if ((int)(stats->count + 0.5) != n_packets - 1) ERROR("rc_twopass_stats_in missing EOS stats packet"); } +#endif return VPX_CODEC_OK; } @@ -295,11 +303,16 @@ { oxcf->end_usage = USAGE_STREAM_FROM_SERVER; } + else if (cfg.rc_end_usage == VPX_CQ) + { + oxcf->end_usage = USAGE_CONSTRAINED_QUALITY; + } oxcf->target_bandwidth = cfg.rc_target_bitrate; oxcf->best_allowed_q = cfg.rc_min_quantizer; oxcf->worst_allowed_q = cfg.rc_max_quantizer; + oxcf->cq_level = vp8_cfg.cq_level; oxcf->fixed_q = -1; oxcf->under_shoot_pct = cfg.rc_undershoot_pct; @@ -335,6 +348,7 @@ oxcf->arnr_strength = vp8_cfg.arnr_strength; oxcf->arnr_type = vp8_cfg.arnr_type; + oxcf->tuning = vp8_cfg.tuning; /* printf("Current VP8 Settings: \n"); @@ -448,6 +462,8 @@ MAP(VP8E_SET_ARNR_MAXFRAMES, xcfg.arnr_max_frames); MAP(VP8E_SET_ARNR_STRENGTH , xcfg.arnr_strength); 
MAP(VP8E_SET_ARNR_TYPE , xcfg.arnr_type); + MAP(VP8E_SET_TUNING, xcfg.tuning); + MAP(VP8E_SET_CQ_LEVEL, xcfg.cq_level); } @@ -476,57 +492,67 @@ { priv = calloc(1, sizeof(struct vpx_codec_alg_priv)); - if (priv) + if (!priv) { - ctx->priv = &priv->base; - ctx->priv->sz = sizeof(*ctx->priv); - ctx->priv->iface = ctx->iface; - ctx->priv->alg_priv = priv; - ctx->priv->init_flags = ctx->init_flags; - - if (ctx->config.enc) - { - /* Update the reference to the config structure to an - * internal copy. - */ - ctx->priv->alg_priv->cfg = *ctx->config.enc; - ctx->config.enc = &ctx->priv->alg_priv->cfg; - } + return VPX_CODEC_MEM_ERROR; + } - cfg = &ctx->priv->alg_priv->cfg; + ctx->priv = &priv->base; + ctx->priv->sz = sizeof(*ctx->priv); + ctx->priv->iface = ctx->iface; + ctx->priv->alg_priv = priv; + ctx->priv->init_flags = ctx->init_flags; - /* Select the extra vp6 configuration table based on the current - * usage value. If the current usage value isn't found, use the - * values for usage case 0. + if (ctx->config.enc) + { + /* Update the reference to the config structure to an + * internal copy. */ - for (i = 0; - extracfg_map[i].usage && extracfg_map[i].usage != cfg->g_usage; - i++); + ctx->priv->alg_priv->cfg = *ctx->config.enc; + ctx->config.enc = &ctx->priv->alg_priv->cfg; + } - priv->vp8_cfg = extracfg_map[i].cfg; - priv->vp8_cfg.pkt_list = &priv->pkt_list.head; + cfg = &ctx->priv->alg_priv->cfg; - priv->cx_data_sz = priv->cfg.g_w * priv->cfg.g_h * 3 / 2 * 2; + /* Select the extra vp6 configuration table based on the current + * usage value. If the current usage value isn't found, use the + * values for usage case 0. 
+ */ + for (i = 0; + extracfg_map[i].usage && extracfg_map[i].usage != cfg->g_usage; + i++); - if (priv->cx_data_sz < 4096) priv->cx_data_sz = 4096; + priv->vp8_cfg = extracfg_map[i].cfg; + priv->vp8_cfg.pkt_list = &priv->pkt_list.head; - priv->cx_data = malloc(priv->cx_data_sz); - priv->deprecated_mode = NO_MODE_SET; + priv->cx_data_sz = priv->cfg.g_w * priv->cfg.g_h * 3 / 2 * 2; - vp8_initialize(); + if (priv->cx_data_sz < 4096) priv->cx_data_sz = 4096; - res = validate_config(priv, &priv->cfg, &priv->vp8_cfg); + priv->cx_data = malloc(priv->cx_data_sz); - if (!res) - { - set_vp8e_config(&ctx->priv->alg_priv->oxcf, ctx->priv->alg_priv->cfg, ctx->priv->alg_priv->vp8_cfg); - optr = vp8_create_compressor(&ctx->priv->alg_priv->oxcf); + if (!priv->cx_data) + { + return VPX_CODEC_MEM_ERROR; + } - if (!optr) - res = VPX_CODEC_MEM_ERROR; - else - ctx->priv->alg_priv->cpi = optr; - } + priv->deprecated_mode = NO_MODE_SET; + + vp8_initialize(); + + res = validate_config(priv, &priv->cfg, &priv->vp8_cfg); + + if (!res) + { + set_vp8e_config(&ctx->priv->alg_priv->oxcf, + ctx->priv->alg_priv->cfg, + ctx->priv->alg_priv->vp8_cfg); + optr = vp8_create_compressor(&ctx->priv->alg_priv->oxcf); + + if (!optr) + res = VPX_CODEC_MEM_ERROR; + else + ctx->priv->alg_priv->cpi = optr; } } @@ -690,7 +716,7 @@ if (++ctx->fixed_kf_cntr > ctx->cfg.kf_min_dist) { flags |= VPX_EFLAG_FORCE_KF; - ctx->fixed_kf_cntr = 0; + ctx->fixed_kf_cntr = 1; } } @@ -860,8 +886,16 @@ { YV12_BUFFER_CONFIG sd; + vp8_ppflags_t flags = {0}; + + if (ctx->preview_ppcfg.post_proc_flag) + { + flags.post_proc_flag = ctx->preview_ppcfg.post_proc_flag; + flags.deblocking_level = ctx->preview_ppcfg.deblocking_level; + flags.noise_level = ctx->preview_ppcfg.noise_level; + } - if (0 == vp8_get_preview_raw_frame(ctx->cpi, &sd, ctx->preview_ppcfg.deblocking_level, ctx->preview_ppcfg.noise_level, ctx->preview_ppcfg.post_proc_flag)) + if (0 == vp8_get_preview_raw_frame(ctx->cpi, &sd, &flags)) { /* @@ -888,8 +922,8 @@ 
ctx->preview_img.x_chroma_shift = 1; ctx->preview_img.y_chroma_shift = 1; - ctx->preview_img.d_w = ctx->cfg.g_w; - ctx->preview_img.d_h = ctx->cfg.g_h; + ctx->preview_img.d_w = sd.y_width; + ctx->preview_img.d_h = sd.y_height; ctx->preview_img.stride[VPX_PLANE_Y] = sd.y_stride; ctx->preview_img.stride[VPX_PLANE_U] = sd.uv_stride; ctx->preview_img.stride[VPX_PLANE_V] = sd.uv_stride; @@ -1020,6 +1054,8 @@ {VP8E_SET_ARNR_MAXFRAMES, set_param}, {VP8E_SET_ARNR_STRENGTH , set_param}, {VP8E_SET_ARNR_TYPE , set_param}, + {VP8E_SET_TUNING, set_param}, + {VP8E_SET_CQ_LEVEL, set_param}, { -1, NULL}, }; @@ -1055,7 +1091,6 @@ 4, /* rc_min_quantizer */ 63, /* rc_max_quantizer */ - 95, /* rc_undershoot_pct */ 200, /* rc_overshoot_pct */ diff -Nru libvpx-0.9.5/vp8/vp8cx.mk libvpx-0.9.6/vp8/vp8cx.mk --- libvpx-0.9.5/vp8/vp8cx.mk 2010-10-28 13:14:14.000000000 +0000 +++ libvpx-0.9.6/vp8/vp8cx.mk 2011-03-04 20:40:40.000000000 +0000 @@ -33,8 +33,6 @@ #INCLUDES += common #INCLUDES += encoder -CFLAGS+=-I$(SRC_PATH_BARE)/$(VP8_PREFIX)encoder - VP8_CX_SRCS-yes += encoder/bitstream.c VP8_CX_SRCS-yes += encoder/boolhuff.c VP8_CX_SRCS-yes += encoder/dct.c @@ -42,7 +40,7 @@ VP8_CX_SRCS-yes += encoder/encodeintra.c VP8_CX_SRCS-yes += encoder/encodemb.c VP8_CX_SRCS-yes += encoder/encodemv.c -VP8_CX_SRCS-yes += encoder/ethreading.c +VP8_CX_SRCS-$(CONFIG_MULTITHREAD) += encoder/ethreading.c VP8_CX_SRCS-yes += encoder/firstpass.c VP8_CX_SRCS-yes += encoder/generic/csystemdependent.c VP8_CX_SRCS-yes += encoder/block.h @@ -87,6 +85,7 @@ ifeq ($(CONFIG_REALTIME_ONLY),yes) VP8_CX_SRCS_REMOVE-yes += encoder/firstpass.c +VP8_CX_SRCS_REMOVE-yes += encoder/temporal_filter.c endif VP8_CX_SRCS-$(ARCH_X86)$(ARCH_X86_64) += encoder/x86/encodemb_x86.h @@ -94,6 +93,7 @@ VP8_CX_SRCS-$(ARCH_X86)$(ARCH_X86_64) += encoder/x86/mcomp_x86.h VP8_CX_SRCS-$(ARCH_X86)$(ARCH_X86_64) += encoder/x86/variance_x86.h VP8_CX_SRCS-$(ARCH_X86)$(ARCH_X86_64) += encoder/x86/quantize_x86.h +VP8_CX_SRCS-$(ARCH_X86)$(ARCH_X86_64) += 
encoder/x86/temporal_filter_x86.h VP8_CX_SRCS-$(ARCH_X86)$(ARCH_X86_64) += encoder/x86/x86_csystemdependent.c VP8_CX_SRCS-$(HAVE_MMX) += encoder/x86/variance_mmx.c VP8_CX_SRCS-$(HAVE_MMX) += encoder/x86/variance_impl_mmx.asm @@ -107,9 +107,17 @@ VP8_CX_SRCS-$(HAVE_SSE2) += encoder/x86/fwalsh_sse2.asm VP8_CX_SRCS-$(HAVE_SSE2) += encoder/x86/quantize_sse2.asm VP8_CX_SRCS-$(HAVE_SSE2) += encoder/x86/subtract_sse2.asm +VP8_CX_SRCS-$(HAVE_SSE2) += encoder/x86/temporal_filter_apply_sse2.asm VP8_CX_SRCS-$(HAVE_SSE3) += encoder/x86/sad_sse3.asm VP8_CX_SRCS-$(HAVE_SSSE3) += encoder/x86/sad_ssse3.asm +VP8_CX_SRCS-$(HAVE_SSSE3) += encoder/x86/quantize_ssse3.asm +VP8_CX_SRCS-$(HAVE_SSE4_1) += encoder/x86/sad_sse4.asm VP8_CX_SRCS-$(ARCH_X86)$(ARCH_X86_64) += encoder/x86/quantize_mmx.asm VP8_CX_SRCS-$(ARCH_X86)$(ARCH_X86_64) += encoder/x86/encodeopt.asm +ifeq ($(CONFIG_REALTIME_ONLY),yes) +VP8_CX_SRCS_REMOVE-$(HAVE_SSE2) += encoder/x86/temporal_filter_apply_sse2.asm +endif + + VP8_CX_SRCS-yes := $(filter-out $(VP8_CX_SRCS_REMOVE-yes),$(VP8_CX_SRCS-yes)) diff -Nru libvpx-0.9.5/vp8/vp8dx_arm.mk libvpx-0.9.6/vp8/vp8dx_arm.mk --- libvpx-0.9.5/vp8/vp8dx_arm.mk 2010-10-28 13:14:14.000000000 +0000 +++ libvpx-0.9.6/vp8/vp8dx_arm.mk 2011-03-04 20:40:40.000000000 +0000 @@ -12,9 +12,9 @@ #VP8_DX_SRCS list is modified according to different platforms. 
VP8_DX_SRCS-$(ARCH_ARM) += decoder/arm/arm_dsystemdependent.c +VP8_CX_SRCS-$(ARCH_ARM) += decoder/asm_dec_offsets.c VP8_DX_SRCS-$(HAVE_ARMV6) += decoder/arm/dequantize_arm.c -VP8_DX_SRCS-$(CONFIG_ARM_ASM_DETOK) += decoder/arm/detokenize$(ASM) #File list for armv6 VP8_DX_SRCS-$(HAVE_ARMV6) += decoder/arm/armv6/dequant_dc_idct_v6$(ASM) diff -Nru libvpx-0.9.5/vp8/vp8_dx_iface.c libvpx-0.9.6/vp8/vp8_dx_iface.c --- libvpx-0.9.5/vp8/vp8_dx_iface.c 2010-10-28 13:14:14.000000000 +0000 +++ libvpx-0.9.6/vp8/vp8_dx_iface.c 2011-03-04 20:40:40.000000000 +0000 @@ -15,24 +15,11 @@ #include "vpx/vp8dx.h" #include "vpx/internal/vpx_codec_internal.h" #include "vpx_version.h" -#include "onyxd.h" -#include "onyxd_int.h" +#include "common/onyxd.h" +#include "decoder/onyxd_int.h" #define VP8_CAP_POSTPROC (CONFIG_POSTPROC ? VPX_CODEC_CAP_POSTPROC : 0) -#if CONFIG_BIG_ENDIAN -# define swap4(d)\ - ((d&0x000000ff)<<24) | \ - ((d&0x0000ff00)<<8) | \ - ((d&0x00ff0000)>>8) | \ - ((d&0xff000000)>>24) -# define swap2(d)\ - ((d&0x000000ff)<<8) | \ - ((d&0x0000ff00)>>8) -#else -# define swap4(d) d -# define swap2(d) d -#endif typedef vpx_codec_stream_info_t vp8_stream_info_t; /* Structures for handling memory allocations */ @@ -65,12 +52,19 @@ vpx_codec_priv_t base; vpx_codec_mmap_t mmaps[NELEMENTS(vp8_mem_req_segs)-1]; vpx_codec_dec_cfg_t cfg; - vp8_stream_info_t si; + vp8_stream_info_t si; int defer_alloc; int decoder_init; VP8D_PTR pbi; int postproc_cfg_set; vp8_postproc_cfg_t postproc_cfg; +#if CONFIG_POSTPROC_VISUALIZER + unsigned int dbg_postproc_flag; + int dbg_color_ref_frame_flag; + int dbg_color_mb_modes_flag; + int dbg_color_b_modes_flag; + int dbg_display_mv_flag; +#endif vpx_image_t img; int img_setup; int img_avail; @@ -253,8 +247,11 @@ unsigned int data_sz, vpx_codec_stream_info_t *si) { - vpx_codec_err_t res = VPX_CODEC_OK; + + if(data + data_sz <= data) + res = VPX_CODEC_INVALID_PARAM; + else { /* Parse uncompresssed part of key frame header. 
* 3 bytes:- including version, frame type and an offset @@ -273,8 +270,8 @@ if (c[0] != 0x9d || c[1] != 0x01 || c[2] != 0x2a) res = VPX_CODEC_UNSUP_BITSTREAM; - si->w = swap2(*(const unsigned short *)(c + 3)) & 0x3fff; - si->h = swap2(*(const unsigned short *)(c + 5)) & 0x3fff; + si->w = (c[3] | (c[4] << 8)) & 0x3fff; + si->h = (c[5] | (c[6] << 8)) & 0x3fff; /*printf("w=%d, h=%d\n", si->w, si->h);*/ if (!(si->h | si->w)) @@ -331,7 +328,10 @@ ctx->img_avail = 0; - /* Determine the stream parameters */ + /* Determine the stream parameters. Note that we rely on peek_si to + * validate that we have a buffer that does not wrap around the top + * of the heap. + */ if (!ctx->si.h) res = ctx->base.iface->dec.peek_si(data, data_sz, &ctx->si); @@ -410,15 +410,27 @@ { YV12_BUFFER_CONFIG sd; INT64 time_stamp = 0, time_end_stamp = 0; - int ppflag = 0; - int ppdeblocking = 0; - int ppnoise = 0; + vp8_ppflags_t flags = {0}; if (ctx->base.init_flags & VPX_CODEC_USE_POSTPROC) { - ppflag = ctx->postproc_cfg.post_proc_flag; - ppdeblocking = ctx->postproc_cfg.deblocking_level; - ppnoise = ctx->postproc_cfg.noise_level; + flags.post_proc_flag= ctx->postproc_cfg.post_proc_flag +#if CONFIG_POSTPROC_VISUALIZER + + | ((ctx->dbg_color_ref_frame_flag != 0) ? VP8D_DEBUG_CLR_FRM_REF_BLKS : 0) + | ((ctx->dbg_color_mb_modes_flag != 0) ? VP8D_DEBUG_CLR_BLK_MODES : 0) + | ((ctx->dbg_color_b_modes_flag != 0) ? VP8D_DEBUG_CLR_BLK_MODES : 0) + | ((ctx->dbg_display_mv_flag != 0) ? 
VP8D_DEBUG_DRAW_MV : 0) +#endif + ; + flags.deblocking_level = ctx->postproc_cfg.deblocking_level; + flags.noise_level = ctx->postproc_cfg.noise_level; +#if CONFIG_POSTPROC_VISUALIZER + flags.display_ref_frame_flag= ctx->dbg_color_ref_frame_flag; + flags.display_mb_modes_flag = ctx->dbg_color_mb_modes_flag; + flags.display_b_modes_flag = ctx->dbg_color_b_modes_flag; + flags.display_mv_flag = ctx->dbg_display_mv_flag; +#endif } if (vp8dx_receive_compressed_data(ctx->pbi, data_sz, data, deadline)) @@ -427,7 +439,7 @@ res = update_error_state(ctx, &pbi->common.error); } - if (!res && 0 == vp8dx_get_raw_frame(ctx->pbi, &sd, &time_stamp, &time_end_stamp, ppdeblocking, ppnoise, ppflag)) + if (!res && 0 == vp8dx_get_raw_frame(ctx->pbi, &sd, &time_stamp, &time_end_stamp, &flags)) { /* Align width/height */ unsigned int a_w = (sd.y_width + 15) & ~15; @@ -441,6 +453,7 @@ vpx_img_set_rect(&ctx->img, VP8BORDERINPIXELS, VP8BORDERINPIXELS, sd.y_width, sd.y_height); + ctx->img.user_priv = user_priv; ctx->img_avail = 1; } @@ -640,12 +653,79 @@ #endif } +static vpx_codec_err_t vp8_set_dbg_options(vpx_codec_alg_priv_t *ctx, + int ctrl_id, + va_list args) +{ +#if CONFIG_POSTPROC_VISUALIZER && CONFIG_POSTPROC + int data = va_arg(args, int); + +#define MAP(id, var) case id: var = data; break; + + switch (ctrl_id) + { + MAP (VP8_SET_DBG_COLOR_REF_FRAME, ctx->dbg_color_ref_frame_flag); + MAP (VP8_SET_DBG_COLOR_MB_MODES, ctx->dbg_color_mb_modes_flag); + MAP (VP8_SET_DBG_COLOR_B_MODES, ctx->dbg_color_b_modes_flag); + MAP (VP8_SET_DBG_DISPLAY_MV, ctx->dbg_display_mv_flag); + } + + return VPX_CODEC_OK; +#else + return VPX_CODEC_INCAPABLE; +#endif +} + +static vpx_codec_err_t vp8_get_last_ref_updates(vpx_codec_alg_priv_t *ctx, + int ctrl_id, + va_list args) +{ + int *update_info = va_arg(args, int *); + VP8D_COMP *pbi = (VP8D_COMP *)ctx->pbi; + + if (update_info) + { + *update_info = pbi->common.refresh_alt_ref_frame * (int) VP8_ALTR_FRAME + + pbi->common.refresh_golden_frame * (int) 
VP8_GOLD_FRAME + + pbi->common.refresh_last_frame * (int) VP8_LAST_FRAME; + + return VPX_CODEC_OK; + } + else + return VPX_CODEC_INVALID_PARAM; +} + + +static vpx_codec_err_t vp8_get_frame_corrupted(vpx_codec_alg_priv_t *ctx, + int ctrl_id, + va_list args) +{ + + int *corrupted = va_arg(args, int *); + + if (corrupted) + { + VP8D_COMP *pbi = (VP8D_COMP *)ctx->pbi; + *corrupted = pbi->common.frame_to_show->corrupted; + + return VPX_CODEC_OK; + } + else + return VPX_CODEC_INVALID_PARAM; + +} vpx_codec_ctrl_fn_map_t vp8_ctf_maps[] = { - {VP8_SET_REFERENCE, vp8_set_reference}, - {VP8_COPY_REFERENCE, vp8_get_reference}, - {VP8_SET_POSTPROC, vp8_set_postproc}, + {VP8_SET_REFERENCE, vp8_set_reference}, + {VP8_COPY_REFERENCE, vp8_get_reference}, + {VP8_SET_POSTPROC, vp8_set_postproc}, + {VP8_SET_DBG_COLOR_REF_FRAME, vp8_set_dbg_options}, + {VP8_SET_DBG_COLOR_MB_MODES, vp8_set_dbg_options}, + {VP8_SET_DBG_COLOR_B_MODES, vp8_set_dbg_options}, + {VP8_SET_DBG_DISPLAY_MV, vp8_set_dbg_options}, + {VP8D_GET_LAST_REF_UPDATES, vp8_get_last_ref_updates}, + {VP8D_GET_FRAME_CORRUPTED, vp8_get_frame_corrupted}, { -1, NULL}, }; diff -Nru libvpx-0.9.5/vp8/vp8dx.mk libvpx-0.9.6/vp8/vp8dx.mk --- libvpx-0.9.5/vp8/vp8dx.mk 2010-10-28 13:14:14.000000000 +0000 +++ libvpx-0.9.6/vp8/vp8dx.mk 2011-03-04 20:40:40.000000000 +0000 @@ -24,9 +24,6 @@ VP8_DX_SRCS-yes += vp8_dx_iface.c -CFLAGS+=-I$(SRC_PATH_BARE)/$(VP8_PREFIX)decoder - - # common #define ARM #define DISABLE_THREAD @@ -65,7 +62,7 @@ VP8_DX_SRCS-yes += decoder/onyxd_int.h VP8_DX_SRCS-yes += decoder/treereader.h VP8_DX_SRCS-yes += decoder/onyxd_if.c -VP8_DX_SRCS-yes += decoder/threading.c +VP8_DX_SRCS-$(CONFIG_MULTITHREAD) += decoder/threading.c VP8_DX_SRCS-yes += decoder/idct_blk.c VP8_DX_SRCS-$(CONFIG_MULTITHREAD) += decoder/reconintra_mt.h VP8_DX_SRCS-$(CONFIG_MULTITHREAD) += decoder/reconintra_mt.c diff -Nru libvpx-0.9.5/vpx/internal/vpx_codec_internal.h libvpx-0.9.6/vpx/internal/vpx_codec_internal.h --- 
libvpx-0.9.5/vpx/internal/vpx_codec_internal.h 2010-10-28 13:14:14.000000000 +0000 +++ libvpx-0.9.6/vpx/internal/vpx_codec_internal.h 2011-03-04 20:40:40.000000000 +0000 @@ -9,7 +9,7 @@ */ -/*!\file decoder_impl.h +/*!\file * \brief Describes the decoder algorithm interface for algorithm * implementations. * @@ -214,7 +214,7 @@ vpx_codec_iter_t *iter); -/*\brief e_xternal Memory Allocation memory map get iterator +/*\brief eXternal Memory Allocation memory map get iterator * * Iterates over a list of the memory maps requested by the decoder. The * iterator storage should be initialized to NULL to start the iteration. @@ -230,7 +230,7 @@ vpx_codec_iter_t *iter); -/*\brief e_xternal Memory Allocation memory map set iterator +/*\brief eXternal Memory Allocation memory map set iterator * * Sets a memory descriptor inside the decoder instance. * @@ -321,7 +321,7 @@ { vpx_codec_put_frame_cb_fn_t put_frame; vpx_codec_put_slice_cb_fn_t put_slice; - }; + } u; void *user_priv; } vpx_codec_priv_cb_pair_t; @@ -405,7 +405,7 @@ /* Internal Utility Functions * - * The following functions are indended to be used inside algorithms as + * The following functions are intended to be used inside algorithms as * utilities for manipulating vpx_codec_* data structures. */ struct vpx_codec_pkt_list diff -Nru libvpx-0.9.5/vpx/src/vpx_codec.c libvpx-0.9.6/vpx/src/vpx_codec.c --- libvpx-0.9.5/vpx/src/vpx_codec.c 2010-10-28 13:14:14.000000000 +0000 +++ libvpx-0.9.6/vpx/src/vpx_codec.c 2011-03-04 20:40:40.000000000 +0000 @@ -9,7 +9,7 @@ */ -/*!\file vpx_decoder.c +/*!\file * \brief Provides the high level interface to wrap decoder algorithms. * */ diff -Nru libvpx-0.9.5/vpx/src/vpx_decoder.c libvpx-0.9.6/vpx/src/vpx_decoder.c --- libvpx-0.9.5/vpx/src/vpx_decoder.c 2010-10-28 13:14:14.000000000 +0000 +++ libvpx-0.9.6/vpx/src/vpx_decoder.c 2011-03-04 20:40:40.000000000 +0000 @@ -9,7 +9,7 @@ */ -/*!\file vpx_decoder.c +/*!\file * \brief Provides the high level interface to wrap decoder algorithms. 
* */ @@ -118,7 +118,9 @@ { vpx_codec_err_t res; - if (!ctx || !data || !data_sz) + /* Sanity checks */ + /* NULL data ptr allowed if data_sz is 0 too */ + if (!ctx || (!data && data_sz)) res = VPX_CODEC_INVALID_PARAM; else if (!ctx->iface || !ctx->priv) res = VPX_CODEC_ERROR; @@ -158,7 +160,7 @@ res = VPX_CODEC_ERROR; else { - ctx->priv->dec.put_frame_cb.put_frame = cb; + ctx->priv->dec.put_frame_cb.u.put_frame = cb; ctx->priv->dec.put_frame_cb.user_priv = user_priv; res = VPX_CODEC_OK; } @@ -180,7 +182,7 @@ res = VPX_CODEC_ERROR; else { - ctx->priv->dec.put_slice_cb.put_slice = cb; + ctx->priv->dec.put_slice_cb.u.put_slice = cb; ctx->priv->dec.put_slice_cb.user_priv = user_priv; res = VPX_CODEC_OK; } diff -Nru libvpx-0.9.5/vpx/src/vpx_decoder_compat.c libvpx-0.9.6/vpx/src/vpx_decoder_compat.c --- libvpx-0.9.5/vpx/src/vpx_decoder_compat.c 2010-10-28 13:14:14.000000000 +0000 +++ libvpx-0.9.6/vpx/src/vpx_decoder_compat.c 2011-03-04 20:40:40.000000000 +0000 @@ -9,7 +9,7 @@ */ -/*!\file vpx_decoder.c +/*!\file * \brief Provides the high level interface to wrap decoder algorithms. * */ diff -Nru libvpx-0.9.5/vpx/src/vpx_encoder.c libvpx-0.9.6/vpx/src/vpx_encoder.c --- libvpx-0.9.5/vpx/src/vpx_encoder.c 2010-10-28 13:14:14.000000000 +0000 +++ libvpx-0.9.6/vpx/src/vpx_encoder.c 2011-03-04 20:40:40.000000000 +0000 @@ -9,7 +9,7 @@ */ -/*!\file vpx_encoder.c +/*!\file * \brief Provides the high level interface to wrap encoder algorithms. * */ diff -Nru libvpx-0.9.5/vpx/vp8cx.h libvpx-0.9.6/vpx/vp8cx.h --- libvpx-0.9.5/vpx/vp8cx.h 2010-10-28 13:14:14.000000000 +0000 +++ libvpx-0.9.6/vpx/vp8cx.h 2011-03-04 20:40:40.000000000 +0000 @@ -16,7 +16,7 @@ */ #include "vp8.h" -/*!\file vp8cx.h +/*!\file * \brief Provides definitions for using the VP8 encoder algorithm within the * vpx Codec Interface. 
*/ @@ -24,13 +24,15 @@ #define VP8CX_H #include "vpx/vpx_codec_impl_top.h" -/*!\brief Algorithm interface for VP8 +/*!\name Algorithm interface for VP8 * * This interface provides the capability to encode raw VP8 streams, as would * be found in AVI files. + * @{ */ extern vpx_codec_iface_t vpx_codec_vp8_cx_algo; extern vpx_codec_iface_t* vpx_codec_vp8_cx(void); +/*!@} - end algorithm interface member group*/ /* @@ -114,7 +116,10 @@ /*!\brief VP8 encoder control functions * - * The set of macros define the control functions of VP8 encoder interface + * This set of macros define the control functions available for the VP8 + * encoder interface. + * + * \sa #vpx_codec_control */ enum vp8e_enc_control_id { @@ -124,7 +129,18 @@ VP8E_SET_ROI_MAP, /**< control function to pass an ROI map to encoder */ VP8E_SET_ACTIVEMAP, /**< control function to pass an Active map to encoder */ VP8E_SET_SCALEMODE = 11, /**< control function to set encoder scaling mode */ - VP8E_SET_CPUUSED = 13, /**< control function to set vp8 encoder cpuused */ + /*!\brief control function to set vp8 encoder cpuused + * + * Changes in this value influences, among others, the encoder's selection + * of motion estimation methods. Values greater than 0 will increase encoder + * speed at the expense of quality. + * The full set of adjustments can be found in + * onyx_if.c:vp8_set_speed_features(). + * \todo List highlights of the changes at various levels. 
+ * + * \note Valid range: -16..16 or {-16..-4, 4..16} w/CONFIG_REALTIME_ONLY + */ + VP8E_SET_CPUUSED = 13, VP8E_SET_ENABLEAUTOALTREF, /**< control function to enable vp8 to automatic set and use altref frame */ VP8E_SET_NOISE_SENSITIVITY, /**< control function to set noise sensitivity */ VP8E_SET_SHARPNESS, /**< control function to set sharpness */ @@ -140,7 +156,15 @@ VP8E_SET_ARNR_MAXFRAMES, /**< control function to set the max number of frames blurred creating arf*/ VP8E_SET_ARNR_STRENGTH , /**< control function to set the filter strength for the arf */ VP8E_SET_ARNR_TYPE , /**< control function to set the type of filter to use for the arf*/ -} ; + VP8E_SET_TUNING, /**< control function to set visual tuning */ + /*!\brief control function to set constrained quality level + * + * \attention For this value to be used vpx_codec_enc_cfg_t::g_usage must be + * set to #VPX_CQ. + * \note Valid range: 0..63 + */ + VP8E_SET_CQ_LEVEL, +}; /*!\brief vpx 1-D scaling mode * @@ -224,6 +248,18 @@ } vp8e_token_partitions; +/*!\brief VP8 model tuning parameters + * + * Changes the encoder to tune for certain types of input material. + * + */ +typedef enum +{ + VP8_TUNE_PSNR, + VP8_TUNE_SSIM +} vp8e_tuning; + + /*!\brief VP8 encoder control function parameter type * * Defines the data types that VP8E control functions take. 
Note that @@ -253,7 +289,8 @@ VPX_CTRL_USE_TYPE(VP8E_SET_ARNR_MAXFRAMES, unsigned int) VPX_CTRL_USE_TYPE(VP8E_SET_ARNR_STRENGTH , unsigned int) VPX_CTRL_USE_TYPE(VP8E_SET_ARNR_TYPE , unsigned int) - +VPX_CTRL_USE_TYPE(VP8E_SET_TUNING, vp8e_tuning) +VPX_CTRL_USE_TYPE(VP8E_SET_CQ_LEVEL , unsigned int) VPX_CTRL_USE_TYPE(VP8E_GET_LAST_QUANTIZER, int *) VPX_CTRL_USE_TYPE(VP8E_GET_LAST_QUANTIZER_64, int *) diff -Nru libvpx-0.9.5/vpx/vp8dx.h libvpx-0.9.6/vpx/vp8dx.h --- libvpx-0.9.5/vpx/vp8dx.h 2010-10-28 13:14:14.000000000 +0000 +++ libvpx-0.9.6/vpx/vp8dx.h 2011-03-04 20:40:40.000000000 +0000 @@ -16,7 +16,7 @@ * * @{ */ -/*!\file vp8dx.h +/*!\file * \brief Provides definitions for using the VP8 algorithm within the vpx Decoder * interface. */ @@ -24,18 +24,53 @@ #define VP8DX_H #include "vpx/vpx_codec_impl_top.h" -/*!\brief Algorithm interface for VP8 +/*!\name Algorithm interface for VP8 * * This interface provides the capability to decode raw VP8 streams, as would * be found in AVI files and other non-Flash uses. + * @{ */ extern vpx_codec_iface_t vpx_codec_vp8_dx_algo; extern vpx_codec_iface_t* vpx_codec_vp8_dx(void); +/*!@} - end algorithm interface member group*/ /* Include controls common to both the encoder and decoder */ #include "vp8.h" +/*!\brief VP8 decoder control functions + * + * This set of macros define the control functions available for the VP8 + * decoder interface. + * + * \sa #vpx_codec_control + */ +enum vp8_dec_control_id +{ + /** control function to get info on which reference frames were updated + * by the last decode + */ + VP8D_GET_LAST_REF_UPDATES = VP8_DECODER_CTRL_ID_START, + + /** check if the indicated frame is corrupted */ + VP8D_GET_FRAME_CORRUPTED, + + VP8_DECODER_CTRL_ID_MAX +} ; + + +/*!\brief VP8 decoder control function parameter type + * + * Defines the data types that VP8D control functions take. 
Note that + * additional common controls are defined in vp8.h + * + */ + + +VPX_CTRL_USE_TYPE(VP8D_GET_LAST_REF_UPDATES, int *) +VPX_CTRL_USE_TYPE(VP8D_GET_FRAME_CORRUPTED, int *) + + /*! @} - end defgroup vp8_decoder */ diff -Nru libvpx-0.9.5/vpx/vp8.h libvpx-0.9.6/vpx/vp8.h --- libvpx-0.9.5/vpx/vp8.h 2010-10-28 13:14:14.000000000 +0000 +++ libvpx-0.9.6/vpx/vp8.h 2011-03-04 20:40:40.000000000 +0000 @@ -14,7 +14,7 @@ * VP8 is vpx's newest video compression algorithm that uses motion * compensated prediction, Discrete Cosine Transform (DCT) coding of the * prediction error signal and context dependent entropy coding techniques - * based on arithmatic principles. It features: + * based on arithmetic principles. It features: * - YUV 4:2:0 image format * - Macro-block based coding (16x16 luma plus two 8x8 chroma) * - 1/4 (1/8) pixel accuracy motion compensated prediction @@ -25,7 +25,7 @@ * * @{ */ -/*!\file vp8.h +/*!\file * \brief Provides controls common to both the VP8 encoder and decoder. 
*/ #ifndef VP8_H @@ -36,12 +36,17 @@ * * The set of macros define the control functions of VP8 interface */ -enum vp8_dec_control_id +enum vp8_com_control_id { - VP8_SET_REFERENCE = 1, /**< pass in an external frame into decoder to be used as reference frame */ - VP8_COPY_REFERENCE = 2, /**< get a copy of reference frame from the decoder */ - VP8_SET_POSTPROC = 3, /**< set decoder's the post processing settings */ - VP8_COMMON_CTRL_ID_MAX + VP8_SET_REFERENCE = 1, /**< pass in an external frame into decoder to be used as reference frame */ + VP8_COPY_REFERENCE = 2, /**< get a copy of reference frame from the decoder */ + VP8_SET_POSTPROC = 3, /**< set the decoder's post processing settings */ + VP8_SET_DBG_COLOR_REF_FRAME = 4, /**< set the reference frames to color for each macroblock */ + VP8_SET_DBG_COLOR_MB_MODES = 5, /**< set which macro block modes to color */ + VP8_SET_DBG_COLOR_B_MODES = 6, /**< set which blocks modes to color */ + VP8_SET_DBG_DISPLAY_MV = 7, /**< set which motion vector modes to draw */ + VP8_COMMON_CTRL_ID_MAX, + VP8_DECODER_CTRL_ID_START = 256, }; /*!\brief post process flags @@ -50,24 +55,28 @@ */ enum vp8_postproc_level { - VP8_NOFILTERING = 0, - VP8_DEBLOCK = 1, - VP8_DEMACROBLOCK = 2, - VP8_ADDNOISE = 4 + VP8_NOFILTERING = 0, + VP8_DEBLOCK = 1<<0, + VP8_DEMACROBLOCK = 1<<1, + VP8_ADDNOISE = 1<<2, + VP8_DEBUG_TXT_FRAME_INFO = 1<<3, /**< print frame information */ + VP8_DEBUG_TXT_MBLK_MODES = 1<<4, /**< print macro block modes over each macro block */ + VP8_DEBUG_TXT_DC_DIFF = 1<<5, /**< print dc diff for each macro block */ + VP8_DEBUG_TXT_RATE_INFO = 1<<6, /**< print video rate info (encoder only) */ }; /*!\brief post process flags * * This define a structure that describe the post processing settings. For - * the best objective measure (using thet PSNR metric) set post_proc_flag + * the best objective measure (using the PSNR metric) set post_proc_flag * to VP8_DEBLOCK and deblocking_level to 1. 
*/ typedef struct vp8_postproc_cfg { - int post_proc_flag; /**< the types of post processing to be done, should be combination of "vp8_postproc_level" */ - int deblocking_level; /**< the strength of deblocking, valid range [0, 16] */ - int noise_level; /**< the strength of additive noise, valid range [0, 16] */ + int post_proc_flag; /**< the types of post processing to be done, should be combination of "vp8_postproc_level" */ + int deblocking_level; /**< the strength of deblocking, valid range [0, 16] */ + int noise_level; /**< the strength of additive noise, valid range [0, 16] */ } vp8_postproc_cfg_t; /*!\brief reference frame type @@ -93,14 +102,18 @@ } vpx_ref_frame_t; -/*!\brief vp8 decoder control funciton parameter type +/*!\brief vp8 decoder control function parameter type * - * defines the data type for each of VP8 decoder control funciton requires + * defines the data type for each of VP8 decoder control function requires */ VPX_CTRL_USE_TYPE(VP8_SET_REFERENCE, vpx_ref_frame_t *) VPX_CTRL_USE_TYPE(VP8_COPY_REFERENCE, vpx_ref_frame_t *) VPX_CTRL_USE_TYPE(VP8_SET_POSTPROC, vp8_postproc_cfg_t *) +VPX_CTRL_USE_TYPE(VP8_SET_DBG_COLOR_REF_FRAME, int) +VPX_CTRL_USE_TYPE(VP8_SET_DBG_COLOR_MB_MODES, int) +VPX_CTRL_USE_TYPE(VP8_SET_DBG_COLOR_B_MODES, int) +VPX_CTRL_USE_TYPE(VP8_SET_DBG_DISPLAY_MV, int) /*! @} - end defgroup vp8 */ diff -Nru libvpx-0.9.5/vpx/vpx_codec.h libvpx-0.9.6/vpx/vpx_codec.h --- libvpx-0.9.5/vpx/vpx_codec.h 2010-10-28 13:14:14.000000000 +0000 +++ libvpx-0.9.6/vpx/vpx_codec.h 2011-03-04 20:40:40.000000000 +0000 @@ -16,7 +16,7 @@ * @{ */ -/*!\file vpx_codec.h +/*!\file * \brief Describes the codec algorithm interface to applications. 
* * This file describes the interface between an application and a @@ -145,7 +145,7 @@ typedef long vpx_codec_caps_t; #define VPX_CODEC_CAP_DECODER 0x1 /**< Is a decoder */ #define VPX_CODEC_CAP_ENCODER 0x2 /**< Is an encoder */ -#define VPX_CODEC_CAP_XMA 0x4 /**< Supports e_xternal Memory Allocation */ +#define VPX_CODEC_CAP_XMA 0x4 /**< Supports eXternal Memory Allocation */ /*! \brief Initialization-time Feature Enabling @@ -156,7 +156,7 @@ * The available flags are specified by VPX_CODEC_USE_* defines. */ typedef long vpx_codec_flags_t; -#define VPX_CODEC_USE_XMA 0x00000001 /**< Use e_xternal Memory Allocation mode */ +#define VPX_CODEC_USE_XMA 0x00000001 /**< Use eXternal Memory Allocation mode */ /*!\brief Codec interface structure. @@ -232,7 +232,7 @@ /*!\brief Return the version major number */ #define vpx_codec_version_major() ((vpx_codec_version()>>16)&0xff) - /*!\brief Return the version minr number */ + /*!\brief Return the version minor number */ #define vpx_codec_version_minor() ((vpx_codec_version()>>8)&0xff) /*!\brief Return the version patch number */ @@ -338,9 +338,9 @@ /*!\brief Get the capabilities of an algorithm. * - * Retrieves the capabliities bitfield from the algorithm's interface. + * Retrieves the capabilities bitfield from the algorithm's interface. * - * \param[in] iface Pointer to the alogrithm interface + * \param[in] iface Pointer to the algorithm interface * */ vpx_codec_caps_t vpx_codec_get_caps(vpx_codec_iface_t *iface); @@ -354,7 +354,7 @@ * * This wrapper function dispatches the request to the helper function * associated with the given ctrl_id. It tries to call this function - * transparantly, but will return #VPX_CODEC_ERROR if the request could not + * transparently, but will return #VPX_CODEC_ERROR if the request could not * be dispatched. * * Note that this function should not be used directly. 
Call the @@ -525,7 +525,7 @@ * passed in the order they are read from vpx_codec_get_mem_map(), but may be * passed in groups of any size. Segments \ref MUST be set only once. The * allocation function \ref MUST ensure that the vpx_codec_mmap_t::base member - * is non-NULL. If the segment requires cleanup handling (eg, calling free() + * is non-NULL. If the segment requires cleanup handling (e.g., calling free() * or close()) then the vpx_codec_mmap_t::dtor member \ref MUST be populated. * * \param[in] ctx Pointer to this instance's context. diff -Nru libvpx-0.9.5/vpx/vpx_decoder_compat.h libvpx-0.9.6/vpx/vpx_decoder_compat.h --- libvpx-0.9.5/vpx/vpx_decoder_compat.h 2010-10-28 13:14:14.000000000 +0000 +++ libvpx-0.9.6/vpx/vpx_decoder_compat.h 2011-03-04 20:40:40.000000000 +0000 @@ -16,7 +16,7 @@ * @{ */ -/*!\file vpx_decoder_compat.h +/*!\file * \brief Provides a compatibility layer between version 1 and 2 of this API. * * This interface has been deprecated. Only existing code should make use @@ -89,12 +89,12 @@ * ::vpx_dec_iface_t interface structure. Capabilities are extra interfaces * or functionality, and are not required to be supported by a decoder. * - * The available flags are specifiedby VPX_DEC_CAP_* defines. + * The available flags are specified by VPX_DEC_CAP_* defines. */ typedef int vpx_dec_caps_t; #define VPX_DEC_CAP_PUT_SLICE 0x0001 /**< Will issue put_slice callbacks */ #define VPX_DEC_CAP_PUT_FRAME 0x0002 /**< Will issue put_frame callbacks */ -#define VPX_DEC_CAP_XMA 0x0004 /**< Supports e_xternal Memory Allocation */ +#define VPX_DEC_CAP_XMA 0x0004 /**< Supports eXternal Memory Allocation */ /*!\brief Stream properties * @@ -222,7 +222,7 @@ * is properly initialized. * * \param[in] ctx Pointer to this instance's context. - * \param[in] iface Pointer to the alogrithm interface to use. + * \param[in] iface Pointer to the algorithm interface to use. * \param[in] ver ABI version number. 
Must be set to * VPX_DECODER_ABI_VERSION * \retval #VPX_DEC_OK @@ -253,9 +253,9 @@ /*!\brief Get the capabilities of an algorithm. * - * Retrieves the capabliities bitfield from the algorithm's interface. + * Retrieves the capabilities bitfield from the algorithm's interface. * - * \param[in] iface Pointer to the alogrithm interface + * \param[in] iface Pointer to the algorithm interface * */ vpx_dec_caps_t vpx_dec_get_caps(vpx_dec_iface_t *iface) DEPRECATED; @@ -267,7 +267,7 @@ * context is not necessary. Can be used to determine if the bitstream is * of the proper format, and to extract information from the stream. * - * \param[in] iface Pointer to the alogrithm interface + * \param[in] iface Pointer to the algorithm interface * \param[in] data Pointer to a block of data to parse * \param[in] data_sz Size of the data buffer * \param[in,out] si Pointer to stream info to update. The size member @@ -309,7 +309,7 @@ * * This wrapper function dispatches the request to the helper function * associated with the given ctrl_id. It tries to call this function - * transparantly, but will return #VPX_DEC_ERROR if the request could not + * transparently, but will return #VPX_DEC_ERROR if the request could not * be dispatched. * * \param[in] ctx Pointer to this instance's context @@ -507,7 +507,7 @@ * is properly initialized. * * \param[in] ctx Pointer to this instance's context. - * \param[in] iface Pointer to the alogrithm interface to use. + * \param[in] iface Pointer to the algorithm interface to use. * \param[in] ver ABI version number. Must be set to * VPX_DECODER_ABI_VERSION * \retval #VPX_DEC_OK @@ -558,7 +558,7 @@ * passed in the order they are read from vpx_dec_get_mem_map(), but may be * passed in groups of any size. Segments \ref MUST be set only once. The * allocation function \ref MUST ensure that the vpx_dec_mmap_t::base member - * is non-NULL. If the segment requires cleanup handling (eg, calling free() + * is non-NULL. 
If the segment requires cleanup handling (e.g., calling free() * or close()) then the vpx_dec_mmap_t::dtor member \ref MUST be populated. * * \param[in] ctx Pointer to this instance's context. diff -Nru libvpx-0.9.5/vpx/vpx_decoder.h libvpx-0.9.6/vpx/vpx_decoder.h --- libvpx-0.9.5/vpx/vpx_decoder.h 2010-10-28 13:14:14.000000000 +0000 +++ libvpx-0.9.6/vpx/vpx_decoder.h 2011-03-04 20:40:40.000000000 +0000 @@ -17,7 +17,7 @@ * @{ */ -/*!\file vpx_decoder.h +/*!\file * \brief Describes the decoder algorithm interface to applications. * * This file describes the interface between an application and a @@ -48,7 +48,7 @@ * ::vpx_codec_iface_t interface structure. Capabilities are extra interfaces * or functionality, and are not required to be supported by a decoder. * - * The available flags are specifiedby VPX_CODEC_CAP_* defines. + * The available flags are specified by VPX_CODEC_CAP_* defines. */ #define VPX_CODEC_CAP_PUT_SLICE 0x10000 /**< Will issue put_slice callbacks */ #define VPX_CODEC_CAP_PUT_FRAME 0x20000 /**< Will issue put_frame callbacks */ @@ -109,7 +109,7 @@ * kept readable and stable until all memory maps have been set. * * \param[in] ctx Pointer to this instance's context. - * \param[in] iface Pointer to the alogrithm interface to use. + * \param[in] iface Pointer to the algorithm interface to use. * \param[in] cfg Configuration to use, if known. May be NULL. * \param[in] flags Bitfield of VPX_CODEC_USE_* flags * \param[in] ver ABI version number. Must be set to @@ -139,7 +139,7 @@ * context is not necessary. Can be used to determine if the bitstream is * of the proper format, and to extract information from the stream. * - * \param[in] iface Pointer to the alogrithm interface + * \param[in] iface Pointer to the algorithm interface * \param[in] data Pointer to a block of data to parse * \param[in] data_sz Size of the data buffer * \param[in,out] si Pointer to stream info to update. 
The size member diff -Nru libvpx-0.9.5/vpx/vpx_encoder.h libvpx-0.9.6/vpx/vpx_encoder.h --- libvpx-0.9.5/vpx/vpx_encoder.h 2010-10-28 13:14:14.000000000 +0000 +++ libvpx-0.9.6/vpx/vpx_encoder.h 2011-03-04 20:40:40.000000000 +0000 @@ -17,7 +17,7 @@ * @{ */ -/*!\file vpx_encoder.h +/*!\file * \brief Describes the encoder algorithm interface to applications. * * This file describes the interface between an application and a @@ -51,7 +51,7 @@ * interfaces or functionality, and are not required to be supported * by an encoder. * - * The available flags are specifiedby VPX_CODEC_CAP_* defines. + * The available flags are specified by VPX_CODEC_CAP_* defines. */ #define VPX_CODEC_CAP_PSNR 0x10000 /**< Can issue PSNR packets */ @@ -147,7 +147,7 @@ /* This packet size is fixed to allow codecs to extend this * interface without having to manage storage for raw packets, - * ie if it's smaller than 128 bytes, you can store in the + * i.e., if it's smaller than 128 bytes, you can store in the * packet list directly. 
*/ char pad[128 - sizeof(enum vpx_codec_cx_pkt_kind)]; /**< fixed sz */ @@ -179,7 +179,8 @@ enum vpx_rc_mode { VPX_VBR, /**< Variable Bit Rate (VBR) mode */ - VPX_CBR /**< Constant Bit Rate (CBR) mode */ + VPX_CBR, /**< Constant Bit Rate (CBR) mode */ + VPX_CQ /**< Constant Quality (CQ) mode */ }; diff -Nru libvpx-0.9.5/vpx/vpx_image.h libvpx-0.9.6/vpx/vpx_image.h --- libvpx-0.9.5/vpx/vpx_image.h 2010-10-28 13:14:14.000000000 +0000 +++ libvpx-0.9.6/vpx/vpx_image.h 2011-03-04 20:40:40.000000000 +0000 @@ -9,7 +9,7 @@ */ -/*!\file vpx_image.h +/*!\file * \brief Describes the vpx image descriptor and associated operations * */ @@ -33,7 +33,7 @@ #define VPX_IMG_FMT_PLANAR 0x100 /**< Image is a planar format */ #define VPX_IMG_FMT_UV_FLIP 0x200 /**< V plane precedes U plane in memory */ -#define VPX_IMG_FMT_HAS_ALPHA 0x400 /**< Image has an alpha channel componnent */ +#define VPX_IMG_FMT_HAS_ALPHA 0x400 /**< Image has an alpha channel component */ /*!\brief List of supported image formats */ @@ -115,7 +115,7 @@ #define VPX_PLANE_Y 0 /**< Y (Luminance) plane */ #define VPX_PLANE_U 1 /**< U (Chroma) plane */ #define VPX_PLANE_V 2 /**< V (Chroma) plane */ -#define VPX_PLANE_ALPHA 3 /**< A (Transparancy) plane */ +#define VPX_PLANE_ALPHA 3 /**< A (Transparency) plane */ #if !defined(VPX_CODEC_DISABLE_COMPAT) || !VPX_CODEC_DISABLE_COMPAT #define PLANE_PACKED VPX_PLANE_PACKED #define PLANE_Y VPX_PLANE_Y diff -Nru libvpx-0.9.5/vpxdec.c libvpx-0.9.6/vpxdec.c --- libvpx-0.9.5/vpxdec.c 2010-10-28 13:14:14.000000000 +0000 +++ libvpx-0.9.6/vpxdec.c 2011-03-04 20:40:41.000000000 +0000 @@ -17,14 +17,7 @@ #include #include #include -#if defined(_WIN32) -#include -#define snprintf _snprintf -#define isatty _isatty -#define fileno _fileno -#else -#include -#endif + #define VPX_CODEC_DISABLE_COMPAT 1 #include "vpx_config.h" #include "vpx/vpx_decoder.h" @@ -35,8 +28,20 @@ #if CONFIG_MD5 #include "md5_utils.h" #endif +#include "tools_common.h" #include "nestegg/include/nestegg/nestegg.h" 
+#if CONFIG_OS_SUPPORT +#if defined(_WIN32) +#include +#define snprintf _snprintf +#define isatty _isatty +#define fileno _fileno +#else +#include +#endif +#endif + #ifndef PATH_MAX #define PATH_MAX 256 #endif @@ -107,11 +112,19 @@ "Enable VP8 demacroblocking, w/ level"); static const arg_def_t pp_debug_info = ARG_DEF(NULL, "pp-debug-info", 1, "Enable VP8 visible debug info"); - +static const arg_def_t pp_disp_ref_frame = ARG_DEF(NULL, "pp-dbg-ref-frame", 1, + "Display only selected reference frame per macro block"); +static const arg_def_t pp_disp_mb_modes = ARG_DEF(NULL, "pp-dbg-mb-modes", 1, + "Display only selected macro block modes"); +static const arg_def_t pp_disp_b_modes = ARG_DEF(NULL, "pp-dbg-b-modes", 1, + "Display only selected block modes"); +static const arg_def_t pp_disp_mvs = ARG_DEF(NULL, "pp-dbg-mvs", 1, + "Draw only selected motion vectors"); static const arg_def_t *vp8_pp_args[] = { &addnoise_level, &deblock, &demacroblock_level, &pp_debug_info, + &pp_disp_ref_frame, &pp_disp_mb_modes, &pp_disp_b_modes, &pp_disp_mvs, NULL }; #endif @@ -314,7 +327,8 @@ } else { - FILE *outfile = out = strcmp("-", out_fn) ? fopen(out_fn, "wb") : stdout; + FILE *outfile = out = strcmp("-", out_fn) ? 
fopen(out_fn, "wb") + : set_binary_mode(stdout); if (!outfile) { @@ -432,6 +446,8 @@ int is_raw = 0; vpx_codec_stream_info_t si; + si.sz = sizeof(si); + if (fread(buf, 1, 32, infile) == 32) { int i; @@ -540,6 +556,7 @@ *fps_den = tstamp / 1000; return 0; fail: + nestegg_destroy(input->nestegg_ctx); input->nestegg_ctx = NULL; rewind(input->infile); return 1; @@ -702,6 +719,10 @@ vpx_codec_dec_cfg_t cfg = {0}; #if CONFIG_VP8_DECODER vp8_postproc_cfg_t vp8_pp_cfg = {0}; + int vp8_dbg_color_ref_frame = 0; + int vp8_dbg_color_mb_modes = 0; + int vp8_dbg_color_b_modes = 0; + int vp8_dbg_display_mv = 0; #endif struct input_ctx input = {0}; @@ -787,6 +808,42 @@ if (level) vp8_pp_cfg.post_proc_flag |= level; } + else if (arg_match(&arg, &pp_disp_ref_frame, argi)) + { + unsigned int flags = arg_parse_int(&arg); + if (flags) + { + postproc = 1; + vp8_dbg_color_ref_frame = flags; + } + } + else if (arg_match(&arg, &pp_disp_mb_modes, argi)) + { + unsigned int flags = arg_parse_int(&arg); + if (flags) + { + postproc = 1; + vp8_dbg_color_mb_modes = flags; + } + } + else if (arg_match(&arg, &pp_disp_b_modes, argi)) + { + unsigned int flags = arg_parse_int(&arg); + if (flags) + { + postproc = 1; + vp8_dbg_color_b_modes = flags; + } + } + else if (arg_match(&arg, &pp_disp_mvs, argi)) + { + unsigned int flags = arg_parse_int(&arg); + if (flags) + { + postproc = 1; + vp8_dbg_display_mv = flags; + } + } #endif else @@ -805,7 +862,7 @@ usage_exit(); /* Open file */ - infile = strcmp(fn, "-") ? fopen(fn, "rb") : stdin; + infile = strcmp(fn, "-") ? fopen(fn, "rb") : set_binary_mode(stdin); if (!infile) { @@ -813,7 +870,7 @@ strcmp(fn, "-") ? 
fn : "stdin"); return EXIT_FAILURE; } - +#if CONFIG_OS_SUPPORT /* Make sure we don't dump to the terminal, unless forced to with -o - */ if(!outfile_pattern && isatty(fileno(stdout)) && !do_md5 && !noblit) { @@ -822,7 +879,7 @@ "override.\n"); return EXIT_FAILURE; } - +#endif input.infile = infile; if(file_is_ivf(infile, &fourcc, &width, &height, &fps_den, &fps_num)) @@ -876,7 +933,13 @@ } if(input.kind == WEBM_FILE) - webm_guess_framerate(&input, &fps_den, &fps_num); + if(webm_guess_framerate(&input, &fps_den, &fps_num)) + { + fprintf(stderr, "Failed to guess framerate -- error parsing " + "webm file?\n"); + return EXIT_FAILURE; + } + /*Note: We can't output an aspect ratio here because IVF doesn't store one, and neither does VP8. @@ -920,6 +983,33 @@ return EXIT_FAILURE; } + if (vp8_dbg_color_ref_frame + && vpx_codec_control(&decoder, VP8_SET_DBG_COLOR_REF_FRAME, vp8_dbg_color_ref_frame)) + { + fprintf(stderr, "Failed to configure reference block visualizer: %s\n", vpx_codec_error(&decoder)); + return EXIT_FAILURE; + } + + if (vp8_dbg_color_mb_modes + && vpx_codec_control(&decoder, VP8_SET_DBG_COLOR_MB_MODES, vp8_dbg_color_mb_modes)) + { + fprintf(stderr, "Failed to configure macro block visualizer: %s\n", vpx_codec_error(&decoder)); + return EXIT_FAILURE; + } + + if (vp8_dbg_color_b_modes + && vpx_codec_control(&decoder, VP8_SET_DBG_COLOR_B_MODES, vp8_dbg_color_b_modes)) + { + fprintf(stderr, "Failed to configure block visualizer: %s\n", vpx_codec_error(&decoder)); + return EXIT_FAILURE; + } + + if (vp8_dbg_display_mv + && vpx_codec_control(&decoder, VP8_SET_DBG_DISPLAY_MV, vp8_dbg_display_mv)) + { + fprintf(stderr, "Failed to configure motion vector visualizer: %s\n", vpx_codec_error(&decoder)); + return EXIT_FAILURE; + } #endif /* Decode file */ diff -Nru libvpx-0.9.5/vpxenc.c libvpx-0.9.6/vpxenc.c --- libvpx-0.9.5/vpxenc.c 2010-10-28 13:14:14.000000000 +0000 +++ libvpx-0.9.6/vpxenc.c 2011-03-04 20:40:41.000000000 +0000 @@ -12,7 +12,7 @@ /* This is a simple 
program that encodes YV12 files and generates ivf * files using the new interface. */ -#if defined(_WIN32) +#if defined(_WIN32) || !CONFIG_OS_SUPPORT #define USE_POSIX_MMAP 0 #else #define USE_POSIX_MMAP 1 @@ -35,6 +35,7 @@ #include "vpx/vp8cx.h" #include "vpx_ports/mem_ops.h" #include "vpx_ports/vpx_timer.h" +#include "tools_common.h" #include "y4minput.h" #include "libmkv/EbmlWriter.h" #include "libmkv/EbmlIDs.h" @@ -57,6 +58,14 @@ #define LITERALU64(n) n##LLU #endif +/* We should use 32-bit file operations in WebM file format + * when building ARM executable file (.axf) with RVCT */ +#if !CONFIG_OS_SUPPORT +typedef long off_t; +#define fseeko fseek +#define ftello ftell +#endif + static const char *exec_name; static const struct codec_item @@ -185,11 +194,11 @@ } -void stats_close(stats_io_t *stats) +void stats_close(stats_io_t *stats, int last_pass) { if (stats->file) { - if (stats->pass == 1) + if (stats->pass == last_pass) { #if 0 #elif USE_POSIX_MMAP @@ -204,7 +213,7 @@ } else { - if (stats->pass == 1) + if (stats->pass == last_pass) free(stats->buf.buf); } } @@ -250,7 +259,8 @@ struct detect_buffer { char buf[4]; - int valid; + size_t buf_read; + size_t position; }; @@ -304,14 +314,21 @@ for (r = 0; r < h; r++) { - if (detect->valid) + size_t needed = w; + size_t buf_position = 0; + const size_t left = detect->buf_read - detect->position; + if (left > 0) + { + const size_t more = (left < needed) ? 
left : needed; + memcpy(ptr, detect->buf + detect->position, more); + buf_position = more; + needed -= more; + detect->position += more; + } + if (needed > 0) { - memcpy(ptr, detect->buf, 4); - shortread |= fread(ptr+4, 1, w-4, f) < w-4; - detect->valid = 0; + shortread |= (fread(ptr + buf_position, 1, needed, f) < needed); } - else - shortread |= fread(ptr, 1, w, f) < w; ptr += img->stride[plane]; } @@ -338,12 +355,12 @@ unsigned int *fourcc, unsigned int *width, unsigned int *height, - char detect[4]) + struct detect_buffer *detect) { char raw_hdr[IVF_FILE_HDR_SZ]; int is_ivf = 0; - if(memcmp(detect, "DKIF", 4) != 0) + if(memcmp(detect->buf, "DKIF", 4) != 0) return 0; /* See write_ivf_file_header() for more documentation on the file header @@ -367,6 +384,7 @@ { *width = mem_get_le16(raw_hdr + 12); *height = mem_get_le16(raw_hdr + 14); + detect->position = 4; } return is_ivf; @@ -434,7 +452,7 @@ int debug; FILE *stream; - uint64_t last_pts_ms; + int64_t last_pts_ms; vpx_rational_t framerate; /* These pointers are to the start of an element */ @@ -647,7 +665,7 @@ unsigned char track_number; unsigned short block_timecode = 0; unsigned char flags; - uint64_t pts_ms; + int64_t pts_ms; int start_cluster = 0, is_keyframe; /* Calculate the PTS of this frame in milliseconds */ @@ -907,7 +925,7 @@ static const arg_def_t resize_down_thresh = ARG_DEF(NULL, "resize-down", 1, "Downscale threshold (buf %)"); static const arg_def_t end_usage = ARG_DEF(NULL, "end-usage", 1, - "VBR=0 | CBR=1"); + "VBR=0 | CBR=1 | CQ=2"); static const arg_def_t target_bitrate = ARG_DEF(NULL, "target-bitrate", 1, "Bitrate (kbps)"); static const arg_def_t min_quantizer = ARG_DEF(NULL, "min-q", 1, @@ -978,23 +996,34 @@ static const arg_def_t auto_altref = ARG_DEF(NULL, "auto-alt-ref", 1, "Enable automatic alt reference frames"); static const arg_def_t arnr_maxframes = ARG_DEF(NULL, "arnr-maxframes", 1, - "alt_ref Max Frames"); + "AltRef Max Frames"); static const arg_def_t arnr_strength = 
ARG_DEF(NULL, "arnr-strength", 1, - "alt_ref Strength"); + "AltRef Strength"); static const arg_def_t arnr_type = ARG_DEF(NULL, "arnr-type", 1, - "alt_ref Type"); + "AltRef Type"); +static const struct arg_enum_list tuning_enum[] = { + {"psnr", VP8_TUNE_PSNR}, + {"ssim", VP8_TUNE_SSIM}, + {NULL, 0} +}; +static const arg_def_t tune_ssim = ARG_DEF_ENUM(NULL, "tune", 1, + "Material to favor", tuning_enum); +static const arg_def_t cq_level = ARG_DEF(NULL, "cq-level", 1, + "Constrained Quality Level"); static const arg_def_t *vp8_args[] = { &cpu_used, &auto_altref, &noise_sens, &sharpness, &static_thresh, - &token_parts, &arnr_maxframes, &arnr_strength, &arnr_type, NULL + &token_parts, &arnr_maxframes, &arnr_strength, &arnr_type, + &tune_ssim, &cq_level, NULL }; static const int vp8_arg_ctrl_map[] = { VP8E_SET_CPUUSED, VP8E_SET_ENABLEAUTOALTREF, VP8E_SET_NOISE_SENSITIVITY, VP8E_SET_SHARPNESS, VP8E_SET_STATIC_THRESHOLD, VP8E_SET_TOKEN_PARTITIONS, - VP8E_SET_ARNR_MAXFRAMES, VP8E_SET_ARNR_STRENGTH , VP8E_SET_ARNR_TYPE, 0 + VP8E_SET_ARNR_MAXFRAMES, VP8E_SET_ARNR_STRENGTH , VP8E_SET_ARNR_TYPE, + VP8E_SET_TUNING, VP8E_SET_CQ_LEVEL, 0 }; #endif @@ -1073,6 +1102,7 @@ int psnr_count = 0; exec_name = argv_[0]; + ebml.last_pts_ms = -1; if (argc < 3) usage_exit(); @@ -1189,6 +1219,12 @@ */ cfg.g_timebase.den = 1000; + /* Never use the library's default resolution, require it be parsed + * from the file or set on the command line. + */ + cfg.g_w = 0; + cfg.g_h = 0; + /* Now parse the remainder of the parameters. 
*/ for (argi = argj = argv; (*argj = *argi); argi += arg.argv_step) { @@ -1300,7 +1336,7 @@ if (arg_ctrl_cnt < ARG_CTRL_CNT_MAX) { arg_ctrls[arg_ctrl_cnt][0] = ctrl_args_map[i]; - arg_ctrls[arg_ctrl_cnt][1] = arg_parse_int(&arg); + arg_ctrls[arg_ctrl_cnt][1] = arg_parse_enum_or_int(&arg); arg_ctrl_cnt++; } } @@ -1330,11 +1366,11 @@ { int frames_in = 0, frames_out = 0; unsigned long nbytes = 0; - size_t detect_bytes; struct detect_buffer detect; /* Parse certain options from the input file, if possible */ - infile = strcmp(in_fn, "-") ? fopen(in_fn, "rb") : stdin; + infile = strcmp(in_fn, "-") ? fopen(in_fn, "rb") + : set_binary_mode(stdin); if (!infile) { @@ -1344,13 +1380,11 @@ /* For RAW input sources, these bytes will applied on the first frame * in read_frame(). - * We can always read 4 bytes because the minimum supported frame size - * is 2x2. */ - detect_bytes = fread(detect.buf, 1, 4, infile); - detect.valid = 0; + detect.buf_read = fread(detect.buf, 1, 4, infile); + detect.position = 0; - if (detect_bytes == 4 && file_is_y4m(infile, &y4m, detect.buf)) + if (detect.buf_read == 4 && file_is_y4m(infile, &y4m, detect.buf)) { if (y4m_input_open(&y4m, infile, detect.buf, 4) >= 0) { @@ -1375,8 +1409,8 @@ return EXIT_FAILURE; } } - else if (detect_bytes == 4 && - file_is_ivf(infile, &fourcc, &cfg.g_w, &cfg.g_h, detect.buf)) + else if (detect.buf_read == 4 && + file_is_ivf(infile, &fourcc, &cfg.g_w, &cfg.g_h, &detect)) { file_type = FILE_TYPE_IVF; switch (fourcc) @@ -1395,8 +1429,15 @@ else { file_type = FILE_TYPE_RAW; - detect.valid = 1; } + + if(!cfg.g_w || !cfg.g_h) + { + fprintf(stderr, "Specify stream dimensions with --width (-w) " + " and --height (-h).\n"); + return EXIT_FAILURE; + } + #define SHOW(field) fprintf(stderr, " %-28s = %d\n", #field, cfg.field) if (verbose && pass == 0) @@ -1449,7 +1490,8 @@ cfg.g_w, cfg.g_h, 1); } - outfile = strcmp(out_fn, "-") ? fopen(out_fn, "wb") : stdout; + outfile = strcmp(out_fn, "-") ? 
fopen(out_fn, "wb") + : set_binary_mode(stdout); if (!outfile) { @@ -1527,7 +1569,7 @@ vpx_codec_iter_t iter = NULL; const vpx_codec_cx_pkt_t *pkt; struct vpx_usec_timer timer; - int64_t frame_start; + int64_t frame_start, next_frame_start; if (!arg_limit || frames_in < arg_limit) { @@ -1548,9 +1590,11 @@ frame_start = (cfg.g_timebase.den * (int64_t)(frames_in - 1) * arg_framerate.den) / cfg.g_timebase.num / arg_framerate.num; + next_frame_start = (cfg.g_timebase.den * (int64_t)(frames_in) + * arg_framerate.den) + / cfg.g_timebase.num / arg_framerate.num; vpx_codec_encode(&encoder, frame_avail ? &raw : NULL, frame_start, - cfg.g_timebase.den * arg_framerate.den - / cfg.g_timebase.num / arg_framerate.num, + next_frame_start - frame_start, 0, arg_deadline); vpx_usec_timer_mark(&timer); cx_time += vpx_usec_timer_elapsed(&timer); @@ -1658,7 +1702,7 @@ } fclose(outfile); - stats_close(&stats); + stats_close(&stats, arg_passes-1); fprintf(stderr, "\n"); if (one_pass_only) diff -Nru libvpx-0.9.5/vpx_mem/intel_linux/vpx_mem.c libvpx-0.9.6/vpx_mem/intel_linux/vpx_mem.c --- libvpx-0.9.5/vpx_mem/intel_linux/vpx_mem.c 2010-10-28 13:14:14.000000000 +0000 +++ libvpx-0.9.6/vpx_mem/intel_linux/vpx_mem.c 1970-01-01 00:00:00.000000000 +0000 @@ -1,951 +0,0 @@ -/* - * Copyright (c) 2010 The WebM project authors. All Rights Reserved. - * - * Use of this source code is governed by a BSD-style license - * that can be found in the LICENSE file in the root of the source - * tree. An additional intellectual property rights grant can be found - * in the file PATENTS. All contributing project authors may - * be found in the AUTHORS file in the root of the source tree. 
- */ - - -#define __VPX_MEM_C__ - -#include "vpx_mem.h" -#include -#include -#include - -#ifndef CONFIG_MEM_MANAGER -# if defined(VXWORKS) -# define CONFIG_MEM_MANAGER 1 //include heap manager functionality, -//default: enabled on vxworks -# else -# define CONFIG_MEM_MANAGER 0 //include heap manager functionality -# endif -#endif - -#ifndef CONFIG_MEM_TRACKER -# define CONFIG_MEM_TRACKER 1 //include xvpx_* calls in the lib -#endif - -#ifndef CONFIG_MEM_CHECKS -# define CONFIG_MEM_CHECKS 0 //include some basic safety checks in -//vpx_memcpy, _memset, and _memmove -#endif - -#ifndef USE_GLOBAL_FUNCTION_POINTERS -# define USE_GLOBAL_FUNCTION_POINTERS 0 //use function pointers instead of compiled functions. -#endif - -#if CONFIG_MEM_TRACKER -# include "vpx_mem_tracker.h" -# if VPX_MEM_TRACKER_VERSION_CHIEF != 2 || VPX_MEM_TRACKER_VERSION_MAJOR != 5 -# error "vpx_mem requires memory tracker version 2.5 to track memory usage" -# endif -#endif - -#define ADDRESS_STORAGE_SIZE sizeof(size_t) - -#ifndef DEFAULT_ALIGNMENT -# if defined(VXWORKS) -# define DEFAULT_ALIGNMENT 32 //default addr alignment to use in -//calls to vpx_* functions other -//than vpx_memalign -# else -# define DEFAULT_ALIGNMENT 1 -# endif -#endif - -#if DEFAULT_ALIGNMENT < 1 -# error "DEFAULT_ALIGNMENT must be >= 1!" -#endif - -#if CONFIG_MEM_TRACKER -# define TRY_BOUNDS_CHECK 1 //when set to 1 pads each allocation, -//integrity can be checked using -//vpx_memory_tracker_check_integrity -//or on free by defining -//TRY_BOUNDS_CHECK_ON_FREE -static unsigned long g_alloc_count = 0; - -#else -# define TRY_BOUNDS_CHECK 0 -#endif - -#if TRY_BOUNDS_CHECK -# define TRY_BOUNDS_CHECK_ON_FREE 0 //checks mem integrity on every -//free, very expensive -# define BOUNDS_CHECK_VALUE 0xdeadbeef //value stored before/after ea. -//mem addr for bounds checking -# define BOUNDS_CHECK_PAD_SIZE 32 //size of the padding before and -//after ea allocation to be filled -//with BOUNDS_CHECK_VALUE. 
-//this should be a multiple of 4 -#else -# define BOUNDS_CHECK_VALUE 0 -# define BOUNDS_CHECK_PAD_SIZE 0 -#endif - -#if CONFIG_MEM_MANAGER -# include "heapmm.h" -# include "hmm_intrnl.h" - -# define SHIFT_HMM_ADDR_ALIGN_UNIT 5 -# define TOTAL_MEMORY_TO_ALLOCATE 20971520 // 20 * 1024 * 1024 - -# define MM_DYNAMIC_MEMORY 1 -# if MM_DYNAMIC_MEMORY -static unsigned char *g_p_mng_memory_raw = NULL; -static unsigned char *g_p_mng_memory = NULL; -# else -static unsigned char g_p_mng_memory[TOTAL_MEMORY_TO_ALLOCATE]; -# endif - -static size_t g_mm_memory_size = TOTAL_MEMORY_TO_ALLOCATE; - -static hmm_descriptor hmm_d; -static int g_mng_memory_allocated = 0; - -static int vpx_mm_create_heap_memory(); -static void *vpx_mm_realloc(void *memblk, size_t size); -#endif //CONFIG_MEM_MANAGER - -#if USE_GLOBAL_FUNCTION_POINTERS - -struct GLOBAL_FUNC_POINTERS -{ - g_malloc_func g_malloc; - g_calloc_func g_calloc; - g_realloc_func g_realloc; - g_free_func g_free; - g_memcpy_func g_memcpy; - g_memset_func g_memset; - g_memmove_func g_memmove; -}; -struct GLOBAL_FUNC_POINTERS *g_func = 0; - -# define VPX_MALLOC_L g_func->g_malloc -# define VPX_REALLOC_L g_func->g_realloc -# define VPX_FREE_L g_func->g_free -# define VPX_MEMCPY_L g_func->g_memcpy -# define VPX_MEMSET_L g_func->g_memset -# define VPX_MEMMOVE_L g_func->g_memmove - -#else -# define VPX_MALLOC_L malloc -# define VPX_REALLOC_L realloc -# define VPX_FREE_L free -# define VPX_MEMCPY_L memcpy -# define VPX_MEMSET_L memset -# define VPX_MEMMOVE_L memmove -#endif // USE_GLOBAL_FUNCTION_POINTERS - -/* Should probably use a vpx_mem logger function. 
*/ -#define __REMOVE_PRINTFS -#ifdef __REMOVE_PRINTFS -#define _P(x) -#else -#define _P(x) x -#endif - -/*returns an addr aligned to the byte boundary specified by align*/ -#define align_addr(addr,align) \ - (void*)(((size_t)(addr) + ((align) - 1)) & (size_t)-(align)) - -unsigned int vpx_mem_get_version() -{ - unsigned int ver = ((unsigned int)(unsigned char)VPX_MEM_VERSION_CHIEF << 24 | - (unsigned int)(unsigned char)VPX_MEM_VERSION_MAJOR << 16 | - (unsigned int)(unsigned char)VPX_MEM_VERSION_MINOR << 8 | - (unsigned int)(unsigned char)VPX_MEM_VERSION_PATCH); - return ver; -} - -int vpx_mem_set_heap_size(size_t size) -{ - int ret = -1; - -#if CONFIG_MEM_MANAGER -#if MM_DYNAMIC_MEMORY - - if (!g_mng_memory_allocated && size) - { - g_mm_memory_size = size; - ret = 0; - } - else - ret = -3; - -#else - ret = -2; -#endif -#else - (void)size; -#endif - - return ret; -} - -void *vpx_memalign(size_t align, size_t size) -{ - void *addr, - * x = NULL; - -#if CONFIG_MEM_MANAGER - int number_aau; - - if (vpx_mm_create_heap_memory() < 0) - { - _P(printf("[vpx][mm] ERROR vpx_memalign() Couldn't create memory for Heap.\n");) - } - - number_aau = ((size + align - 1 + ADDRESS_STORAGE_SIZE) >> - SHIFT_HMM_ADDR_ALIGN_UNIT) + 1; - - addr = hmm_alloc(&hmm_d, number_aau); -#else - addr = VPX_MALLOC_L(size + align - 1 + ADDRESS_STORAGE_SIZE); -#endif //CONFIG_MEM_MANAGER - - if (addr) - { - x = align_addr((unsigned char *)addr + ADDRESS_STORAGE_SIZE, (int)align); - /* save the actual malloc address */ - ((size_t *)x)[-1] = (size_t)addr; - } - - return x; -} - -void *vpx_malloc(size_t size) -{ - return vpx_memalign(DEFAULT_ALIGNMENT, size); -} - -void *vpx_calloc(size_t num, size_t size) -{ - void *x; - - x = vpx_memalign(DEFAULT_ALIGNMENT, num * size); - - if (x) - VPX_MEMSET_L(x, 0, num * size); - - return x; -} - -void *vpx_realloc(void *memblk, size_t size) -{ - void *addr, - * new_addr = NULL; - int align = DEFAULT_ALIGNMENT; - - /* - The realloc() function changes the size of the 
object pointed to by - ptr to the size specified by size, and returns a pointer to the - possibly moved block. The contents are unchanged up to the lesser - of the new and old sizes. If ptr is null, realloc() behaves like - malloc() for the specified size. If size is zero (0) and ptr is - not a null pointer, the object pointed to is freed. - */ - if (!memblk) - new_addr = vpx_malloc(size); - else if (!size) - vpx_free(memblk); - else - { - addr = (void *)(((size_t *)memblk)[-1]); - memblk = NULL; - -#if CONFIG_MEM_MANAGER - new_addr = vpx_mm_realloc(addr, size + align + ADDRESS_STORAGE_SIZE); -#else - new_addr = VPX_REALLOC_L(addr, size + align + ADDRESS_STORAGE_SIZE); -#endif - - if (new_addr) - { - addr = new_addr; - new_addr = (void *)(((size_t) - ((unsigned char *)new_addr + ADDRESS_STORAGE_SIZE) + (align - 1)) & - (size_t) - align); - /* save the actual malloc address */ - ((size_t *)new_addr)[-1] = (size_t)addr; - } - } - - return new_addr; -} - -void vpx_free(void *memblk) -{ - if (memblk) - { - void *addr = (void *)(((size_t *)memblk)[-1]); -#if CONFIG_MEM_MANAGER - hmm_free(&hmm_d, addr); -#else - VPX_FREE_L(addr); -#endif - } -} - -void *vpx_mem_alloc(int id, size_t size, size_t align) -{ -#if defined CHIP_DM642 || defined __uClinux__ - void *mem = (void *)mem_alloc(id, size, align); - - if (!mem) - { - _P(fprintf(stderr, - "\n" - "*********************************************************\n" - "WARNING: mem_alloc returned 0 for id=%p size=%u align=%u.\n" - "*********************************************************\n", - mem, size, align)); - // should no longer need this. Softier says it's fixed. 
2005-01-21 tjf - //#if defined __uClinux__ - //while(1)usleep(1000000); - //#endif - } - -#if defined __uClinux__ - else if (mem == (void *)0xFFFFFFFF) - { - // out of memory/error - mem = (void *)0; - - _P(fprintf(stderr, - "\n" - "******************************************************\n" - "ERROR: mem_alloc id=%p size=%u align=%u OUT OF MEMORY.\n" - "******************************************************\n", - mem, size, align)); - } - -#endif // __uClinux__ - - return mem; -#else - (void)id; - (void)size; - (void)align; - return (void *)0; -#endif -} - -void vpx_mem_free(int id, void *mem, size_t size) -{ -#if defined CHIP_DM642 || defined __uClinux__ - - if (!mem) - { - _P(fprintf(stderr, - "\n" - "**************************************\n" - "WARNING: 0 being free'd id=%p size=%u.\n" - "**************************************\n", - id, size)); - - // should no longer need this. Softier says it's fixed. 2005-01-21 tjf - //#if defined __uClinux__ - //while(1)usleep(1000000); - //#endif - } - - mem_free(id, mem, size); -#else - (void)id; - (void)mem; - (void)size; -#endif -} - - -#if CONFIG_MEM_TRACKER - -void *xvpx_mem_alloc(int id, size_t size, size_t align, char *file, int line) -{ - void *mem = vpx_mem_alloc(id, size, align); - - vpx_memory_tracker_add((size_t)mem, size, file, line, 0); - - return mem; -} - -void xvpx_mem_free(int id, void *mem, size_t size, char *file, int line) -{ - if (vpx_memory_tracker_remove((size_t)mem) == -2) - { -#if REMOVE_PRINTFS - (void)file; - (void)line; -#endif - _P(fprintf(stderr, "[vpx_mem][xvpx_mem_free] addr: %p (id=%p size=%u) " - "not found in list; freed from file:%s" - " line:%d\n", mem, id, size, file, line)); - } - - vpx_mem_free(id, mem, size); -} - -void *xvpx_memalign(size_t align, size_t size, char *file, int line) -{ -#if TRY_BOUNDS_CHECK - unsigned char *x_bounds; -#endif - - void *x; - - if (g_alloc_count == 0) - { -#if TRY_BOUNDS_CHECK - int i_rv = vpx_memory_tracker_init(BOUNDS_CHECK_PAD_SIZE, 
BOUNDS_CHECK_VALUE); -#else - int i_rv = vpx_memory_tracker_init(0, 0); -#endif - - if (i_rv < 0) - { - _P(printf("ERROR xvpx_malloc MEM_TRACK_USAGE error vpx_memory_tracker_init().\n");) - } - } - -#if TRY_BOUNDS_CHECK - { - int i; - unsigned int tempme = BOUNDS_CHECK_VALUE; - - x_bounds = vpx_memalign(align, size + (BOUNDS_CHECK_PAD_SIZE * 2)); - - if (x_bounds) - { - /*we're aligning the address twice here but to keep things - consistent we want to have the padding come before the stored - address so no matter what free function gets called we will - attempt to free the correct address*/ - x_bounds = (unsigned char *)(((size_t *)x_bounds)[-1]); - x = align_addr(x_bounds + BOUNDS_CHECK_PAD_SIZE + ADDRESS_STORAGE_SIZE, - (int)align); - /* save the actual malloc address */ - ((size_t *)x)[-1] = (size_t)x_bounds; - - for (i = 0; i < BOUNDS_CHECK_PAD_SIZE; i += sizeof(unsigned int)) - { - VPX_MEMCPY_L(x_bounds + i, &tempme, sizeof(unsigned int)); - VPX_MEMCPY_L((unsigned char *)x + size + i, - &tempme, sizeof(unsigned int)); - } - } - else - x = NULL; - } -#else - x = vpx_memalign(align, size); -#endif //TRY_BOUNDS_CHECK - - g_alloc_count++; - - vpx_memory_tracker_add((size_t)x, size, file, line, 1); - - return x; -} - -void *xvpx_malloc(size_t size, char *file, int line) -{ - return xvpx_memalign(DEFAULT_ALIGNMENT, size, file, line); -} - -void *xvpx_calloc(size_t num, size_t size, char *file, int line) -{ - void *x = xvpx_memalign(DEFAULT_ALIGNMENT, num * size, file, line); - - if (x) - VPX_MEMSET_L(x, 0, num * size); - - return x; -} - -void *xvpx_realloc(void *memblk, size_t size, char *file, int line) -{ - struct mem_block *p = NULL; - int orig_size = 0, - orig_line = 0; - char *orig_file = NULL; - -#if TRY_BOUNDS_CHECK - unsigned char *x_bounds = memblk ? 
- (unsigned char *)(((size_t *)memblk)[-1]) : - NULL; -#endif - - void *x; - - if (g_alloc_count == 0) - { -#if TRY_BOUNDS_CHECK - - if (!vpx_memory_tracker_init(BOUNDS_CHECK_PAD_SIZE, BOUNDS_CHECK_VALUE)) -#else - if (!vpx_memory_tracker_init(0, 0)) -#endif - { - _P(printf("ERROR xvpx_malloc MEM_TRACK_USAGE error vpx_memory_tracker_init().\n");) - } - } - - if (p = vpx_memory_tracker_find((size_t)memblk)) - { - orig_size = p->size; - orig_file = p->file; - orig_line = p->line; - } - -#if TRY_BOUNDS_CHECK_ON_FREE - vpx_memory_tracker_check_integrity(file, line); -#endif - - //have to do this regardless of success, because - //the memory that does get realloc'd may change - //the bounds values of this block - vpx_memory_tracker_remove((size_t)memblk); - -#if TRY_BOUNDS_CHECK - { - int i; - unsigned int tempme = BOUNDS_CHECK_VALUE; - - x_bounds = vpx_realloc(memblk, size + (BOUNDS_CHECK_PAD_SIZE * 2)); - - if (x_bounds) - { - x_bounds = (unsigned char *)(((size_t *)x_bounds)[-1]); - x = align_addr(x_bounds + BOUNDS_CHECK_PAD_SIZE + ADDRESS_STORAGE_SIZE, - (int)DEFAULT_ALIGNMENT); - /* save the actual malloc address */ - ((size_t *)x)[-1] = (size_t)x_bounds; - - for (i = 0; i < BOUNDS_CHECK_PAD_SIZE; i += sizeof(unsigned int)) - { - VPX_MEMCPY_L(x_bounds + i, &tempme, sizeof(unsigned int)); - VPX_MEMCPY_L((unsigned char *)x + size + i, - &tempme, sizeof(unsigned int)); - } - } - else - x = NULL; - } -#else - x = vpx_realloc(memblk, size); -#endif //TRY_BOUNDS_CHECK - - if (x) - vpx_memory_tracker_add((size_t)x, size, file, line, 1); - else - vpx_memory_tracker_add((size_t)memblk, orig_size, orig_file, orig_line, 1); - - return x; -} - -void xvpx_free(void *p_address, char *file, int line) -{ -#if TRY_BOUNDS_CHECK - unsigned char *p_bounds_address = (unsigned char *)p_address; - //p_bounds_address -= BOUNDS_CHECK_PAD_SIZE; -#endif - -#if !TRY_BOUNDS_CHECK_ON_FREE - (void)file; - (void)line; -#endif - - if (p_address) - { -#if TRY_BOUNDS_CHECK_ON_FREE - 
vpx_memory_tracker_check_integrity(file, line); -#endif - - //if the addr isn't found in the list, assume it was allocated via - //vpx_ calls not xvpx_, therefore it does not contain any padding - if (vpx_memory_tracker_remove((size_t)p_address) == -2) - { - p_bounds_address = p_address; - _P(fprintf(stderr, "[vpx_mem][xvpx_free] addr: %p not found in" - " list; freed from file:%s" - " line:%d\n", p_address, file, line)); - } - else - --g_alloc_count; - -#if TRY_BOUNDS_CHECK - vpx_free(p_bounds_address); -#else - vpx_free(p_address); -#endif - - if (!g_alloc_count) - vpx_memory_tracker_destroy(); - } -} - -#endif /*CONFIG_MEM_TRACKER*/ - -#if CONFIG_MEM_CHECKS -#if defined(VXWORKS) -#include //for task_delay() -/* This function is only used to get a stack trace of the player -object so we can se where we are having a problem. */ -static int get_my_tt(int task) -{ - tt(task); - - return 0; -} - -static void vx_sleep(int msec) -{ - int ticks_to_sleep = 0; - - if (msec) - { - int msec_per_tick = 1000 / sys_clk_rate_get(); - - if (msec < msec_per_tick) - ticks_to_sleep++; - else - ticks_to_sleep = msec / msec_per_tick; - } - - task_delay(ticks_to_sleep); -} -#endif -#endif - -void *vpx_memcpy(void *dest, const void *source, size_t length) -{ -#if CONFIG_MEM_CHECKS - - if (((int)dest < 0x4000) || ((int)source < 0x4000)) - { - _P(printf("WARNING: vpx_memcpy dest:0x%x source:0x%x len:%d\n", (int)dest, (int)source, length);) - -#if defined(VXWORKS) - sp(get_my_tt, task_id_self(), 0, 0, 0, 0, 0, 0, 0, 0); - - vx_sleep(10000); -#endif - } - -#endif - - return VPX_MEMCPY_L(dest, source, length); -} - -void *vpx_memset(void *dest, int val, size_t length) -{ -#if CONFIG_MEM_CHECKS - - if ((int)dest < 0x4000) - { - _P(printf("WARNING: vpx_memset dest:0x%x val:%d len:%d\n", (int)dest, val, length);) - -#if defined(VXWORKS) - sp(get_my_tt, task_id_self(), 0, 0, 0, 0, 0, 0, 0, 0); - - vx_sleep(10000); -#endif - } - -#endif - - return VPX_MEMSET_L(dest, val, length); -} - -void 
*vpx_memmove(void *dest, const void *src, size_t count) -{ -#if CONFIG_MEM_CHECKS - - if (((int)dest < 0x4000) || ((int)src < 0x4000)) - { - _P(printf("WARNING: vpx_memmove dest:0x%x src:0x%x count:%d\n", (int)dest, (int)src, count);) - -#if defined(VXWORKS) - sp(get_my_tt, task_id_self(), 0, 0, 0, 0, 0, 0, 0, 0); - - vx_sleep(10000); -#endif - } - -#endif - - return VPX_MEMMOVE_L(dest, src, count); -} - -#if CONFIG_MEM_MANAGER - -static int vpx_mm_create_heap_memory() -{ - int i_rv = 0; - - if (!g_mng_memory_allocated) - { -#if MM_DYNAMIC_MEMORY - g_p_mng_memory_raw = - (unsigned char *)malloc(g_mm_memory_size + HMM_ADDR_ALIGN_UNIT); - - if (g_p_mng_memory_raw) - { - g_p_mng_memory = (unsigned char *)((((unsigned int)g_p_mng_memory_raw) + - HMM_ADDR_ALIGN_UNIT - 1) & - -(int)HMM_ADDR_ALIGN_UNIT); - - _P(printf("[vpx][mm] total memory size:%d g_p_mng_memory_raw:0x%x g_p_mng_memory:0x%x\n" - , g_mm_memory_size + HMM_ADDR_ALIGN_UNIT - , (unsigned int)g_p_mng_memory_raw - , (unsigned int)g_p_mng_memory);) - } - else - { - _P(printf("[vpx][mm] Couldn't allocate memory:%d for vpx memory manager.\n" - , g_mm_memory_size);) - - i_rv = -1; - } - - if (g_p_mng_memory) -#endif - { - int chunk_size = 0; - - g_mng_memory_allocated = 1; - - hmm_init(&hmm_d); - - chunk_size = g_mm_memory_size >> SHIFT_HMM_ADDR_ALIGN_UNIT; - - chunk_size -= DUMMY_END_BLOCK_BAUS; - - _P(printf("[vpx][mm] memory size:%d for vpx memory manager. 
g_p_mng_memory:0x%x chunk_size:%d\n" - , g_mm_memory_size - , (unsigned int)g_p_mng_memory - , chunk_size);) - - hmm_new_chunk(&hmm_d, (void *)g_p_mng_memory, chunk_size); - } - -#if MM_DYNAMIC_MEMORY - else - { - _P(printf("[vpx][mm] Couldn't allocate memory:%d for vpx memory manager.\n" - , g_mm_memory_size);) - - i_rv = -1; - } - -#endif - } - - return i_rv; -} - -static void *vpx_mm_realloc(void *memblk, size_t size) -{ - void *p_ret = NULL; - - if (vpx_mm_create_heap_memory() < 0) - { - _P(printf("[vpx][mm] ERROR vpx_mm_realloc() Couldn't create memory for Heap.\n");) - } - else - { - int i_rv = 0; - int old_num_aaus; - int new_num_aaus; - - old_num_aaus = hmm_true_size(memblk); - new_num_aaus = (size >> SHIFT_HMM_ADDR_ALIGN_UNIT) + 1; - - if (old_num_aaus == new_num_aaus) - { - p_ret = memblk; - } - else - { - i_rv = hmm_resize(&hmm_d, memblk, new_num_aaus); - - if (i_rv == 0) - { - p_ret = memblk; - } - else - { - /* Error. Try to malloc and then copy data. */ - void *p_from_malloc; - - new_num_aaus = (size >> SHIFT_HMM_ADDR_ALIGN_UNIT) + 1; - p_from_malloc = hmm_alloc(&hmm_d, new_num_aaus); - - if (p_from_malloc) - { - vpx_memcpy(p_from_malloc, memblk, size); - hmm_free(&hmm_d, memblk); - - p_ret = p_from_malloc; - } - } - } - } - - return p_ret; -} -#endif //CONFIG_MEM_MANAGER - -#if USE_GLOBAL_FUNCTION_POINTERS -# if CONFIG_MEM_TRACKER -extern int vpx_memory_tracker_set_functions(g_malloc_func g_malloc_l - , g_calloc_func g_calloc_l - , g_realloc_func g_realloc_l - , g_free_func g_free_l - , g_memcpy_func g_memcpy_l - , g_memset_func g_memset_l - , g_memmove_func g_memmove_l); -# endif -#endif -int vpx_mem_set_functions(g_malloc_func g_malloc_l - , g_calloc_func g_calloc_l - , g_realloc_func g_realloc_l - , g_free_func g_free_l - , g_memcpy_func g_memcpy_l - , g_memset_func g_memset_l - , g_memmove_func g_memmove_l) -{ -#if USE_GLOBAL_FUNCTION_POINTERS - - /* If use global functions is turned on then the - application must set the global functions before 
- it does anything else or vpx_mem will have - unpredictable results. */ - if (!g_func) - { - g_func = (struct GLOBAL_FUNC_POINTERS *)g_malloc_l(sizeof(struct GLOBAL_FUNC_POINTERS)); - - if (!g_func) - { - return -1; - } - } - -#if CONFIG_MEM_TRACKER - { - int rv = 0; - rv = vpx_memory_tracker_set_functions(g_malloc_l - , g_calloc_l - , g_realloc_l - , g_free_l - , g_memcpy_l - , g_memset_l - , g_memmove_l); - - if (rv < 0) - { - return rv; - } - } -#endif - - if (g_malloc_l) - g_func->g_malloc = g_malloc_l; - else - g_func->g_malloc = 0; - - if (g_calloc_l) - g_func->g_calloc = g_calloc_l; - else - g_func->g_calloc = 0; - - if (g_realloc_l) - g_func->g_realloc = g_realloc_l; - else - g_func->g_realloc = 0; - - if (g_free_l) - g_func->g_free = g_free_l; - else - g_func->g_free = 0; - - if (g_memcpy_l) - g_func->g_memcpy = g_memcpy_l; - else - g_func->g_memcpy = 0; - - if (g_memset_l) - g_func->g_memset = g_memset_l; - else - g_func->g_memset = 0; - - if (g_memmove_l) - g_func->g_memmove = g_memmove_l; - else - g_func->g_memmove = 0; - - return 0; -#else - (void)g_malloc_l; - (void)g_calloc_l; - (void)g_realloc_l; - (void)g_free_l; - (void)g_memcpy_l; - (void)g_memset_l; - (void)g_memmove_l; - return -1; -#endif -} - -int vpx_mem_unset_functions() -{ -#if USE_GLOBAL_FUNCTION_POINTERS - - if (g_func) - { - g_free_func temp_free; - - temp_free = g_func->g_free; - - temp_free(g_func); - g_func = 0; - } - -#endif - return 0; -} - -#ifdef _INTEL_LINUX -void *_intel_fast_memcpy(void *dest, const void *src, size_t count) -{ - - //memcpy(dest, src, count); - char *dst8 = (char *)dest; - char *src8 = (char *)src; - - while (count--) - { - *dst8++ = *src8++; - } - - return dest; -} - -void *_intel_fast_memset(void *dest, int c, size_t count) -{ - memset(dest, c, count); - return dest; -} - -void *_VEC_memzero(void *dest, int c, size_t count) -{ - memset(dest, 0, count); - return dest; -} - -#endif //_ICC diff -Nru libvpx-0.9.5/vpx_mem/intel_linux/vpx_mem_tracker.c 
libvpx-0.9.6/vpx_mem/intel_linux/vpx_mem_tracker.c --- libvpx-0.9.5/vpx_mem/intel_linux/vpx_mem_tracker.c 2010-10-28 13:14:14.000000000 +0000 +++ libvpx-0.9.6/vpx_mem/intel_linux/vpx_mem_tracker.c 1970-01-01 00:00:00.000000000 +0000 @@ -1,812 +0,0 @@ -/* - * Copyright (c) 2010 The WebM project authors. All Rights Reserved. - * - * Use of this source code is governed by a BSD-style license - * that can be found in the LICENSE file in the root of the source - * tree. An additional intellectual property rights grant can be found - * in the file PATENTS. All contributing project authors may - * be found in the AUTHORS file in the root of the source tree. - */ - - -/* - vpx_mem_tracker.c - - jwz 2003-09-30: - Stores a list of addreses, their size, and file and line they came from. - All exposed lib functions are prefaced by vpx_ and allow the global list - to be thread safe. - Current supported platforms are: - Linux, Win32, win_ce and vx_works - Further support can be added by defining the platform specific mutex - in the memory_tracker struct as well as calls to create/destroy/lock/unlock - the mutex in vpx_memory_tracker_init/Destroy and memory_tracker_lock_mutex/unlock_mutex -*/ - -#define NO_MUTEX - -#if defined(__uClinux__) -# include -#endif - -#if defined(LINUX) || defined(__uClinux__) -# include -#elif defined(WIN32) || defined(_WIN32_WCE) -# define WIN32_LEAN_AND_MEAN -# include -# include -#elif defined(VXWORKS) -# include -#elif defined(NDS_NITRO) -# include -# include -#endif - -#include -#include -#include //VXWORKS doesn't have a malloc/memory.h file, -//this should pull in malloc,free,etc. -#include - -#include "vpx_mem_tracker.h" - -#undef vpx_malloc //undefine any vpx_mem macros that may affect calls to -#undef vpx_free //memory functions in this file -#undef vpx_memcpy -#undef vpx_memset - - -#ifndef USE_GLOBAL_FUNCTION_POINTERS -# define USE_GLOBAL_FUNCTION_POINTERS 0 //use function pointers instead of compiled functions. 
-#endif - -#if USE_GLOBAL_FUNCTION_POINTERS -static mem_track_malloc_func g_malloc = malloc; -static mem_track_calloc_func g_calloc = calloc; -static mem_track_realloc_func g_realloc = realloc; -static mem_track_free_func g_free = free; -static mem_track_memcpy_func g_memcpy = memcpy; -static mem_track_memset_func g_memset = memset; -static mem_track_memmove_func g_memmove = memmove; -# define MEM_TRACK_MALLOC g_malloc -# define MEM_TRACK_FREE g_free -# define MEM_TRACK_MEMCPY g_memcpy -# define MEM_TRACK_MEMSET g_memset -#else -# define MEM_TRACK_MALLOC vpx_malloc -# define MEM_TRACK_FREE vpx_free -# define MEM_TRACK_MEMCPY vpx_memcpy -# define MEM_TRACK_MEMSET vpx_memset -#endif // USE_GLOBAL_FUNCTION_POINTERS - - -struct memory_tracker -{ - struct mem_block *head, - * tail; - int len, - totalsize; - unsigned int current_allocated, - max_allocated; - -#if defined(LINUX) || defined(__uClinux__) - pthread_mutex_t mutex; -#elif defined(WIN32) || defined(_WIN32_WCE) - HANDLE mutex; -#elif defined(VXWORKS) - SEM_ID mutex; -#elif defined(NDS_NITRO) - OSMutex mutex; -#elif defined(NO_MUTEX) -#else -#error "No mutex type defined for this platform!" 
-#endif - - int padding_size, - pad_value; -}; - -/* prototypes for internal library functions */ -static void memtrack_log(const char *fmt, ...); -static void memory_tracker_dump(); -static void memory_tracker_check_integrity(char *file, unsigned int line); -static void memory_tracker_add(size_t addr, unsigned int size, - char *file, unsigned int line, - int padded); -static int memory_tracker_remove(size_t addr); -static struct mem_block *memory_tracker_find(size_t addr); - -#if defined(NO_MUTEX) -# define memory_tracker_lock_mutex() (!g_b_mem_tracker_inited) -# define memory_tracker_unlock_mutex() -#else -static int memory_tracker_lock_mutex(); -static int memory_tracker_unlock_mutex(); -#endif - -static struct memory_tracker memtrack; //our global memory allocation list -static int g_b_mem_tracker_inited = 0; //indicates whether the global list has -//been initialized (1:yes/0:no) -static struct -{ - FILE *file; - int type; - void (*func)(void *userdata, const char *fmt, va_list args); - void *userdata; -} g_logging = {0}; - -extern void *vpx_malloc(size_t size); -extern void vpx_free(void *memblk); -extern void *vpx_memcpy(void *dest, const void *src, size_t length); -extern void *vpx_memset(void *dest, int val, size_t length); - -/* - * - * Exposed library functions - * -*/ - -/* - vpx_memory_tracker_init(int padding_size, int pad_value) - padding_size - the size of the padding before and after each mem addr. - Values > 0 indicate that integrity checks can be performed - by inspecting these areas. - pad_value - the initial value within the padding area before and after - each mem addr. 
- - Initializes global memory tracker structure - Allocates the head of the list -*/ -int vpx_memory_tracker_init(int padding_size, int pad_value) -{ - if (!g_b_mem_tracker_inited) - { - if (memtrack.head = (struct mem_block *)MEM_TRACK_MALLOC(sizeof(struct mem_block))) - { - int ret; - - MEM_TRACK_MEMSET(memtrack.head, 0, sizeof(struct mem_block)); - - memtrack.tail = memtrack.head; - - memtrack.current_allocated = 0; - memtrack.max_allocated = 0; - - memtrack.padding_size = padding_size; - memtrack.pad_value = pad_value; - -#if defined(LINUX) || defined(__uClinux__) - ret = pthread_mutex_init(&memtrack.mutex, - NULL); /*mutex attributes (NULL=default)*/ -#elif defined(WIN32) || defined(_WIN32_WCE) - memtrack.mutex = create_mutex(NULL, /*security attributes*/ - FALSE, /*we don't want initial ownership*/ - NULL); /*mutex name*/ - ret = !memtrack.mutex; -#elif defined(VXWORKS) - memtrack.mutex = sem_bcreate(SEM_Q_FIFO, /*SEM_Q_FIFO non-priority based mutex*/ - SEM_FULL); /*SEM_FULL initial state is unlocked*/ - ret = !memtrack.mutex; -#elif defined(NDS_NITRO) - os_init_mutex(&memtrack.mutex); - ret = 0; -#elif defined(NO_MUTEX) - ret = 0; -#endif - - if (ret) - { - memtrack_log("vpx_memory_tracker_init: Error creating mutex!\n"); - - MEM_TRACK_FREE(memtrack.head); - memtrack.head = NULL; - } - else - { - memtrack_log("Memory Tracker init'd, v."vpx_mem_tracker_version" pad_size:%d pad_val:0x%x %d\n" - , padding_size - , pad_value - , pad_value); - g_b_mem_tracker_inited = 1; - } - } - } - - return g_b_mem_tracker_inited; -} - -/* - vpx_memory_tracker_destroy() - If our global struct was initialized zeros out all its members, - frees memory and destroys it's mutex -*/ -void vpx_memory_tracker_destroy() -{ - - if (!memory_tracker_lock_mutex()) - { - struct mem_block *p = memtrack.head, - * p2 = memtrack.head; - - memory_tracker_dump(); - - while (p) - { - p2 = p; - p = p->next; - - MEM_TRACK_FREE(p2); - } - - memtrack.head = NULL; - memtrack.tail = NULL; - memtrack.len 
= 0; - memtrack.current_allocated = 0; - memtrack.max_allocated = 0; - - if ((g_logging.type == 0) && (g_logging.file != 0)) //&& (g_logging.file != stderr) ) - { -#if !defined(NDS_NITRO) - fclose(g_logging.file); -#endif - g_logging.file = NULL; - } - - memory_tracker_unlock_mutex(); - - g_b_mem_tracker_inited = 0; - - } - -} - -/* - vpx_memory_tracker_add(size_t addr, unsigned int size, - char * file, unsigned int line) - addr - memory address to be added to list - size - size of addr - file - the file addr was referenced from - line - the line in file addr was referenced from - Adds memory address addr, it's size, file and line it came from - to the global list via the thread safe internal library function -*/ -void vpx_memory_tracker_add(size_t addr, unsigned int size, - char *file, unsigned int line, - int padded) -{ - memory_tracker_add(addr, size, file, line, padded); -} - -/* - vpx_memory_tracker_remove(size_t addr) - addr - memory address to be removed from list - Removes addr from the global list via the thread safe - internal remove function - Return: - Same as described for memory_tracker_remove -*/ -int vpx_memory_tracker_remove(size_t addr) -{ - return memory_tracker_remove(addr); -} - -/* - vpx_memory_tracker_find(size_t addr) - addr - address to be found in list - Return: - If found, pointer to the memory block that matches addr - NULL otherwise -*/ -struct mem_block *vpx_memory_tracker_find(size_t addr) -{ - struct mem_block *p = NULL; - - if (!memory_tracker_lock_mutex()) - { - p = memory_tracker_find(addr); - memory_tracker_unlock_mutex(); - } - - return p; -} - -/* - vpx_memory_tracker_dump() - Locks the memory tracker's mutex and calls the internal - library function to dump the current contents of the - global memory allocation list -*/ -void vpx_memory_tracker_dump() -{ - if (!memory_tracker_lock_mutex()) - { - memory_tracker_dump(); - memory_tracker_unlock_mutex(); - } -} - -/* - vpx_memory_tracker_check_integrity(char* file, unsigned int 
line) - file - The file name where the check was placed - line - The line in file where the check was placed - Locks the memory tracker's mutex and calls the internal - integrity check function to inspect every address in the global - memory allocation list -*/ -void vpx_memory_tracker_check_integrity(char *file, unsigned int line) -{ - if (!memory_tracker_lock_mutex()) - { - memory_tracker_check_integrity(file, line); - memory_tracker_unlock_mutex(); - } -} - -/* - vpx_memory_tracker_set_log_type - Sets the logging type for the memory tracker. Based on the value it will - direct its output to the appropriate place. - Return: - 0: on success - -1: if the logging type could not be set, because the value was invalid - or because a file could not be opened -*/ -int vpx_memory_tracker_set_log_type(int type, char *option) -{ - int ret = -1; - - - switch (type) - { - case 0: - g_logging.type = 0; - - if (!option) - { - // g_logging.file = stderr; - ret = 0; - } - -#if !defined(NDS_NITRO) - else - { - if (g_logging.file = fopen((char *)option, "w")) - ret = 0; - } - -#endif - break; -#if defined(WIN32) && !defined(_WIN32_WCE) - case 1: - g_logging.type = type; - ret = 0; - break; -#endif - default: - break; - } - - //output the version to the new logging destination - if (!ret) - memtrack_log("Memory Tracker logging initialized, " - "Memory Tracker v."vpx_mem_tracker_version"\n"); - - return ret; -} - -/* - vpx_memory_tracker_set_log_func - Sets a logging function to be used by the memory tracker. 
- Return: - 0: on success - -1: if the logging type could not be set because logfunc was NULL -*/ -int vpx_memory_tracker_set_log_func(void *userdata, - void(*logfunc)(void *userdata, - const char *fmt, va_list args)) -{ - int ret = -1; - - if (logfunc) - { - g_logging.type = -1; - g_logging.userdata = userdata; - g_logging.func = logfunc; - ret = 0; - } - - //output the version to the new logging destination - if (!ret) - memtrack_log("Memory Tracker logging initialized, " - "Memory Tracker v."vpx_mem_tracker_version"\n"); - - return ret; -} - -/* - * - * END - Exposed library functions - * -*/ - - -/* - * - * Internal library functions - * -*/ - -static void memtrack_log(const char *fmt, ...) -{ - va_list list; - - va_start(list, fmt); - - switch (g_logging.type) - { - case -1: - - if (g_logging.func) - g_logging.func(g_logging.userdata, fmt, list); - - break; - case 0: - - if (g_logging.file) - { - vfprintf(g_logging.file, fmt, list); - fflush(g_logging.file); - } - - break; -#if defined(WIN32) && !defined(_WIN32_WCE) - case 1: - { - char temp[1024]; - _vsnprintf(temp, sizeof(temp) / sizeof(char) - 1, fmt, list); - output_debug_string(temp); - } - break; -#endif - default: - break; - } - - va_end(list); -} - -/* - memory_tracker_dump() - Dumps the current contents of the global memory allocation list -*/ -static void memory_tracker_dump() -{ - int i = 0; - struct mem_block *p = (memtrack.head ? 
memtrack.head->next : NULL); - - memtrack_log("\n_currently Allocated= %d; Max allocated= %d\n", - memtrack.current_allocated, memtrack.max_allocated); - - while (p) - { -#if defined(WIN32) && !defined(_WIN32_WCE) - - /*when using outputdebugstring, output filenames so they - can be clicked to be opened in visual studio*/ - if (g_logging.type == 1) - memtrack_log("memblocks[%d].addr= 0x%.8x, memblocks[%d].size= %d, file:\n" - " %s(%d):\n", i, - p->addr, i, p->size, - p->file, p->line); - else -#endif - memtrack_log("memblocks[%d].addr= 0x%.8x, memblocks[%d].size= %d, file: %s, line: %d\n", i, - p->addr, i, p->size, - p->file, p->line); - - p = p->next; - ++i; - } - - memtrack_log("\n"); -} - -/* - memory_tracker_check_integrity(char* file, unsigned int file) - file - the file name where the check was placed - line - the line in file where the check was placed - If a padding_size was supplied to vpx_memory_tracker_init() - this function will check ea. addr in the list verifying that - addr-padding_size and addr+padding_size is filled with pad_value -*/ -static void memory_tracker_check_integrity(char *file, unsigned int line) -{ - if (memtrack.padding_size) - { - int i, - index = 0; - unsigned char *p_show_me, - * p_show_me2; - unsigned int tempme = memtrack.pad_value, - dead1, - dead2; - unsigned char *x_bounds; - struct mem_block *p = memtrack.head->next; - - while (p) - { - //x_bounds = (unsigned char*)p->addr; - //back up VPX_BYTE_ALIGNMENT - //x_bounds -= memtrack.padding_size; - - if (p->padded) // can the bounds be checked? 
- { - /*yes, move to the address that was actually allocated - by the vpx_* calls*/ - x_bounds = (unsigned char *)(((size_t *)p->addr)[-1]); - - for (i = 0; i < memtrack.padding_size; i += sizeof(unsigned int)) - { - p_show_me = (x_bounds + i); - p_show_me2 = (unsigned char *)(p->addr + p->size + i); - - MEM_TRACK_MEMCPY(&dead1, p_show_me, sizeof(unsigned int)); - MEM_TRACK_MEMCPY(&dead2, p_show_me2, sizeof(unsigned int)); - - if ((dead1 != tempme) || (dead2 != tempme)) - { - memtrack_log("\n[vpx_mem integrity check failed]:\n" - " index[%d] {%s:%d} addr=0x%x, size=%d," - " file: %s, line: %d c0:0x%x c1:0x%x\n", - index, file, line, p->addr, p->size, p->file, - p->line, dead1, dead2); - } - } - } - - ++index; - p = p->next; - } - } -} - -/* - memory_tracker_add(size_t addr, unsigned int size, - char * file, unsigned int line) - Adds an address (addr), it's size, file and line number to our list. - Adjusts the total bytes allocated and max bytes allocated if necessary. - If memory cannot be allocated the list will be destroyed. 
-*/ -void memory_tracker_add(size_t addr, unsigned int size, - char *file, unsigned int line, - int padded) -{ - if (!memory_tracker_lock_mutex()) - { - struct mem_block *p; - - p = MEM_TRACK_MALLOC(sizeof(struct mem_block)); - - if (p) - { - p->prev = memtrack.tail; - p->prev->next = p; - p->addr = addr; - p->size = size; - p->line = line; - p->file = file; - p->padded = padded; - p->next = NULL; - - memtrack.tail = p; - - memtrack.current_allocated += size; - - if (memtrack.current_allocated > memtrack.max_allocated) - memtrack.max_allocated = memtrack.current_allocated; - - //memtrack_log("memory_tracker_add: added addr=0x%.8x\n", addr); - - memory_tracker_unlock_mutex(); - } - else - { - memtrack_log("memory_tracker_add: error allocating memory!\n"); - memory_tracker_unlock_mutex(); - vpx_memory_tracker_destroy(); - } - } -} - -/* - memory_tracker_remove(size_t addr) - Removes an address and its corresponding size (if they exist) - from the memory tracker list and adjusts the current number - of bytes allocated. - Return: - 0: on success - -1: if the mutex could not be locked - -2: if the addr was not found in the list -*/ -int memory_tracker_remove(size_t addr) -{ - int ret = -1; - - if (!memory_tracker_lock_mutex()) - { - struct mem_block *p; - - if (p = memory_tracker_find(addr)) - { - memtrack.current_allocated -= p->size; - - p->prev->next = p->next; - - if (p->next) - p->next->prev = p->prev; - else - memtrack.tail = p->prev; - - ret = 0; - MEM_TRACK_FREE(p); - } - else - { - memtrack_log("memory_tracker_remove(): addr not found in list, 0x%.8x\n", addr); - ret = -2; - } - - memory_tracker_unlock_mutex(); - } - - return ret; -} - -/* - memory_tracker_find(size_t addr) - Finds an address in our addrs list - NOTE: the mutex MUST be locked in the other internal - functions before calling this one. 
This avoids - the need for repeated locking and unlocking as in Remove - Returns: pointer to the mem block if found, NULL otherwise -*/ -static struct mem_block *memory_tracker_find(size_t addr) -{ - struct mem_block *p = NULL; - - if (memtrack.head) - { - p = memtrack.head->next; - - while (p && (p->addr != addr)) - p = p->next; - } - - return p; -} - - -#if !defined(NO_MUTEX) -/* - memory_tracker_lock_mutex() - Locks the memory tracker mutex with a platform specific call - Returns: - 0: Success - <0: Failure, either the mutex was not initialized - or the call to lock the mutex failed -*/ -static int memory_tracker_lock_mutex() -{ - int ret = -1; - - if (g_b_mem_tracker_inited) - { - -#if defined(LINUX) || defined(__uClinux__) - ret = pthread_mutex_lock(&memtrack.mutex); -#elif defined(WIN32) || defined(_WIN32_WCE) - ret = WaitForSingleObject(memtrack.mutex, INFINITE); -#elif defined(VXWORKS) - ret = sem_take(memtrack.mutex, WAIT_FOREVER); -#elif defined(NDS_NITRO) - os_lock_mutex(&memtrack.mutex); - ret = 0; -#endif - - if (ret) - { - memtrack_log("memory_tracker_lock_mutex: mutex lock failed\n"); - } - } - - return ret; -} - -/* - memory_tracker_unlock_mutex() - Unlocks the memory tracker mutex with a platform specific call - Returns: - 0: Success - <0: Failure, either the mutex was not initialized - or the call to unlock the mutex failed -*/ -static int memory_tracker_unlock_mutex() -{ - int ret = -1; - - if (g_b_mem_tracker_inited) - { - -#if defined(LINUX) || defined(__uClinux__) - ret = pthread_mutex_unlock(&memtrack.mutex); -#elif defined(WIN32) || defined(_WIN32_WCE) - ret = !release_mutex(memtrack.mutex); -#elif defined(VXWORKS) - ret = sem_give(memtrack.mutex); -#elif defined(NDS_NITRO) - os_unlock_mutex(&memtrack.mutex); - ret = 0; -#endif - - if (ret) - { - memtrack_log("memory_tracker_unlock_mutex: mutex unlock failed\n"); - } - } - - return ret; -} -#endif - -/* - vpx_memory_tracker_set_functions - - Sets the function pointers for the standard 
library functions. - - Return: - 0: on success - -1: if the use global function pointers is not set. -*/ -int vpx_memory_tracker_set_functions(mem_track_malloc_func g_malloc_l - , mem_track_calloc_func g_calloc_l - , mem_track_realloc_func g_realloc_l - , mem_track_free_func g_free_l - , mem_track_memcpy_func g_memcpy_l - , mem_track_memset_func g_memset_l - , mem_track_memmove_func g_memmove_l) -{ -#if USE_GLOBAL_FUNCTION_POINTERS - - if (g_malloc_l) - g_malloc = g_malloc_l; - - if (g_calloc_l) - g_calloc = g_calloc_l; - - if (g_realloc_l) - g_realloc = g_realloc_l; - - if (g_free_l) - g_free = g_free_l; - - if (g_memcpy_l) - g_memcpy = g_memcpy_l; - - if (g_memset_l) - g_memset = g_memset_l; - - if (g_memmove_l) - g_memmove = g_memmove_l; - - return 0; -#else - (void)g_malloc_l; - (void)g_calloc_l; - (void)g_realloc_l; - (void)g_free_l; - (void)g_memcpy_l; - (void)g_memset_l; - (void)g_memmove_l; - return -1; -#endif -} diff -Nru libvpx-0.9.5/vpx_mem/nds/vpx_mem_nds.c libvpx-0.9.6/vpx_mem/nds/vpx_mem_nds.c --- libvpx-0.9.5/vpx_mem/nds/vpx_mem_nds.c 2010-10-28 13:14:14.000000000 +0000 +++ libvpx-0.9.6/vpx_mem/nds/vpx_mem_nds.c 1970-01-01 00:00:00.000000000 +0000 @@ -1,66 +0,0 @@ -/* - * Copyright (c) 2010 The WebM project authors. All Rights Reserved. - * - * Use of this source code is governed by a BSD-style license - * that can be found in the LICENSE file in the root of the source - * tree. An additional intellectual property rights grant can be found - * in the file PATENTS. All contributing project authors may - * be found in the AUTHORS file in the root of the source tree. - */ - - -#define __VPX_MEM_C__ -#include "vpx_mem.h" -#include -#include "vpx_mem_intrnl.h" - -// Allocate memory from the Arena specified by id. Align it to -// the value specified by align. 
-void *vpx_mem_nds_alloc(osarena_id id, osheap_handle handle, size_t size, size_t align) -{ - void *addr, - * x = NULL; - - addr = os_alloc_from_heap((osarena_id) id, handle, - size + align - 1 + ADDRESS_STORAGE_SIZE); - - if (addr) - { - x = align_addr((unsigned char *)addr + ADDRESS_STORAGE_SIZE, (int)align); - - // save the actual malloc address - ((size_t *)x)[-1] = (size_t)addr; - } - - return x; -} - -// Free them memory allocated by vpx_mem_nds_alloc -void vpx_mem_nds_free(osarena_id id, osheap_handle handle, void *mem) -{ - if (mem) - { - void *addr = (void *)(((size_t *)mem)[-1]); - os_free_to_heap(id, handle, addr); - } -} - -int vpx_nds_alloc_heap(osarena_id id, u32 size) -{ - osheap_handle arena_handle; - void *nstart; - void *heap_start; - - nstart = os_init_alloc(id, os_get_arena_lo(id), os_get_arena_hi(id), 1); - os_set_arena_lo(id, nstart); - - heap_start = os_alloc_from_arena_lo(id, size, 32); - arena_handle = os_create_heap(id, heap_start, (void *)((u32)heap_start + size)); - - if (os_check_heap(id, arena_handle) == -1) - return -1; //ERROR: DTCM heap is not consistent - - (void)os_set_current_heap(id, arena_handle); - - return arena_handle; -} diff -Nru libvpx-0.9.5/vpx_mem/ti_c6x/vpx_mem_ti_6cx.c libvpx-0.9.6/vpx_mem/ti_c6x/vpx_mem_ti_6cx.c --- libvpx-0.9.5/vpx_mem/ti_c6x/vpx_mem_ti_6cx.c 2010-10-28 13:14:14.000000000 +0000 +++ libvpx-0.9.6/vpx_mem/ti_c6x/vpx_mem_ti_6cx.c 1970-01-01 00:00:00.000000000 +0000 @@ -1,116 +0,0 @@ -/* - * Copyright (c) 2010 The WebM project authors. All Rights Reserved. - * - * Use of this source code is governed by a BSD-style license - * that can be found in the LICENSE file in the root of the source - * tree. An additional intellectual property rights grant can be found - * in the file PATENTS. All contributing project authors may - * be found in the AUTHORS file in the root of the source tree. 
- */ - - -#define __VPX_MEM_C__ - -#include "..\include\vpx_mem.h" -#include -#include -#include -#include "..\include\vpx_mem_intrnl.h" - -void *vpx_mem_alloc(int id, size_t size, size_t align) -{ -#if defined CHIP_DM642 || defined __uClinux__ - void *mem = (void *)mem_alloc(id, size, align); - - if (!mem) - { - _P(fprintf(stderr, - "\n" - "*********************************************************\n" - "WARNING: mem_alloc returned 0 for id=%p size=%u align=%u.\n" - "*********************************************************\n", - mem, size, align)); - // should no longer need this. Softier says it's fixed. 2005-01-21 tjf - //#if defined __uClinux__ - //while(1)usleep(1000000); - //#endif - } - -#if defined __uClinux__ - else if (mem == (void *)0xFFFFFFFF) - { - // out of memory/error - mem = (void *)0; - - _P(fprintf(stderr, - "\n" - "******************************************************\n" - "ERROR: mem_alloc id=%p size=%u align=%u OUT OF MEMORY.\n" - "******************************************************\n", - mem, size, align)); - } - -#endif // __uClinux__ - - return mem; -#else - (void)id; - (void)size; - (void)align; - return (void *)0; -#endif -} - -void vpx_mem_free(int id, void *mem, size_t size) -{ -#if defined CHIP_DM642 || defined __uClinux__ - - if (!mem) - { - _P(fprintf(stderr, - "\n" - "**************************************\n" - "WARNING: 0 being free'd id=%p size=%u.\n" - "**************************************\n", - id, size)); - - // should no longer need this. Softier says it's fixed. 
2005-01-21 tjf - //#if defined __uClinux__ - //while(1)usleep(1000000); - //#endif - } - - mem_free(id, mem, size); -#else - (void)id; - (void)mem; - (void)size; -#endif -} - -#if CONFIG_MEM_TRACKER -void *xvpx_mem_alloc(int id, size_t size, size_t align, char *file, int line) -{ - void *mem = vpx_mem_alloc(id, size, align); - - vpx_memory_tracker_add((size_t)mem, size, file, line, 0); - - return mem; -} - -void xvpx_mem_free(int id, void *mem, size_t size, char *file, int line) -{ - if (vpx_memory_tracker_remove((size_t)mem) == -2) - { -#if REMOVE_PRINTFS - (void)file; - (void)line; -#endif - _P(fprintf(stderr, "[vpx_mem][xvpx_mem_free] addr: %p (id=%p size=%u) " - "not found in list; freed from file:%s" - " line:%d\n", mem, id, size, file, line)); - } - - vpx_mem_free(id, mem, size); -} -#endif /*CONFIG_MEM_TRACKER*/ diff -Nru libvpx-0.9.5/vpx_ports/mem_ops_aligned.h libvpx-0.9.6/vpx_ports/mem_ops_aligned.h --- libvpx-0.9.5/vpx_ports/mem_ops_aligned.h 2010-10-28 13:14:14.000000000 +0000 +++ libvpx-0.9.6/vpx_ports/mem_ops_aligned.h 2011-03-04 20:40:40.000000000 +0000 @@ -9,12 +9,12 @@ */ -/* \file mem_ops_aligned.h - * \brief Provides portable memory access primatives for operating on aligned +/* \file + * \brief Provides portable memory access primitives for operating on aligned * data * - * This file is split from mem_ops.h for easier maintainence. See mem_ops.h - * for a more detailed description of these primatives. + * This file is split from mem_ops.h for easier maintenance. See mem_ops.h + * for a more detailed description of these primitives. */ #ifndef INCLUDED_BY_MEM_OPS_H #error Include mem_ops.h, not mem_ops_aligned.h directly. 
diff -Nru libvpx-0.9.5/vpx_ports/mem_ops.h libvpx-0.9.6/vpx_ports/mem_ops.h --- libvpx-0.9.5/vpx_ports/mem_ops.h 2010-10-28 13:14:14.000000000 +0000 +++ libvpx-0.9.6/vpx_ports/mem_ops.h 2011-03-04 20:40:40.000000000 +0000 @@ -9,10 +9,10 @@ */ -/* \file mem_ops.h - * \brief Provides portable memory access primatives +/* \file + * \brief Provides portable memory access primitives * - * This function provides portable primatives for getting and setting of + * This function provides portable primitives for getting and setting of * signed and unsigned integers in 16, 24, and 32 bit sizes. The operations * can be performed on unaligned data regardless of hardware support for * unaligned accesses. diff -Nru libvpx-0.9.5/vpx_ports/vpx_timer.h libvpx-0.9.6/vpx_ports/vpx_timer.h --- libvpx-0.9.5/vpx_ports/vpx_timer.h 2010-10-28 13:14:14.000000000 +0000 +++ libvpx-0.9.6/vpx_ports/vpx_timer.h 2011-03-04 20:40:40.000000000 +0000 @@ -12,6 +12,8 @@ #ifndef VPX_TIMER_H #define VPX_TIMER_H +#if CONFIG_OS_SUPPORT + #if defined(_WIN32) /* * Win32 specific includes @@ -93,5 +95,27 @@ #endif } +#else /* CONFIG_OS_SUPPORT = 0*/ + +/* Empty timer functions if CONFIG_OS_SUPPORT = 0 */ +#ifndef timersub +#define timersub(a, b, result) +#endif + +struct vpx_usec_timer +{ + void *dummy; +}; + +static void +vpx_usec_timer_start(struct vpx_usec_timer *t) { } + +static void +vpx_usec_timer_mark(struct vpx_usec_timer *t) { } + +static long +vpx_usec_timer_elapsed(struct vpx_usec_timer *t) { return 0; } + +#endif /* CONFIG_OS_SUPPORT */ #endif diff -Nru libvpx-0.9.5/vpx_ports/x86.h libvpx-0.9.6/vpx_ports/x86.h --- libvpx-0.9.5/vpx_ports/x86.h 2010-10-28 13:14:14.000000000 +0000 +++ libvpx-0.9.6/vpx_ports/x86.h 2011-03-04 20:40:40.000000000 +0000 @@ -74,6 +74,7 @@ #define HAS_SSE2 0x04 #define HAS_SSE3 0x08 #define HAS_SSSE3 0x10 +#define HAS_SSE4_1 0x20 #ifndef BIT #define BIT(n) (1< - -/**************************************************************************** -* Imports 
-****************************************************************************/ -void -extend_memset(void *dst, unsigned char value, unsigned int size); - -/**************************************************************************** - * - ****************************************************************************/ -int -vp8_yv12_de_alloc_frame_buffer(YV12_BUFFER_CONFIG *ybf) -{ - if (ybf) - { - if (ybf->buffer_alloc) - { - duck_free(ybf->buffer_alloc); - } - - ybf->buffer_alloc = 0; - } - else - { - return -1; - } - - return 0; -} - -/**************************************************************************** - * - ****************************************************************************/ -int -vp8_yv12_alloc_frame_buffer(YV12_BUFFER_CONFIG *ybf, int width, int height, int border) -{ -//NOTE: - - int yplane_size = (height + 2 * border) * (width + 2 * border); - int uvplane_size = (height / 2 + border) * (width / 2 + border); - - if (ybf) - { - vp8_yv12_de_alloc_frame_buffer(ybf); - - ybf->y_width = width; - ybf->y_height = height; - ybf->y_stride = width + 2 * border; - - ybf->uv_width = width / 2; - ybf->uv_height = height / 2; - ybf->uv_stride = ybf->uv_width + border; - - ybf->border = border; - - // Added 2 extra lines to framebuffer so that copy12x12 doesn't fail - // when we have a large motion vector in V on the last v block. - // Note : We never use these pixels anyway so this doesn't hurt. 
- ybf->buffer_alloc = (unsigned char *) duck_memalign(32, (yplane_size * 3 / 2) + ybf->y_stride , 0); - - if (ybf->buffer_alloc == NULL) - return -1; - - ybf->y_buffer = ybf->buffer_alloc + border * ybf->y_stride + border; - ybf->u_buffer = ybf->buffer_alloc + yplane_size + border / 2 * ybf->uv_stride + border / 2; - ybf->v_buffer = ybf->buffer_alloc + yplane_size + uvplane_size + border / 2 * ybf->uv_stride + border / 2; - } - else - { - return -2; - } - - return 0; -} -/**************************************************************************** - * - ****************************************************************************/ -int -vp8_yv12_black_frame_buffer(YV12_BUFFER_CONFIG *ybf) -{ - if (ybf) - { - if (ybf->buffer_alloc) - { - extend_memset(ybf->y_buffer, 0x0, ybf->y_stride *(ybf->y_height + 2 * ybf->border)); - extend_memset(ybf->u_buffer, 0x80, ybf->uv_stride *(ybf->uv_height + ybf->border)); - extend_memset(ybf->v_buffer, 0x80, ybf->uv_stride *(ybf->uv_height + ybf->border)); - } - - return 0; - } - - return -1; -} diff -Nru libvpx-0.9.5/vpx_scale/blackfin/yv12extend.c libvpx-0.9.6/vpx_scale/blackfin/yv12extend.c --- libvpx-0.9.5/vpx_scale/blackfin/yv12extend.c 2010-10-28 13:14:14.000000000 +0000 +++ libvpx-0.9.6/vpx_scale/blackfin/yv12extend.c 1970-01-01 00:00:00.000000000 +0000 @@ -1,350 +0,0 @@ -/* - * Copyright (c) 2010 The WebM project authors. All Rights Reserved. - * - * Use of this source code is governed by a BSD-style license - * that can be found in the LICENSE file in the root of the source - * tree. An additional intellectual property rights grant can be found - * in the file PATENTS. All contributing project authors may - * be found in the AUTHORS file in the root of the source tree. 
- */ - - -/**************************************************************************** - * - * Module Title : yv12extend.c - * - * Description : - * - ***************************************************************************/ - -/**************************************************************************** -* Header Files -****************************************************************************/ -#include - -#include "vpx_scale/yv12config.h" -#include "vpx_mem/vpx_mem.h" - -/**************************************************************************** -* -****************************************************************************/ - - -/**************************************************************************** -* -****************************************************************************/ -void -extend_memset(void *dst, unsigned char value, unsigned int size) -{ -#if 0 - unsigned int quad_value; - - quad_value = (unsigned int) value; - quad_value |= (unsigned int) value << 8; - quad_value |= (unsigned int) value << 16; - quad_value |= (unsigned int) value << 24; -#else - unsigned short quad_value; - - quad_value = (unsigned int) value; - quad_value |= (unsigned int) value << 8; -#endif - - - if (size / 2 >= 64 * 1024) - printf("_Extend_memset__________ dma memset is broken\n"); - - *p_mdma_s1_start_addr = &quad_value; - *p_mdma_s1_x_count = size / 2; - *p_mdma_s1_x_modify = 0x0; - *p_mdma_d1_start_addr = dst; - *p_mdma_d1_x_count = size / 2; - *p_mdma_d1_x_modify = 2; - - *p_mdma_s1_config = DMAEN | WDSIZE_16; - asm("ssync;"); - - *p_mdma_d1_config = DI_EN | DMAEN | WNR | WDSIZE_16; - asm("ssync;"); - - while ((*p_mdma_d1_irq_status & DMA_DONE) == 0); - - *p_mdma_d1_irq_status |= DMA_DONE; -} - -/**************************************************************************** -* -****************************************************************************/ -void -extend_memcpy(void *dst, void *src, unsigned int size) -{ - if (size / 2 >= 64 * 1024) - 
printf("_Extend_memcpy__________ dma memcpy is broken\n"); - - - if ((size & 0x3)) - printf("_)__________ size not a multiple of 4\n"); - -//32 bit dma here caused some data to be corrupted --- WHY ?????? - - *p_mdma_s1_start_addr = src; - *p_mdma_s1_x_count = size / 2; - *p_mdma_s1_x_modify = 2; - *p_mdma_d1_start_addr = dst; - *p_mdma_d1_x_count = size / 2; - *p_mdma_d1_x_modify = 2; - - *p_mdma_s1_config = DMAEN | WDSIZE_16; - asm("ssync;"); - - *p_mdma_d1_config = DI_EN | DMAEN | WNR | WDSIZE_16; - asm("ssync;"); - - while ((*p_mdma_d1_irq_status & DMA_DONE) == 0); - - *p_mdma_d1_irq_status |= DMA_DONE; -} - -/**************************************************************************** - * - ****************************************************************************/ -void -vp8_yv12_extend_frame_borders(YV12_BUFFER_CONFIG *ybf) -{ -#if 1 - int i; - unsigned char *src_ptr1, *src_ptr2; - unsigned char *dest_ptr1, *dest_ptr2; - - unsigned int Border; - int plane_stride; - int plane_height; - int plane_width; - - unsigned int quad_sample; - unsigned int sample; - - /***********/ - /* Y Plane */ - /***********/ - Border = ybf->border; - plane_stride = ybf->y_stride; - plane_height = ybf->y_height; - plane_width = ybf->y_width; - - // copy the left and right most columns out - src_ptr1 = ybf->y_buffer; - src_ptr2 = src_ptr1 + plane_width - 1; - dest_ptr1 = src_ptr1 - Border; - dest_ptr2 = src_ptr2 + 1; - - for (i = 0; i < plane_height; i++) - { - extend_memset(dest_ptr1, src_ptr1[0], Border); - extend_memset(dest_ptr2, src_ptr2[0], Border); - src_ptr1 += plane_stride; - src_ptr2 += plane_stride; - dest_ptr1 += plane_stride; - dest_ptr2 += plane_stride; - } - - // Now copy the top and bottom source lines into each line of the respective borders - src_ptr1 = ybf->y_buffer - Border; - src_ptr2 = src_ptr1 + (plane_height * plane_stride) - plane_stride; - dest_ptr1 = src_ptr1 - (Border * plane_stride); - dest_ptr2 = src_ptr2 + plane_stride; - - for (i = 0; i < 
(int)Border; i++) - { - extend_memcpy(dest_ptr1, src_ptr1, plane_stride); - dest_ptr1 += plane_stride; - } - - for (i = 0; i < (int)Border; i++) - { - extend_memcpy(dest_ptr2, src_ptr2, plane_stride); - dest_ptr2 += plane_stride; - } - - plane_stride /= 2; - plane_height /= 2; - plane_width /= 2; - Border /= 2; - - /***********/ - /* U Plane */ - /***********/ - - // copy the left and right most columns out - src_ptr1 = ybf->u_buffer; - src_ptr2 = src_ptr1 + plane_width - 1; - dest_ptr1 = src_ptr1 - Border; - dest_ptr2 = src_ptr2 + 1; - - for (i = 0; i < plane_height; i++) - { - extend_memset(dest_ptr1, src_ptr1[0], Border); - extend_memset(dest_ptr2, src_ptr2[0], Border); - src_ptr1 += plane_stride; - src_ptr2 += plane_stride; - dest_ptr1 += plane_stride; - dest_ptr2 += plane_stride; - } - - // Now copy the top and bottom source lines into each line of the respective borders - src_ptr1 = ybf->u_buffer - Border; - src_ptr2 = src_ptr1 + (plane_height * plane_stride) - plane_stride; - dest_ptr1 = src_ptr1 - (Border * plane_stride); - dest_ptr2 = src_ptr2 + plane_stride; - - for (i = 0; i < (int)(Border); i++) - { - extend_memcpy(dest_ptr1, src_ptr1, plane_stride); - dest_ptr1 += plane_stride; - } - - for (i = 0; i < (int)(Border); i++) - { - extend_memcpy(dest_ptr2, src_ptr2, plane_stride); - dest_ptr2 += plane_stride; - } - - /***********/ - /* V Plane */ - /***********/ - - // copy the left and right most columns out - src_ptr1 = ybf->v_buffer; - src_ptr2 = src_ptr1 + plane_width - 1; - dest_ptr1 = src_ptr1 - Border; - dest_ptr2 = src_ptr2 + 1; - - for (i = 0; i < plane_height; i++) - { - extend_memset(dest_ptr1, src_ptr1[0], Border); - extend_memset(dest_ptr2, src_ptr2[0], Border); - src_ptr1 += plane_stride; - src_ptr2 += plane_stride; - dest_ptr1 += plane_stride; - dest_ptr2 += plane_stride; - } - - // Now copy the top and bottom source lines into each line of the respective borders - src_ptr1 = ybf->v_buffer - Border; - src_ptr2 = src_ptr1 + (plane_height * 
plane_stride) - plane_stride; - dest_ptr1 = src_ptr1 - (Border * plane_stride); - dest_ptr2 = src_ptr2 + plane_stride; - - for (i = 0; i < (int)(Border); i++) - { - extend_memcpy(dest_ptr1, src_ptr1, plane_stride); - dest_ptr1 += plane_stride; - } - - for (i = 0; i < (int)(Border); i++) - { - extend_memcpy(dest_ptr2, src_ptr2, plane_stride); - dest_ptr2 += plane_stride; - } - -#endif -} -/**************************************************************************** - * - * ROUTINE : vp8_yv12_copy_frame - * - * INPUTS : - * - * OUTPUTS : None. - * - * RETURNS : void - * - * FUNCTION : Copies the source image into the destination image and - * updates the destination's UMV borders. - * - * SPECIAL NOTES : The frames are assumed to be identical in size. - * - ****************************************************************************/ -void -vp8_yv12_copy_frame(YV12_BUFFER_CONFIG *src_ybc, YV12_BUFFER_CONFIG *dst_ybc) -{ -#if 1 - int row; - unsigned char *source, *dest; - - source = src_ybc->y_buffer; - dest = dst_ybc->y_buffer; - - for (row = 0; row < src_ybc->y_height; row++) - { - extend_memcpy(dest, source, src_ybc->y_width); - source += src_ybc->y_stride; - dest += dst_ybc->y_stride; - } - - source = src_ybc->u_buffer; - dest = dst_ybc->u_buffer; - - for (row = 0; row < src_ybc->uv_height; row++) - { - extend_memcpy(dest, source, src_ybc->uv_width); - source += src_ybc->uv_stride; - dest += dst_ybc->uv_stride; - } - - source = src_ybc->v_buffer; - dest = dst_ybc->v_buffer; - - for (row = 0; row < src_ybc->uv_height; row++) - { - extend_memcpy(dest, source, src_ybc->uv_width); - source += src_ybc->uv_stride; - dest += dst_ybc->uv_stride; - } - - vp8_yv12_extend_frame_borders(dst_ybc); - -#else - int row; - char *source, *dest; - int height; - int width; - - height = src_ybc->y_height + (src_ybc->border * 2); - width = src_ybc->y_width + (src_ybc->border * 2); - source = src_ybc->y_buffer; - dest = dst_ybc->y_buffer; - - for (row = 0; row < height; row++) - { - 
extend_memcpy(dest, source, width); - source += src_ybc->y_stride; - dest += dst_ybc->y_stride; - } - - height = src_ybc->uv_height + (src_ybc->border); - width = src_ybc->uv_width + (src_ybc->border); - - source = src_ybc->u_buffer; - dest = dst_ybc->u_buffer; - - for (row = 0; row < height; row++) - { - extend_memcpy(dest, source, width); - source += src_ybc->uv_stride; - dest += dst_ybc->uv_stride; - } - - source = src_ybc->v_buffer; - dest = dst_ybc->v_buffer; - - for (row = 0; row < height; row++) - { - extend_memcpy(dest, source, width); - source += src_ybc->uv_stride; - dest += dst_ybc->uv_stride; - } - -#endif - -} diff -Nru libvpx-0.9.5/vpx_scale/dm642/bicubic_scaler_c64.c libvpx-0.9.6/vpx_scale/dm642/bicubic_scaler_c64.c --- libvpx-0.9.5/vpx_scale/dm642/bicubic_scaler_c64.c 2010-10-28 13:14:14.000000000 +0000 +++ libvpx-0.9.6/vpx_scale/dm642/bicubic_scaler_c64.c 1970-01-01 00:00:00.000000000 +0000 @@ -1,194 +0,0 @@ -/* - * Copyright (c) 2010 The WebM project authors. All Rights Reserved. - * - * Use of this source code is governed by a BSD-style license - * that can be found in the LICENSE file in the root of the source - * tree. An additional intellectual property rights grant can be found - * in the file PATENTS. All contributing project authors may - * be found in the AUTHORS file in the root of the source tree. 
- */ - - -#include -#include -#include -#include "vpx_mem/vpx_mem.h" -#include "vpxscale_arbitrary.h" - -extern BICUBIC_SCALER_STRUCT g_b_scaler; - -int bicubic_scale_c64(int in_width, int in_height, int in_stride, - int out_width, int out_height, int out_stride, - unsigned char *input_image, unsigned char *output_image) -{ - short *restrict l_w, * restrict l_h; - short *restrict c_w, * restrict c_h; - unsigned char *restrict ip, * restrict op, *restrict op_w; - unsigned char *restrict hbuf; - int h, w, lw, lh; - int phase_offset_w, phase_offset_h; - double coeff; - int max_phase; - - c_w = g_b_scaler.c_w; - c_h = g_b_scaler.c_h; - - op = output_image; - - l_w = g_b_scaler.l_w; - l_h = g_b_scaler.l_h; - - phase_offset_h = 0; - - for (h = 0; h < out_height; h++) - { - // select the row to work on - lh = l_h[h]; - ip = input_image + (in_stride * lh); - - coeff = _memd8_const(&c_h[phase_offset_h*4]); - - // vp8_filter the row vertically into an temporary buffer. - // If the phase offset == 0 then all the multiplication - // is going to result in the output equalling the input. - // So instead point the temporary buffer to the input. - // Also handle the boundry condition of not being able to - // filter that last lines. - if (phase_offset_h && (lh < in_height - 2)) - { - hbuf = g_b_scaler.hbuf; - - for (w = 0; w < in_width; w += 4) - { - int ip1, ip2, ip3, ip4; - int y13_12, y11_10, y23_22, y21_20, y33_32, y31_30, y43_42, y41_40; - int y10_20, y11_21, y12_22, y13_23, y30_40, y31_41, y32_42, y33_43; - int s1, s2, s3, s4; - - ip1 = _mem4_const(&ip[w - in_stride]); - ip2 = _mem4_const(&ip[w]); - ip3 = _mem4_const(&ip[w + in_stride]); - ip4 = _mem4_const(&ip[w + 2*in_stride]); - - // realignment of data. Unpack the data so that it is in short - // format instead of bytes. 
- y13_12 = _unpkhu4(ip1); - y11_10 = _unpklu4(ip1); - y23_22 = _unpkhu4(ip2); - y21_20 = _unpklu4(ip2); - y33_32 = _unpkhu4(ip3); - y31_30 = _unpklu4(ip3); - y43_42 = _unpkhu4(ip4); - y41_40 = _unpklu4(ip4); - - // repack the data so that elements 1 and 2 are together. this - // lines up so that a dot product with the coefficients can be - // done. - y10_20 = _pack2(y11_10, y21_20); - y11_21 = _packh2(y11_10, y21_20); - y12_22 = _pack2(y13_12, y23_22); - y13_23 = _packh2(y13_12, y23_22); - - s1 = _dotp2(_hi(coeff), y10_20); - s2 = _dotp2(_hi(coeff), y11_21); - s3 = _dotp2(_hi(coeff), y12_22); - s4 = _dotp2(_hi(coeff), y13_23); - - y30_40 = _pack2(y31_30, y41_40); - y31_41 = _packh2(y31_30, y41_40); - y32_42 = _pack2(y33_32, y43_42); - y33_43 = _packh2(y33_32, y43_42); - - // now repack elements 3 and 4 together. - s1 += _dotp2(_lo(coeff), y30_40); - s2 += _dotp2(_lo(coeff), y31_41); - s3 += _dotp2(_lo(coeff), y32_42); - s4 += _dotp2(_lo(coeff), y33_43); - - s1 = s1 >> 12; - s2 = s2 >> 12; - s3 = s3 >> 12; - s4 = s4 >> 12; - - s1 = _pack2(s2, s1); - s2 = _pack2(s4, s3); - - _amem4(&hbuf[w]) = _spacku4(s2, s1); - } - } - else - hbuf = ip; - - // increase the phase offset for the next time around. - if (++phase_offset_h >= g_b_scaler.nh) - phase_offset_h = 0; - - op_w = op; - - // will never be able to interpolate first pixel, so just copy it - // over here. 
- phase_offset_w = 1; - *op_w++ = hbuf[0]; - - if (1 >= g_b_scaler.nw) phase_offset_w = 0; - - max_phase = g_b_scaler.nw; - - for (w = 1; w < out_width; w++) - { - double coefficients; - int hbuf_high, hbuf_low, hbuf_both; - int sum_high, sum_low, sum; - - // get the index to use to expand the image - lw = l_w[w]; - coefficients = _amemd8_const(&c_w[phase_offset_w*4]); - hbuf_both = _mem4_const(&hbuf[lw-1]); - - hbuf_high = _unpkhu4(hbuf_both); - hbuf_low = _unpklu4(hbuf_both); - - sum_high = _dotp2(_hi(coefficients), hbuf_high); - sum_low = _dotp2(_lo(coefficients), hbuf_low); - - sum = (sum_high + sum_low) >> 12; - - if (++phase_offset_w >= max_phase) - phase_offset_w = 0; - - if ((lw + 2) >= in_width) - sum = hbuf[lw]; - - *op_w++ = sum; - } - - op += out_stride; - } - - return 0; -} - -void bicubic_scale_frame_c64(YV12_BUFFER_CONFIG *src, YV12_BUFFER_CONFIG *dst, - int new_width, int new_height) -{ - - dst->y_width = new_width; - dst->y_height = new_height; - dst->uv_width = new_width / 2; - dst->uv_height = new_height / 2; - - dst->y_stride = dst->y_width; - dst->uv_stride = dst->uv_width; - - bicubic_scale_c64(src->y_width, src->y_height, src->y_stride, - new_width, new_height, dst->y_stride, - src->y_buffer, dst->y_buffer); - - bicubic_scale_c64(src->uv_width, src->uv_height, src->uv_stride, - new_width / 2, new_height / 2, dst->uv_stride, - src->u_buffer, dst->u_buffer); - - bicubic_scale_c64(src->uv_width, src->uv_height, src->uv_stride, - new_width / 2, new_height / 2, dst->uv_stride, - src->v_buffer, dst->v_buffer); -} diff -Nru libvpx-0.9.5/vpx_scale/dm642/gen_scalers_c64.c libvpx-0.9.6/vpx_scale/dm642/gen_scalers_c64.c --- libvpx-0.9.5/vpx_scale/dm642/gen_scalers_c64.c 2010-10-28 13:14:14.000000000 +0000 +++ libvpx-0.9.6/vpx_scale/dm642/gen_scalers_c64.c 1970-01-01 00:00:00.000000000 +0000 @@ -1,608 +0,0 @@ -/* - * Copyright (c) 2010 The WebM project authors. All Rights Reserved. 
- * - * Use of this source code is governed by a BSD-style license - * that can be found in the LICENSE file in the root of the source - * tree. An additional intellectual property rights grant can be found - * in the file PATENTS. All contributing project authors may - * be found in the AUTHORS file in the root of the source tree. - */ - - -/**************************************************************************** - * - * Module Title : gen_scalers.c - * - * Description : Generic image scaling functions. - * - ***************************************************************************/ - -/**************************************************************************** -* Header Files -****************************************************************************/ -#include "vpx_scale/vpxscale.h" - -/**************************************************************************** -* Imports -****************************************************************************/ - -/**************************************************************************** - * - * ROUTINE : horizontal_line_4_5_scale_c4 - * - * INPUTS : const unsigned char *source : Pointer to source data. - * unsigned int source_width : Stride of source. - * unsigned char *dest : Pointer to destination data. - * unsigned int dest_width : Stride of destination (NOT USED). - * - * OUTPUTS : None. - * - * RETURNS : void - * - * FUNCTION : Copies horizontal line of pixels from source to - * destination scaling up by 4 to 5. - * - * SPECIAL NOTES : None. 
- * - ****************************************************************************/ -static -void horizontal_line_4_5_scale_c64 -( - const unsigned char *source, - unsigned int source_width, - unsigned char *dest, - unsigned int dest_width -) -{ - unsigned i; - unsigned int ba, cb, dc, ed; - unsigned char *restrict des = dest; - unsigned int *restrict src = (unsigned int *)source; - unsigned int const_51_205, const_102_154, - const_205_51, const_154_102; - - unsigned int src_current, src_next; - - (void) dest_width; - - // Constants that are to be used for the filtering. For - // best speed we are going to want to right shift by 16. - // In the generic version they were shift by 8, so put - // an extra 8 in now so that 16 will come out later. - const_51_205 = 0x3300CD00; //_pack2 (51 << 8, 205 << 8); - const_205_51 = 0xCD003300; //_pack2 (205 << 8, 51 << 8); - const_102_154 = 0x66009A00; //_pack2 (102 << 8, 154 << 8); - const_154_102 = 0x9A006600; //_pack2 (154 << 8, 102 << 8); - - // 5 points are needed to filter to give 5 output points. - // A load can pull up 4 at a time, and one needs to be - // "borrowed" from the next set of data. So instead of - // loading those 5 points each time, "steal" a point from - // the next set and only load up 4 each time through. - src_current = _mem4(src); - - for (i = 0; i < source_width - 4; i += 4) - { - src_next = _mem4(src++); - - // Reorder the data so that it is ready for the - // dot product. - ba = _unpklu4(src_current); - cb = _unpkhu4(_rotl(src_current, 8)); - dc = _unpkhu4(src_current); - ed = _unpkhu4(_shrmb(src_next, src_current)); - - // Use the dot product with round and shift. - des [0] = src_current & 0xff; - des [1] = _dotprsu2(ba, const_205_51); - des [2] = _dotprsu2(cb, const_154_102); - des [3] = _dotprsu2(dc, const_102_154); - des [4] = _dotprsu2(ed, const_51_205); - - des += 5; - - // reuse loaded vales next time around. - src_current = src_next; - } - - // vp8_filter the last set of points. 
Normally a point from the next set - // would be used, but there is no next set, so just fill. - ba = _unpklu4(src_current); - cb = _unpkhu4(_rotl(src_current, 8)); - dc = _unpkhu4(src_current); - - des [0] = src_current & 0xff; - des [1] = _dotprsu2(ba, const_205_51); - des [2] = _dotprsu2(cb, const_154_102); - des [3] = _dotprsu2(dc, const_102_154); - des [4] = src_current & 0xff; - -} -/**************************************************************************** - * - * ROUTINE : vertical_band_4_5_scale_c64 - * - * INPUTS : unsigned char *dest : Pointer to destination data. - * unsigned int dest_pitch : Stride of destination data. - * unsigned int dest_width : Width of destination data. - * - * OUTPUTS : None. - * - * RETURNS : void - * - * FUNCTION : Scales vertical band of pixels by scale 4 to 5. The - * height of the band scaled is 4-pixels. - * - * SPECIAL NOTES : The routine uses the first line of the band below - * the current band. - * - ****************************************************************************/ -static -void vertical_band_4_5_scale_c64(unsigned char *dest, unsigned int dest_pitch, unsigned int dest_width) -{ - unsigned int i; - unsigned int a, b, c, d, e; - unsigned int ba, cb, dc, ed; - unsigned char *restrict src = dest; - unsigned char *restrict des = dest; - unsigned int const_51_205, const_102_154, - const_205_51, const_154_102; - - const_51_205 = 0x3300CD00; //_pack2 (51 << 8, 205 << 8); - const_205_51 = 0xCD003300; //_pack2 (205 << 8, 51 << 8); - const_102_154 = 0x66009A00; //_pack2 (102 << 8, 154 << 8); - const_154_102 = 0x9A006600; //_pack2 (154 << 8, 102 << 8); - - // Force a loop unroll here so that there is not such a - // dependancy. 
- a = src [0]; - b = src [dest_pitch]; - c = src [dest_pitch*2]; - d = src [dest_pitch*3]; - e = src [dest_pitch*5]; - src ++; - - for (i = 0; i < dest_width; i++) - { - ba = _pack2(b, a); - cb = _pack2(c, b); - dc = _pack2(d, c); - ed = _pack2(e, d); - - a = src [0]; - b = src [dest_pitch]; - c = src [dest_pitch*2]; - d = src [dest_pitch*3]; - e = src [dest_pitch*5]; - src ++; - - des [dest_pitch] = _dotprsu2(ba, const_205_51); - des [dest_pitch*2] = _dotprsu2(cb, const_154_102); - des [dest_pitch*3] = _dotprsu2(dc, const_102_154); - des [dest_pitch*4] = _dotprsu2(ed, const_51_205); - - des ++; - } -} - -/**************************************************************************** - * - * ROUTINE : last_vertical_band_4_5_scale_c64 - * - * INPUTS : unsigned char *dest : Pointer to destination data. - * unsigned int dest_pitch : Stride of destination data. - * unsigned int dest_width : Width of destination data. - * - * OUTPUTS : None. - * - * RETURNS : void - * - * FUNCTION : Scales last vertical band of pixels by scale 4 to 5. The - * height of the band scaled is 4-pixels. - * - * SPECIAL NOTES : The routine does not have available the first line of - * the band below the current band, since this is the - * last band. 
- * - ****************************************************************************/ -static -void last_vertical_band_4_5_scale_c64(unsigned char *dest, unsigned int dest_pitch, unsigned int dest_width) -{ - unsigned int i; - unsigned int a, b, c, d; - unsigned int ba, cb, dc; - unsigned char *restrict src = dest; - unsigned char *restrict des = dest; - unsigned int const_102_154, const_205_51, const_154_102; - - const_205_51 = 0xCD003300; //_pack2 (205 << 8, 51 << 8); - const_102_154 = 0x66009A00; //_pack2 (102 << 8, 154 << 8); - const_154_102 = 0x9A006600; //_pack2 (154 << 8, 102 << 8); - - a = src [0]; - b = src [dest_pitch]; - c = src [dest_pitch*2]; - d = src [dest_pitch*3]; - src ++; - - for (i = 0; i < dest_width; ++i) - { - ba = _pack2(b, a); - cb = _pack2(c, b); - dc = _pack2(d, c); - - a = src [0]; - b = src [dest_pitch]; - c = src [dest_pitch*2]; - d = src [dest_pitch*3]; - src ++; - - des [dest_pitch] = _dotprsu2(ba, const_205_51); - des [dest_pitch*2] = _dotprsu2(cb, const_154_102); - des [dest_pitch*3] = _dotprsu2(dc, const_102_154); - des [dest_pitch*4] = (unsigned char) d; - - des++; - } -} - -/**************************************************************************** - * - * ROUTINE : horizontal_line_3_5_scale_c64 - * - * INPUTS : const unsigned char *source : Pointer to source data. - * unsigned int source_width : Stride of source. - * unsigned char *dest : Pointer to destination data. - * unsigned int dest_width : Stride of destination (NOT USED). - * - * OUTPUTS : None. - * - * RETURNS : void - * - * FUNCTION : Copies horizontal line of pixels from source to - * destination scaling up by 3 to 5. - * - * SPECIAL NOTES : None. 
- * - * - ****************************************************************************/ -static -void horizontal_line_3_5_scale_c64 -( - const unsigned char *source, - unsigned int source_width, - unsigned char *dest, - unsigned int dest_width -) -{ - unsigned int i; - unsigned int ba, cb, dc; - unsigned int src_current; - unsigned char *restrict des = dest; - unsigned char *restrict src = (unsigned char *)source; - unsigned int const_51_205, const_102_154, - const_205_51, const_154_102; - - (void) dest_width; - - const_51_205 = 0x3300CD00; //_pack2 (51 << 8, 205 << 8); - const_205_51 = 0xCD003300; //_pack2 (205 << 8, 51 << 8); - const_102_154 = 0x66009A00; //_pack2 (102 << 8, 154 << 8); - const_154_102 = 0x9A006600; //_pack2 (154 << 8, 102 << 8); - - for (i = 0; i < source_width - 3; i += 3) - { - src_current = _mem4(src); - - // Reorder the data so that it is ready for the - // dot product. - ba = _unpklu4(src_current); - cb = _unpkhu4(_rotl(src_current, 8)); - dc = _unpkhu4(src_current); - - des [0] = src_current & 0xff; - des [1] = _dotprsu2(ba, const_154_102); - des [2] = _dotprsu2(cb, const_51_205); - des [3] = _dotprsu2(cb, const_205_51); - des [4] = _dotprsu2(dc, const_102_154); - - src += 3; - des += 5; - } - - src_current = _mem4(src); - - ba = _unpklu4(src_current); - cb = _unpkhu4(_rotl(src_current, 8)); - dc = _unpkhu4(src_current); - - - des [0] = src_current & 0xff; - des [1] = _dotprsu2(ba, const_154_102); - des [2] = _dotprsu2(cb, const_51_205); - des [3] = _dotprsu2(cb, const_205_51); - des [4] = dc & 0xff; - -} - -/**************************************************************************** - * - * ROUTINE : vertical_band_3_5_scale_c64 - * - * INPUTS : unsigned char *dest : Pointer to destination data. - * unsigned int dest_pitch : Stride of destination data. - * unsigned int dest_width : Width of destination data. - * - * OUTPUTS : None. - * - * RETURNS : void - * - * FUNCTION : Scales vertical band of pixels by scale 3 to 5. 
The - * height of the band scaled is 3-pixels. - * - * SPECIAL NOTES : The routine uses the first line of the band below - * the current band. - * - ****************************************************************************/ -static -void vertical_band_3_5_scale_c64(unsigned char *dest, unsigned int dest_pitch, unsigned int dest_width) -{ - unsigned int i; - unsigned int a, b, c, d; - unsigned int ba, cb, dc; - unsigned char *restrict src = dest; - unsigned char *restrict des = dest; - unsigned int const_51_205, const_102_154, - const_205_51, const_154_102; - - const_51_205 = 0x3300CD00; //_pack2 (51 << 8, 205 << 8); - const_205_51 = 0xCD003300; //_pack2 (205 << 8, 51 << 8); - const_102_154 = 0x66009A00; //_pack2 (102 << 8, 154 << 8); - const_154_102 = 0x9A006600; //_pack2 (154 << 8, 102 << 8); - - a = src [0]; - b = src [dest_pitch]; - c = src [dest_pitch*2]; - d = src [dest_pitch*5]; - src ++; - - for (i = 0; i < dest_width; i++) - { - ba = _pack2(b, a); - cb = _pack2(c, b); - dc = _pack2(d, c); - - a = src [0]; - b = src [dest_pitch]; - c = src [dest_pitch*2]; - d = src [dest_pitch*5]; - src ++; - - des [dest_pitch] = _dotprsu2(ba, const_154_102); - des [dest_pitch*2] = _dotprsu2(cb, const_51_205); - des [dest_pitch*3] = _dotprsu2(cb, const_205_51); - des [dest_pitch*4] = _dotprsu2(dc, const_102_154); - - des++; - } -} - -/**************************************************************************** - * - * ROUTINE : last_vertical_band_3_5_scale_c64 - * - * INPUTS : unsigned char *dest : Pointer to destination data. - * unsigned int dest_pitch : Stride of destination data. - * unsigned int dest_width : Width of destination data. - * - * OUTPUTS : None. - * - * RETURNS : void - * - * FUNCTION : Scales last vertical band of pixels by scale 3 to 5. The - * height of the band scaled is 3-pixels. - * - * SPECIAL NOTES : The routine does not have available the first line of - * the band below the current band, since this is the - * last band. 
- * - ****************************************************************************/ -static -void last_vertical_band_3_5_scale_c64(unsigned char *dest, unsigned int dest_pitch, unsigned int dest_width) -{ - unsigned int i; - unsigned int a, b, c; - unsigned int ba, cb; - unsigned char *restrict src = dest; - unsigned char *restrict des = dest; - unsigned int const_51_205, const_205_51, const_154_102; - - const_51_205 = 0x3300CD00; //_pack2 (51 << 8, 205 << 8); - const_205_51 = 0xCD003300; //_pack2 (205 << 8, 51 << 8); - const_154_102 = 0x9A006600; //_pack2 (154 << 8, 102 << 8); - - a = src [0]; - b = src [dest_pitch]; - c = src [dest_pitch*2]; - src ++; - - for (i = 0; i < dest_width; ++i) - { - ba = _pack2(b, a); - cb = _pack2(c, b); - - a = src [0]; - b = src [dest_pitch]; - c = src [dest_pitch*2]; - src ++; - - des [dest_pitch] = _dotprsu2(ba, const_154_102); - des [dest_pitch*2] = _dotprsu2(cb, const_51_205); - des [dest_pitch*3] = _dotprsu2(cb, const_205_51); - des [dest_pitch*4] = (unsigned char)(c) ; - - des++; - } -} - -/**************************************************************************** - * - * ROUTINE : horizontal_line_1_2_scale_c64 - * - * INPUTS : const unsigned char *source : Pointer to source data. - * unsigned int source_width : Stride of source. - * unsigned char *dest : Pointer to destination data. - * unsigned int dest_width : Stride of destination (NOT USED). - * - * OUTPUTS : None. - * - * RETURNS : void - * - * FUNCTION : Copies horizontal line of pixels from source to - * destination scaling up by 1 to 2. - * - * SPECIAL NOTES : source width must be a multiple of 4. 
- * - ****************************************************************************/ -void horizontal_line_1_2_scale_c64 -( - const unsigned char *source, - unsigned int source_width, - unsigned char *dest, - unsigned int dest_width -) -{ - unsigned int i; - unsigned char *restrict des = dest; - unsigned char *restrict src = (unsigned char *)source; - unsigned int src7_4i, src4_1i, src3_0i; - unsigned int a4_0i, ahi, alo; - double src7_0d, src3_0d; - const unsigned int k01 = 0x01010101; - - for (i = 0; i < source_width / 4; i += 1) - { - // Load up the data from src. Here a wide load is - // used to get 8 bytes at once, only 5 will be used - // for the actual computation. - src7_0d = _memd8(src); - src3_0i = _lo(src7_0d); - src7_4i = _hi(src7_0d); - - // Need to average between points. Shift byte 5 into - // the lower word. This will result in bytes 5-1 - // averaged with 4-0. - src4_1i = _shrmb(src7_4i, src3_0i); - a4_0i = _avgu4(src4_1i, src3_0i); - - // Expand the data out. Could do an unpack, however - // all but the multiply units are getting pretty hard - // here the multiply unit can take some of the computations. - src3_0d = _mpyu4(src3_0i, k01); - - // The averages need to be unpacked so that they are in 16 - // bit form and will be able to be interleaved with the - // original data - ahi = _unpkhu4(a4_0i); - alo = _unpklu4(a4_0i); - - ahi = _swap4(ahi); - alo = _swap4(alo); - - // Mix the average result in with the orginal data. - ahi = _hi(src3_0d) | ahi; - alo = _lo(src3_0d) | alo; - - _memd8(des) = _itod(ahi, alo); - - des += 8; - src += 4; - } -} - - -/**************************************************************************** - * - * ROUTINE : vertical_band_1_2_scale_c64 - * - * INPUTS : unsigned char *dest : Pointer to destination data. - * unsigned int dest_pitch : Stride of destination data. - * unsigned int dest_width : Width of destination data. - * - * OUTPUTS : None. 
- * - * RETURNS : void - * - * FUNCTION : Scales vertical band of pixels by scale 1 to 2. The - * height of the band scaled is 1-pixel. - * - * SPECIAL NOTES : The routine uses the first line of the band below - * the current band. - * Destination width must be a multiple of 4. Because the - * intput must be, therefore the output must be. - * - ****************************************************************************/ -static -void vertical_band_1_2_scale_c64(unsigned char *dest, unsigned int dest_pitch, unsigned int dest_width) -{ - unsigned int i; - unsigned int a, b; - unsigned int *restrict line_a = (unsigned int *)dest; - unsigned int *restrict line_b = (unsigned int *)(dest + (dest_pitch * 2)); - unsigned int *restrict des = (unsigned int *)(dest + dest_pitch); - - for (i = 0; i < dest_width / 4; i++) - { - a = _mem4(line_a++); - b = _mem4(line_b++); - - _mem4(des++) = _avgu4(a, b); - } -} - -/**************************************************************************** - * - * ROUTINE : last_vertical_band_1_2_scale_c64 - * - * INPUTS : unsigned char *dest : Pointer to destination data. - * unsigned int dest_pitch : Stride of destination data. - * unsigned int dest_width : Width of destination data. - * - * OUTPUTS : None. - * - * RETURNS : void - * - * FUNCTION : Scales last vertical band of pixels by scale 1 to 2. The - * height of the band scaled is 1-pixel. - * - * SPECIAL NOTES : The routine does not have available the first line of - * the band below the current band, since this is the - * last band. Again, width must be a multiple of 4. 
- * - ****************************************************************************/ -static -void last_vertical_band_1_2_scale_c64(unsigned char *dest, unsigned int dest_pitch, unsigned int dest_width) -{ - unsigned int i; - unsigned int *restrict src = (unsigned int *)dest; - unsigned int *restrict des = (unsigned int *)(dest + dest_pitch); - - for (i = 0; i < dest_width / 4; ++i) - { - _mem4(des++) = _mem4(src++); - } -} - -void -register_generic_scalers(void) -{ - vp8_horizontal_line_1_2_scale = horizontal_line_1_2_scale_c64; - vp8_vertical_band_1_2_scale = vertical_band_1_2_scale_c64; - vp8_last_vertical_band_1_2_scale = last_vertical_band_1_2_scale_c64; - vp8_horizontal_line_3_5_scale = horizontal_line_3_5_scale_c64; - vp8_vertical_band_3_5_scale = vertical_band_3_5_scale_c64; - vp8_last_vertical_band_3_5_scale = last_vertical_band_3_5_scale_c64; - vp8_horizontal_line_4_5_scale = horizontal_line_4_5_scale_c64; - vp8_vertical_band_4_5_scale = vertical_band_4_5_scale_c64; - vp8_last_vertical_band_4_5_scale = last_vertical_band_4_5_scale_c64; -} diff -Nru libvpx-0.9.5/vpx_scale/dm642/yv12extend.c libvpx-0.9.6/vpx_scale/dm642/yv12extend.c --- libvpx-0.9.5/vpx_scale/dm642/yv12extend.c 2010-10-28 13:14:14.000000000 +0000 +++ libvpx-0.9.6/vpx_scale/dm642/yv12extend.c 1970-01-01 00:00:00.000000000 +0000 @@ -1,446 +0,0 @@ -/* - * Copyright (c) 2010 The WebM project authors. All Rights Reserved. - * - * Use of this source code is governed by a BSD-style license - * that can be found in the LICENSE file in the root of the source - * tree. An additional intellectual property rights grant can be found - * in the file PATENTS. All contributing project authors may - * be found in the AUTHORS file in the root of the source tree. 
- */ - - -/**************************************************************************** - * - * Module Title : yv12extend.c - * - * Description : - * - ***************************************************************************/ - -/**************************************************************************** -* Header Files -****************************************************************************/ -//#include -#include "csl_dat.h" -#include "vpx_scale/yv12config.h" -#include "vpx_mem/vpx_mem.h" - -/**************************************************************************** -* Exports -****************************************************************************/ -#define UINT8 unsigned char -#define UINT32 unsigned int - - -static inline -void copy_yleft_right_border( - UINT8 *restrict src_ptr1, - UINT8 *restrict src_ptr2, - UINT8 *restrict dest_ptr1, - UINT8 *restrict dest_ptr2, - UINT32 plane_height, - UINT32 plane_stride -) -{ - UINT32 left, right, left2, left4, right2, right4; - double dl, dr; - int i; - -#pragma MUST_ITERATE(16,16,16) - - for (i = 0; i < plane_height; i++) - { - left = src_ptr1[0]; - right = src_ptr2[0]; - - left2 = _pack2(left, left); - left4 = _packl4(left2, left2); - - right2 = _pack2(right, right); - right4 = _packl4(right2, right2); - - dl = _itod(left4, left4); - dr = _itod(right4, right4); - - _amemd8(&dest_ptr1[ 0]) = dl; - _amemd8(&dest_ptr2[ 0]) = dr; - - _amemd8(&dest_ptr1[ 8]) = dl; - _amemd8(&dest_ptr2[ 8]) = dr; - - _amemd8(&dest_ptr1[16]) = dl; - _amemd8(&dest_ptr2[16]) = dr; - - _amemd8(&dest_ptr1[24]) = dl; - _amemd8(&dest_ptr2[24]) = dr; - - _amemd8(&dest_ptr1[32]) = dl; - _amemd8(&dest_ptr2[32]) = dr; - - _amemd8(&dest_ptr1[40]) = dl; - _amemd8(&dest_ptr2[40]) = dr; - - - src_ptr1 += plane_stride; - src_ptr2 += plane_stride; - dest_ptr1 += plane_stride; - dest_ptr2 += plane_stride; - } -} -/**************************************************************************** - * - * - 
****************************************************************************/ -static -void copy_uvleft_right_border( - UINT8 *restrict src_ptr1, - UINT8 *restrict src_ptr2, - UINT8 *restrict dest_ptr1, - UINT8 *restrict dest_ptr2, - UINT32 plane_height, - UINT32 plane_stride -) -{ - UINT32 left, right, left2, left4, right2, right4; - double dl, dr; - int i; - -#pragma MUST_ITERATE(8,8 ,8) - - for (i = 0; i < plane_height; i++) - { - left = src_ptr1[0]; - right = src_ptr2[0]; - - left2 = _pack2(left, left); - left4 = _packl4(left2, left2); - - right2 = _pack2(right, right); - right4 = _packl4(right2, right2); - - dl = _itod(left4, left4); - dr = _itod(right4, right4); - - _amemd8(&dest_ptr1[ 0]) = dl; - _amemd8(&dest_ptr2[ 0]) = dr; - - _amemd8(&dest_ptr1[ 8]) = dl; - _amemd8(&dest_ptr2[ 8]) = dr; - - _amemd8(&dest_ptr1[16]) = dl; - _amemd8(&dest_ptr2[16]) = dr; - - - src_ptr1 += plane_stride; - src_ptr2 += plane_stride; - dest_ptr1 += plane_stride; - dest_ptr2 += plane_stride; - } -} -/**************************************************************************** - * - ****************************************************************************/ -void -vp8_yv12_extend_frame_borders(YV12_BUFFER_CONFIG *ybf) -{ - int i; - unsigned char *src_ptr1, *src_ptr2; - unsigned char *dest_ptr1, *dest_ptr2; - - unsigned int Border; - int plane_stride; - int plane_height; - int plane_width; - - /***********/ - /* Y Plane */ - /***********/ - Border = ybf->border; - plane_stride = ybf->y_stride; - plane_height = ybf->y_height; - plane_width = ybf->y_width; - -#if 1 - // copy the left and right most columns out - src_ptr1 = ybf->y_buffer; - src_ptr2 = src_ptr1 + plane_width - 1; - dest_ptr1 = src_ptr1 - Border; - dest_ptr2 = src_ptr2 + 1; - copy_yleft_right_border(src_ptr1, src_ptr2, dest_ptr1, dest_ptr2, plane_height, plane_stride); -#endif - - // Now copy the top and bottom source lines into each line of the respective borders - src_ptr1 = ybf->y_buffer - Border; - src_ptr2 = 
src_ptr1 + (plane_height * plane_stride) - plane_stride; - dest_ptr1 = src_ptr1 - (Border * plane_stride); - dest_ptr2 = src_ptr2 + plane_stride; - - for (i = 0; i < (int)Border; i++) - { - vpx_memcpy(dest_ptr1, src_ptr1, plane_stride); - vpx_memcpy(dest_ptr2, src_ptr2, plane_stride); - dest_ptr1 += plane_stride; - dest_ptr2 += plane_stride; - } - - plane_stride /= 2; - plane_height /= 2; - plane_width /= 2; - Border /= 2; - - /***********/ - /* U Plane */ - /***********/ -#if 1 - // copy the left and right most columns out - src_ptr1 = ybf->u_buffer; - src_ptr2 = src_ptr1 + plane_width - 1; - dest_ptr1 = src_ptr1 - Border; - dest_ptr2 = src_ptr2 + 1; - - copy_uvleft_right_border(src_ptr1, src_ptr2, dest_ptr1, dest_ptr2, plane_height, plane_stride); - - -#endif - - // Now copy the top and bottom source lines into each line of the respective borders - src_ptr1 = ybf->u_buffer - Border; - src_ptr2 = src_ptr1 + (plane_height * plane_stride) - plane_stride; - dest_ptr1 = src_ptr1 - (Border * plane_stride); - dest_ptr2 = src_ptr2 + plane_stride; - - for (i = 0; i < (int)(Border); i++) - { - vpx_memcpy(dest_ptr1, src_ptr1, plane_stride); - vpx_memcpy(dest_ptr2, src_ptr2, plane_stride); - dest_ptr1 += plane_stride; - dest_ptr2 += plane_stride; - } - - /***********/ - /* V Plane */ - /***********/ -#if 1 - // copy the left and right most columns out - src_ptr1 = ybf->v_buffer; - src_ptr2 = src_ptr1 + plane_width - 1; - dest_ptr1 = src_ptr1 - Border; - dest_ptr2 = src_ptr2 + 1; - - copy_uvleft_right_border(src_ptr1, src_ptr2, dest_ptr1, dest_ptr2, plane_height, plane_stride); - -#endif - - // Now copy the top and bottom source lines into each line of the respective borders - src_ptr1 = ybf->v_buffer - Border; - src_ptr2 = src_ptr1 + (plane_height * plane_stride) - plane_stride; - dest_ptr1 = src_ptr1 - (Border * plane_stride); - dest_ptr2 = src_ptr2 + plane_stride; - - for (i = 0; i < (int)(Border); i++) - { - vpx_memcpy(dest_ptr1, src_ptr1, plane_stride); - 
vpx_memcpy(dest_ptr2, src_ptr2, plane_stride); - dest_ptr1 += plane_stride; - dest_ptr2 += plane_stride; - } -} -/**************************************************************************** - * - ****************************************************************************/ -void -vpxyv12_extend_frame_tbborders(YV12_BUFFER_CONFIG *ybf) -{ - int i; - unsigned char *src_ptr1, *src_ptr2; - unsigned char *dest_ptr1, *dest_ptr2; - int tid1, tid2; - - unsigned int Border; - int plane_stride; - int plane_height; - int plane_width; - - /***********/ - /* Y Plane */ - /***********/ - Border = ybf->border; - plane_stride = ybf->y_stride; - plane_height = ybf->y_height; - plane_width = ybf->y_width; - - - // Now copy the top and bottom source lines into each line of the respective borders - src_ptr1 = ybf->y_buffer - Border; - src_ptr2 = src_ptr1 + (plane_height * plane_stride) - plane_stride; - dest_ptr1 = src_ptr1 - (Border * plane_stride); - dest_ptr2 = src_ptr2 + plane_stride; - - - for (i = 0; i < (int)Border; i++) - { - dat_copy(src_ptr1, dest_ptr1, plane_stride); - dat_copy(src_ptr2, dest_ptr2, plane_stride); - dest_ptr1 += plane_stride; - dest_ptr2 += plane_stride; - } - - plane_stride /= 2; - plane_height /= 2; - plane_width /= 2; - Border /= 2; - - /***********/ - /* U Plane */ - /***********/ - // Now copy the top and bottom source lines into each line of the respective borders - src_ptr1 = ybf->u_buffer - Border; - src_ptr2 = src_ptr1 + (plane_height * plane_stride) - plane_stride; - dest_ptr1 = src_ptr1 - (Border * plane_stride); - dest_ptr2 = src_ptr2 + plane_stride; - - for (i = 0; i < (int)(Border); i++) - { - dat_copy(src_ptr1, dest_ptr1, plane_stride); - dat_copy(src_ptr2, dest_ptr2, plane_stride); - dest_ptr1 += plane_stride; - dest_ptr2 += plane_stride; - } - - /***********/ - /* V Plane */ - /***********/ - // Now copy the top and bottom source lines into each line of the respective borders - src_ptr1 = ybf->v_buffer - Border; - src_ptr2 = src_ptr1 + 
(plane_height * plane_stride) - plane_stride; - dest_ptr1 = src_ptr1 - (Border * plane_stride); - dest_ptr2 = src_ptr2 + plane_stride; - - for (i = 0; i < (int)(Border); i++) - { - tid1 = dat_copy(src_ptr1, dest_ptr1, plane_stride); - tid2 = dat_copy(src_ptr2, dest_ptr2, plane_stride); - dest_ptr1 += plane_stride; - dest_ptr2 += plane_stride; - } - - dat_wait(tid1); - dat_wait(tid2); -} - -/**************************************************************************** - * - * ROUTINE : vp8_yv12_copy_frame - * - * INPUTS : - * - * OUTPUTS : None. - * - * RETURNS : void - * - * FUNCTION : Copies the source image into the destination image and - * updates the destination's UMV borders. Because the - * borders have been update prior to this so the whole frame - * is copied, borders and all. This is also to circumvent - * using copy_left_right Border functions when copying data - * between L2 and main memory. When that occurs a cache - * clean needs to be done, which would require invalidating - * an entire frame. - * - * SPECIAL NOTES : The frames are assumed to be identical in size. 
- * - ****************************************************************************/ -void -vpxyv12_copy_frame_dma(YV12_BUFFER_CONFIG *src_ybc, YV12_BUFFER_CONFIG *dst_ybc) -{ - int yheight, uv_height; - int ystride, uv_stride; - int border; - int yoffset, uvoffset; - - border = src_ybc->border; - yheight = src_ybc->y_height; - uv_height = src_ybc->uv_height; - - ystride = src_ybc->y_stride; - uv_stride = src_ybc->uv_stride; - - yoffset = border * (ystride + 1); - uvoffset = border / 2 * (uv_stride + 1); - - dat_copy2d(DAT_2D2D, - src_ybc->y_buffer - yoffset, - dst_ybc->y_buffer - yoffset, - ystride, - yheight + 2 * border, - ystride); - dat_copy2d(DAT_2D2D, - src_ybc->u_buffer - uvoffset, - dst_ybc->u_buffer - uvoffset, - uv_stride, - uv_height + border, - uv_stride); - dat_copy2d(DAT_2D2D, - src_ybc->v_buffer - uvoffset, - dst_ybc->v_buffer - uvoffset, - uv_stride, - uv_height + border, - uv_stride); - -} - - -/**************************************************************************** - * - * ROUTINE : vp8_yv12_copy_frame - * - * INPUTS : - * - * OUTPUTS : None. - * - * RETURNS : void - * - * FUNCTION : Copies the source image into the destination image and - * updates the destination's UMV borders. - * - * SPECIAL NOTES : The frames are assumed to be identical in size. 
- * - ****************************************************************************/ -void -vp8_yv12_copy_frame(YV12_BUFFER_CONFIG *src_ybc, YV12_BUFFER_CONFIG *dst_ybc) -{ - int row; - unsigned char *source, *dest; - - source = src_ybc->y_buffer; - dest = dst_ybc->y_buffer; - - for (row = 0; row < src_ybc->y_height; row++) - { - vpx_memcpy(dest, source, src_ybc->y_width); - source += src_ybc->y_stride; - dest += dst_ybc->y_stride; - } - - source = src_ybc->u_buffer; - dest = dst_ybc->u_buffer; - - for (row = 0; row < src_ybc->uv_height; row++) - { - vpx_memcpy(dest, source, src_ybc->uv_width); - source += src_ybc->uv_stride; - dest += dst_ybc->uv_stride; - } - - source = src_ybc->v_buffer; - dest = dst_ybc->v_buffer; - - for (row = 0; row < src_ybc->uv_height; row++) - { - vpx_memcpy(dest, source, src_ybc->uv_width); - source += src_ybc->uv_stride; - dest += dst_ybc->uv_stride; - } - - vp8_yv12_extend_frame_borders(dst_ybc); -} diff -Nru libvpx-0.9.5/vpx_scale/generic/bicubic_scaler.c libvpx-0.9.6/vpx_scale/generic/bicubic_scaler.c --- libvpx-0.9.5/vpx_scale/generic/bicubic_scaler.c 2010-10-28 13:14:14.000000000 +0000 +++ libvpx-0.9.6/vpx_scale/generic/bicubic_scaler.c 2011-03-04 20:40:41.000000000 +0000 @@ -271,17 +271,17 @@ { if (!g_first_time) { - if (g_b_scaler.l_w) vpx_free(g_b_scaler.l_w); + vpx_free(g_b_scaler.l_w); - if (g_b_scaler.l_h) vpx_free(g_b_scaler.l_h); + vpx_free(g_b_scaler.l_h); - if (g_b_scaler.l_h_uv) vpx_free(g_b_scaler.l_h_uv); + vpx_free(g_b_scaler.l_h_uv); - if (g_b_scaler.c_w) vpx_free(g_b_scaler.c_w); + vpx_free(g_b_scaler.c_w); - if (g_b_scaler.c_h) vpx_free(g_b_scaler.c_h); + vpx_free(g_b_scaler.c_h); - if (g_b_scaler.c_h_uv) vpx_free(g_b_scaler.c_h_uv); + vpx_free(g_b_scaler.c_h_uv); vpx_memset(&g_b_scaler, 0, sizeof(BICUBIC_SCALER_STRUCT)); } @@ -342,21 +342,21 @@ d_h_uv = (in_height / 2) / gcd_h_uv; // allocate memory for the coefficents - if (g_b_scaler.l_w) vpx_free(g_b_scaler.l_w); + vpx_free(g_b_scaler.l_w); - if (g_b_scaler.l_h) 
vpx_free(g_b_scaler.l_h); + vpx_free(g_b_scaler.l_h); - if (g_b_scaler.l_h_uv) vpx_free(g_b_scaler.l_h_uv); + vpx_free(g_b_scaler.l_h_uv); g_b_scaler.l_w = (short *)vpx_memalign(32, out_width * 2); g_b_scaler.l_h = (short *)vpx_memalign(32, out_height * 2); g_b_scaler.l_h_uv = (short *)vpx_memalign(32, out_height * 2); - if (g_b_scaler.c_w) vpx_free(g_b_scaler.c_w); + vpx_free(g_b_scaler.c_w); - if (g_b_scaler.c_h) vpx_free(g_b_scaler.c_h); + vpx_free(g_b_scaler.c_h); - if (g_b_scaler.c_h_uv) vpx_free(g_b_scaler.c_h_uv); + vpx_free(g_b_scaler.c_h_uv); g_b_scaler.c_w = (short *)vpx_memalign(32, g_b_scaler.nw * 4 * 2); g_b_scaler.c_h = (short *)vpx_memalign(32, g_b_scaler.nh * 4 * 2); diff -Nru libvpx-0.9.5/vpx_scale/generic/yv12config.c libvpx-0.9.6/vpx_scale/generic/yv12config.c --- libvpx-0.9.5/vpx_scale/generic/yv12config.c 2010-10-28 13:14:14.000000000 +0000 +++ libvpx-0.9.6/vpx_scale/generic/yv12config.c 2011-03-04 20:40:41.000000000 +0000 @@ -24,10 +24,7 @@ { if (ybf) { - if (ybf->buffer_alloc) - { duck_free(ybf->buffer_alloc); - } ybf->buffer_alloc = 0; } @@ -81,6 +78,8 @@ ybf->u_buffer = ybf->buffer_alloc + yplane_size + (border / 2 * ybf->uv_stride) + border / 2; ybf->v_buffer = ybf->buffer_alloc + yplane_size + uvplane_size + (border / 2 * ybf->uv_stride) + border / 2; + + ybf->corrupted = 0; /* assume not currupted by errors */ } else { diff -Nru libvpx-0.9.5/vpx_scale/include/leapster/vpxscale.h libvpx-0.9.6/vpx_scale/include/leapster/vpxscale.h --- libvpx-0.9.5/vpx_scale/include/leapster/vpxscale.h 2010-10-28 13:14:14.000000000 +0000 +++ libvpx-0.9.6/vpx_scale/include/leapster/vpxscale.h 1970-01-01 00:00:00.000000000 +0000 @@ -1,62 +0,0 @@ -/* - * Copyright (c) 2010 The WebM project authors. All Rights Reserved. - * - * Use of this source code is governed by a BSD-style license - * that can be found in the LICENSE file in the root of the source - * tree. An additional intellectual property rights grant can be found - * in the file PATENTS. 
All contributing project authors may - * be found in the AUTHORS file in the root of the source tree. - */ - - -/**************************************************************************** -* -* Module Title : postp.h -* -* Description : Post processor interface -* -****************************************************************************/ -#ifndef VPXSCALE_H -#define VPXSCALE_H - - -// fwg 2004-10-14 -typedef void (*vpxvertical_band_4_5_scale_lf)(unsigned char *dest, unsigned int dest_pitch, unsigned int dest_width); -typedef void (*vpxlast_vertical_band_4_5_scale_lf)(unsigned char *dest, unsigned int dest_pitch, unsigned int dest_width); -typedef void (*vpxvertical_band_3_5_scale_lf)(unsigned char *dest, unsigned int dest_pitch, unsigned int dest_width); -typedef void (*vpxlast_vertical_band_3_5_scale_lf)(unsigned char *dest, unsigned int dest_pitch, unsigned int dest_width); -typedef void (*vpxhorizontal_line_1_2_scale_lf)(const unsigned char *source, unsigned int source_width, unsigned char *dest, unsigned int dest_width); -typedef void (*vpxhorizontal_line_3_5_scale_lf)(const unsigned char *source, unsigned int source_width, unsigned char *dest, unsigned int dest_width); -typedef void (*vpxhorizontal_line_4_5_scale_lf)(const unsigned char *source, unsigned int source_width, unsigned char *dest, unsigned int dest_width); -typedef void (*vpxvertical_band_1_2_scale_lf)(unsigned char *dest, unsigned int dest_pitch, unsigned int dest_width); -typedef void (*vpxlast_vertical_band_1_2_scale_lf)(unsigned char *dest, unsigned int dest_pitch, unsigned int dest_width); - - -typedef struct vpxglobal_scalling_ptrs_t -{ - vpxvertical_band_4_5_scale_lf vpxvertical_band_4_5_scale_t; - vpxlast_vertical_band_4_5_scale_lf vpxlast_vertical_band_4_5_scale_t; - vpxvertical_band_3_5_scale_lf vpxvertical_band_3_5_scale_t; - vpxlast_vertical_band_3_5_scale_lf vpxlast_vertical_band_3_5_scale_t; - vpxhorizontal_line_1_2_scale_lf vpxhorizontal_line_1_2_scale_t; - 
vpxhorizontal_line_3_5_scale_lf vpxhorizontal_line_3_5_scale_t; - vpxhorizontal_line_4_5_scale_lf vpxhorizontal_line_4_5_scale_t; - vpxvertical_band_1_2_scale_lf vpxvertical_band_1_2_scale_t; - vpxlast_vertical_band_1_2_scale_lf vpxlast_vertical_band_1_2_scale_t; -} vpxglobal_scalling_ptrs; - -extern struct vpxglobal_scalling_ptrs_t *g_scaling_ptrs; - -/* -extern void (*vp8_vertical_band_4_5_scale)(unsigned char * dest,unsigned int dest_pitch,unsigned int dest_width); -extern void (*vp8_last_vertical_band_4_5_scale)(unsigned char * dest,unsigned int dest_pitch,unsigned int dest_width); -extern void (*vp8_vertical_band_3_5_scale)(unsigned char * dest,unsigned int dest_pitch,unsigned int dest_width); -extern void (*vp8_last_vertical_band_3_5_scale)(unsigned char * dest,unsigned int dest_pitch,unsigned int dest_width); -extern void (*vp8_horizontal_line_1_2_scale)(const unsigned char * source,unsigned int source_width,unsigned char * dest,unsigned int dest_width); -extern void (*vp8_horizontal_line_3_5_scale)(const unsigned char * source,unsigned int source_width,unsigned char * dest,unsigned int dest_width); -extern void (*vp8_horizontal_line_4_5_scale)(const unsigned char * source,unsigned int source_width,unsigned char * dest,unsigned int dest_width); -extern void (*vp8_vertical_band_1_2_scale)(unsigned char * dest,unsigned int dest_pitch,unsigned int dest_width); -extern void (*vp8_last_vertical_band_1_2_scale)(unsigned char * dest,unsigned int dest_pitch,unsigned int dest_width); -*/ - -#endif diff -Nru libvpx-0.9.5/vpx_scale/intel_linux/scaleopt.c libvpx-0.9.6/vpx_scale/intel_linux/scaleopt.c --- libvpx-0.9.5/vpx_scale/intel_linux/scaleopt.c 2010-10-28 13:14:14.000000000 +0000 +++ libvpx-0.9.6/vpx_scale/intel_linux/scaleopt.c 1970-01-01 00:00:00.000000000 +0000 @@ -1,1853 +0,0 @@ -/* - * Copyright (c) 2010 The WebM project authors. All Rights Reserved. 
- * - * Use of this source code is governed by a BSD-style license - * that can be found in the LICENSE file in the root of the source - * tree. An additional intellectual property rights grant can be found - * in the file PATENTS. All contributing project authors may - * be found in the AUTHORS file in the root of the source tree. - */ - - -/**************************************************************************** -* -* Module Title : scaleopt.cpp -* -* Description : Optimized scaling functions -* -****************************************************************************/ -#include "pragmas.h" - -/**************************************************************************** -* Module Statics -****************************************************************************/ -#if 0 -__declspec(align(16)) const static unsigned short one_fifth[] = { 51, 51, 51, 51 }; -__declspec(align(16)) const static unsigned short two_fifths[] = { 102, 102, 102, 102 }; -__declspec(align(16)) const static unsigned short three_fifths[] = { 154, 154, 154, 154 }; -__declspec(align(16)) const static unsigned short four_fifths[] = { 205, 205, 205, 205 }; -__declspec(align(16)) const static unsigned short round_values[] = { 128, 128, 128, 128 }; -__declspec(align(16)) const static unsigned short four_ones[] = { 1, 1, 1, 1}; -__declspec(align(16)) const static unsigned short const45_2[] = {205, 154, 102, 51 }; -__declspec(align(16)) const static unsigned short const45_1[] = { 51, 102, 154, 205 }; -__declspec(align(16)) const static unsigned char mask45[] = { 0, 0, 0, 0, 0, 0, 255, 0}; -__declspec(align(16)) const static unsigned short const35_2[] = { 154, 51, 205, 102 }; -__declspec(align(16)) const static unsigned short const35_1[] = { 102, 205, 51, 154 }; -#endif - -#include "vpx_scale/vpxscale.h" -#include "vpx_mem/vpx_mem.h" - -/**************************************************************************** - * - * ROUTINE : horizontal_line_3_5_scale_mmx - * - * INPUTS : const unsigned 
char *source : - * unsigned int source_width : - * unsigned char *dest : - * unsigned int dest_width : - * - * OUTPUTS : None. - * - * RETURNS : void - * - * FUNCTION : 3 to 5 up-scaling of a horizontal line of pixels. - * - * SPECIAL NOTES : None. - * - ****************************************************************************/ -static -void horizontal_line_3_5_scale_mmx -( - const unsigned char *source, - unsigned int source_width, - unsigned char *dest, - unsigned int dest_width -) -{ - __declspec(align(16)) unsigned short const35_2[] = { 154, 51, 205, 102 }; - __declspec(align(16)) unsigned short const35_1[] = { 102, 205, 51, 154 }; - __declspec(align(16)) unsigned short round_values[] = { 128, 128, 128, 128 }; - - (void) dest_width; - - __asm - { - - push ebx - - mov esi, source - mov edi, dest - - mov ecx, source_width - lea edx, [esi+ecx-3]; - - movq mm5, const35_1 // mm5 = 66 xx cd xx 33 xx 9a xx - movq mm6, const35_2 // mm6 = 9a xx 33 xx cd xx 66 xx - - movq mm4, round_values // mm4 = 80 xx 80 xx 80 xx 80 xx - pxor mm7, mm7 // clear mm7 - - horiz_line_3_5_loop: - - mov eax, DWORD PTR [esi] // eax = 00 01 02 03 - mov ebx, eax - - and ebx, 0xffff00 // ebx = xx 01 02 xx - mov ecx, eax // ecx = 00 01 02 03 - - and eax, 0xffff0000 // eax = xx xx 02 03 - xor ecx, eax // ecx = 00 01 xx xx - - shr ebx, 8 // ebx = 01 02 xx xx - or eax, ebx // eax = 01 02 02 03 - - shl ebx, 16 // ebx = xx xx 01 02 - movd mm1, eax // mm1 = 01 02 02 03 xx xx xx xx - - or ebx, ecx // ebx = 00 01 01 02 - punpcklbw mm1, mm7 // mm1 = 01 xx 02 xx 02 xx 03 xx - - movd mm0, ebx // mm0 = 00 01 01 02 - pmullw mm1, mm6 // - - punpcklbw mm0, mm7 // mm0 = 00 xx 01 xx 01 xx 02 xx - pmullw mm0, mm5 // - - mov [edi], ebx // writeoutput 00 xx xx xx - add esi, 3 - - add edi, 5 - paddw mm0, mm1 - - paddw mm0, mm4 - psrlw mm0, 8 - - cmp esi, edx - packuswb mm0, mm7 - - movd DWORD Ptr [edi-4], mm0 - jl horiz_line_3_5_loop - -//Exit: - mov eax, DWORD PTR [esi] // eax = 00 01 02 03 - mov ebx, eax - - and 
ebx, 0xffff00 // ebx = xx 01 02 xx - mov ecx, eax // ecx = 00 01 02 03 - - and eax, 0xffff0000 // eax = xx xx 02 03 - xor ecx, eax // ecx = 00 01 xx xx - - shr ebx, 8 // ebx = 01 02 xx xx - or eax, ebx // eax = 01 02 02 03 - - shl eax, 8 // eax = xx 01 02 02 - and eax, 0xffff0000 // eax = xx xx 02 02 - - or eax, ebx // eax = 01 02 02 02 - - shl ebx, 16 // ebx = xx xx 01 02 - movd mm1, eax // mm1 = 01 02 02 02 xx xx xx xx - - or ebx, ecx // ebx = 00 01 01 02 - punpcklbw mm1, mm7 // mm1 = 01 xx 02 xx 02 xx 02 xx - - movd mm0, ebx // mm0 = 00 01 01 02 - pmullw mm1, mm6 // - - punpcklbw mm0, mm7 // mm0 = 00 xx 01 xx 01 xx 02 xx - pmullw mm0, mm5 // - - mov [edi], ebx // writeoutput 00 xx xx xx - paddw mm0, mm1 - - paddw mm0, mm4 - psrlw mm0, 8 - - packuswb mm0, mm7 - movd DWORD Ptr [edi+1], mm0 - - pop ebx - - } - - /* - const unsigned char *src = source; - unsigned char *des = dest; - unsigned int a, b, c ; - unsigned int i; - (void) dest_width; - - for ( i=0; i> 8); - c = src[2] ; - // 4 * left + 1 * right /5 - des [2] = (UINT8) (( b * 205 + c * 51 + 128 ) >> 8); - // 1 * left + 4 * right /5 - des [3] = (UINT8) (( b * 51 + c * 205 + 128 ) >> 8); - - a = src[3]; - // 3 * left + 2 * right /5 - des [4] = (UINT8) (( c * 154 + a * 102 + 128 ) >> 8); - - src += 3; - des += 5; - } - - a = src[0]; - b = src[1]; - des [0] = (UINT8) (a); - // 2 * left + 3 * right /5 - des [1] = (UINT8) (( a * 102 + 154 * b + 128 ) >> 8); - c = src[2] ; - // 4 * left + 1 * right /5 - des [2] = (UINT8) (( b * 205 + c * 51 + 128 ) >> 8); - // 1 * left + 4 * right /5 - des [3] = (UINT8) (( b * 51 + c * 205 + 128 ) >> 8); - - des [4] = (UINT8) (c); - */ -} - - -/**************************************************************************** - * - * ROUTINE : horizontal_line_4_5_scale_mmx - * - * INPUTS : const unsigned char *source : - * unsigned int source_width : - * unsigned char *dest : - * unsigned int dest_width : - * - * OUTPUTS : None. 
- * - * RETURNS : void - * - * FUNCTION : 4 to 5 up-scaling of a horizontal line of pixels. - * - * SPECIAL NOTES : None. - * - ****************************************************************************/ -static -void horizontal_line_4_5_scale_mmx -( - const unsigned char *source, - unsigned int source_width, - unsigned char *dest, - unsigned int dest_width -) -{ - __declspec(align(16)) unsigned short round_values[] = { 128, 128, 128, 128 }; - __declspec(align(16)) unsigned short const45_2[] = {205, 154, 102, 51 }; - __declspec(align(16)) unsigned short const45_1[] = { 51, 102, 154, 205 }; - __declspec(align(16)) unsigned char mask45[] = { 0, 0, 0, 0, 0, 0, 255, 0}; - - (void)dest_width; - - __asm - { - - mov esi, source - mov edi, dest - - mov ecx, source_width - lea edx, [esi+ecx-8]; - - movq mm5, const45_1 // mm5 = 33 xx 66 xx 9a xx cd xx - movq mm6, const45_2 // mm6 = cd xx 9a xx 66 xx 33 xx - - movq mm4, round_values // mm4 = 80 xx 80 xx 80 xx 80 xx - pxor mm7, mm7 // clear mm7 - - horiz_line_4_5_loop: - - movq mm0, QWORD PTR [esi] // mm0 = 00 01 02 03 04 05 06 07 - movq mm1, QWORD PTR [esi+1]; // mm1 = 01 02 03 04 05 06 07 08 - - movq mm2, mm0 // mm2 = 00 01 02 03 04 05 06 07 - movq mm3, mm1 // mm3 = 01 02 03 04 05 06 07 08 - - movd DWORD PTR [edi], mm0 // write output 00 xx xx xx - punpcklbw mm0, mm7 // mm0 = 00 xx 01 xx 02 xx 03 xx - - punpcklbw mm1, mm7 // mm1 = 01 xx 02 xx 03 xx 04 xx - pmullw mm0, mm5 // 00* 51 01*102 02*154 03*205 - - pmullw mm1, mm6 // 01*205 02*154 03*102 04* 51 - punpckhbw mm2, mm7 // mm2 = 04 xx 05 xx 06 xx 07 xx - - movd DWORD PTR [edi+5], mm2 // write ouput 05 xx xx xx - pmullw mm2, mm5 // 04* 51 05*102 06*154 07*205 - - punpckhbw mm3, mm7 // mm3 = 05 xx 06 xx 07 xx 08 xx - pmullw mm3, mm6 // 05*205 06*154 07*102 08* 51 - - paddw mm0, mm1 // added round values - paddw mm0, mm4 - - psrlw mm0, 8 // output: 01 xx 02 xx 03 xx 04 xx - packuswb mm0, mm7 - - movd DWORD PTR [edi+1], mm0 // write output 01 02 03 04 - add edi, 10 - - add 
esi, 8 - paddw mm2, mm3 // - - paddw mm2, mm4 // added round values - cmp esi, edx - - psrlw mm2, 8 - packuswb mm2, mm7 - - movd DWORD PTR [edi-4], mm2 // writeoutput 06 07 08 09 - jl horiz_line_4_5_loop - -//Exit: - movq mm0, [esi] // mm0 = 00 01 02 03 04 05 06 07 - movq mm1, mm0 // mm1 = 00 01 02 03 04 05 06 07 - - movq mm2, mm0 // mm2 = 00 01 02 03 04 05 06 07 - psrlq mm1, 8 // mm1 = 01 02 03 04 05 06 07 00 - - movq mm3, mask45 // mm3 = 00 00 00 00 00 00 ff 00 - pand mm3, mm1 // mm3 = 00 00 00 00 00 00 07 00 - - psllq mm3, 8 // mm3 = 00 00 00 00 00 00 00 07 - por mm1, mm3 // mm1 = 01 02 03 04 05 06 07 07 - - movq mm3, mm1 - - movd DWORD PTR [edi], mm0 // write output 00 xx xx xx - punpcklbw mm0, mm7 // mm0 = 00 xx 01 xx 02 xx 03 xx - - punpcklbw mm1, mm7 // mm1 = 01 xx 02 xx 03 xx 04 xx - pmullw mm0, mm5 // 00* 51 01*102 02*154 03*205 - - pmullw mm1, mm6 // 01*205 02*154 03*102 04* 51 - punpckhbw mm2, mm7 // mm2 = 04 xx 05 xx 06 xx 07 xx - - movd DWORD PTR [edi+5], mm2 // write ouput 05 xx xx xx - pmullw mm2, mm5 // 04* 51 05*102 06*154 07*205 - - punpckhbw mm3, mm7 // mm3 = 05 xx 06 xx 07 xx 08 xx - pmullw mm3, mm6 // 05*205 06*154 07*102 07* 51 - - paddw mm0, mm1 // added round values - paddw mm0, mm4 - - psrlw mm0, 8 // output: 01 xx 02 xx 03 xx 04 xx - packuswb mm0, mm7 // 01 02 03 04 xx xx xx xx - - movd DWORD PTR [edi+1], mm0 // write output 01 02 03 04 - paddw mm2, mm3 // - - paddw mm2, mm4 // added round values - psrlw mm2, 8 - - packuswb mm2, mm7 - movd DWORD PTR [edi+6], mm2 // writeoutput 06 07 08 09 - - - } - /* - const unsigned char *src = source; - unsigned char *des = dest; - unsigned int a, b, c ; - unsigned i; - (void) dest_width; - - for ( i=0; i> 8); - c = src[2] * 154; - a = src[3]; - des [2] = (UINT8) (( b * 102 + c + 128) >> 8); - des [3] = (UINT8) (( c + 102 * a + 128) >> 8); - b = src[4]; - des [4] = (UINT8) (( a * 205 + 51 * b + 128) >> 8); - - src += 4; - des += 5; - } - - a = src[0]; - b = src[1]; - des [0] = (UINT8) (a); - des [1] = 
(UINT8) (( a * 51 + 205 * b + 128) >> 8); - c = src[2] * 154; - a = src[3]; - des [2] = (UINT8) (( b * 102 + c + 128) >> 8); - des [3] = (UINT8) (( c + 102 * a + 128) >> 8); - des [4] = (UINT8) (a); - */ -} - -/**************************************************************************** - * - * ROUTINE : vertical_band_4_5_scale_mmx - * - * INPUTS : unsigned char *dest : - * unsigned int dest_pitch : - * unsigned int dest_width : - * - * OUTPUTS : None. - * - * RETURNS : void - * - * FUNCTION : 4 to 5 up-scaling of a 4 pixel high band of pixels. - * - * SPECIAL NOTES : The routine uses the first line of the band below - * the current band. The function also has a "C" only - * version. - * - ****************************************************************************/ -static -void vertical_band_4_5_scale_mmx -( - unsigned char *dest, - unsigned int dest_pitch, - unsigned int dest_width -) -{ - - __declspec(align(16)) unsigned short one_fifth[] = { 51, 51, 51, 51 }; - __declspec(align(16)) unsigned short two_fifths[] = { 102, 102, 102, 102 }; - __declspec(align(16)) unsigned short three_fifths[] = { 154, 154, 154, 154 }; - __declspec(align(16)) unsigned short four_fifths[] = { 205, 205, 205, 205 }; - __declspec(align(16)) unsigned short round_values[] = { 128, 128, 128, 128 }; - - __asm - { - - mov esi, dest // Get the source and destination pointer - mov ecx, dest_pitch // Get the pitch size - - lea edi, [esi+ecx*2] // tow lines below - add edi, ecx // three lines below - - pxor mm7, mm7 // clear out mm7 - mov edx, dest_width // Loop counter - - vs_4_5_loop: - - movq mm0, QWORD ptr [esi] // src[0]; - movq mm1, QWORD ptr [esi+ecx] // src[1]; - - movq mm2, mm0 // Make a copy - punpcklbw mm0, mm7 // unpack low to word - - movq mm5, one_fifth - punpckhbw mm2, mm7 // unpack high to word - - pmullw mm0, mm5 // a * 1/5 - - movq mm3, mm1 // make a copy - punpcklbw mm1, mm7 // unpack low to word - - pmullw mm2, mm5 // a * 1/5 - movq mm6, four_fifths // constan - - movq mm4, 
mm1 // copy of low b - pmullw mm4, mm6 // b * 4/5 - - punpckhbw mm3, mm7 // unpack high to word - movq mm5, mm3 // copy of high b - - pmullw mm5, mm6 // b * 4/5 - paddw mm0, mm4 // a * 1/5 + b * 4/5 - - paddw mm2, mm5 // a * 1/5 + b * 4/5 - paddw mm0, round_values // + 128 - - paddw mm2, round_values // + 128 - psrlw mm0, 8 - - psrlw mm2, 8 - packuswb mm0, mm2 // des [1] - - movq QWORD ptr [esi+ecx], mm0 // write des[1] - movq mm0, [esi+ecx*2] // mm0 = src[2] - - // mm1, mm3 --- Src[1] - // mm0 --- Src[2] - // mm7 for unpacking - - movq mm5, two_fifths - movq mm2, mm0 // make a copy - - pmullw mm1, mm5 // b * 2/5 - movq mm6, three_fifths - - - punpcklbw mm0, mm7 // unpack low to word - pmullw mm3, mm5 // b * 2/5 - - movq mm4, mm0 // make copy of c - punpckhbw mm2, mm7 // unpack high to word - - pmullw mm4, mm6 // c * 3/5 - movq mm5, mm2 - - pmullw mm5, mm6 // c * 3/5 - paddw mm1, mm4 // b * 2/5 + c * 3/5 - - paddw mm3, mm5 // b * 2/5 + c * 3/5 - paddw mm1, round_values // + 128 - - paddw mm3, round_values // + 128 - psrlw mm1, 8 - - psrlw mm3, 8 - packuswb mm1, mm3 // des[2] - - movq QWORD ptr [esi+ecx*2], mm1 // write des[2] - movq mm1, [edi] // mm1=Src[3]; - - // mm0, mm2 --- Src[2] - // mm1 --- Src[3] - // mm6 --- 3/5 - // mm7 for unpacking - - pmullw mm0, mm6 // c * 3/5 - movq mm5, two_fifths // mm5 = 2/5 - - movq mm3, mm1 // make a copy - pmullw mm2, mm6 // c * 3/5 - - punpcklbw mm1, mm7 // unpack low - movq mm4, mm1 // make a copy - - punpckhbw mm3, mm7 // unpack high - pmullw mm4, mm5 // d * 2/5 - - movq mm6, mm3 // make a copy - pmullw mm6, mm5 // d * 2/5 - - paddw mm0, mm4 // c * 3/5 + d * 2/5 - paddw mm2, mm6 // c * 3/5 + d * 2/5 - - paddw mm0, round_values // + 128 - paddw mm2, round_values // + 128 - - psrlw mm0, 8 - psrlw mm2, 8 - - packuswb mm0, mm2 // des[3] - movq QWORD ptr [edi], mm0 // write des[3] - - // mm1, mm3 --- Src[3] - // mm7 -- cleared for unpacking - - movq mm0, [edi+ecx*2] // mm0, Src[0] of the next group - - movq mm5, four_fifths // 
mm5 = 4/5 - pmullw mm1, mm5 // d * 4/5 - - movq mm6, one_fifth // mm6 = 1/5 - movq mm2, mm0 // make a copy - - pmullw mm3, mm5 // d * 4/5 - punpcklbw mm0, mm7 // unpack low - - pmullw mm0, mm6 // an * 1/5 - punpckhbw mm2, mm7 // unpack high - - paddw mm1, mm0 // d * 4/5 + an * 1/5 - pmullw mm2, mm6 // an * 1/5 - - paddw mm3, mm2 // d * 4/5 + an * 1/5 - paddw mm1, round_values // + 128 - - paddw mm3, round_values // + 128 - psrlw mm1, 8 - - psrlw mm3, 8 - packuswb mm1, mm3 // des[4] - - movq QWORD ptr [edi+ecx], mm1 // write des[4] - - add edi, 8 - add esi, 8 - - sub edx, 8 - jg vs_4_5_loop - } -} - -/**************************************************************************** - * - * ROUTINE : last_vertical_band_4_5_scale_mmx - * - * INPUTS : unsigned char *dest : - * unsigned int dest_pitch : - * unsigned int dest_width : - * - * OUTPUTS : None. - * - * RETURNS : None - * - * FUNCTION : 4 to 5 up-scaling of the last 4-pixel high band in an image. - * - * SPECIAL NOTES : The routine uses the first line of the band below - * the current band. The function also has an "C" only - * version. 
- * - ****************************************************************************/ -static -void last_vertical_band_4_5_scale_mmx -( - unsigned char *dest, - unsigned int dest_pitch, - unsigned int dest_width -) -{ - __declspec(align(16)) unsigned short one_fifth[] = { 51, 51, 51, 51 }; - __declspec(align(16)) unsigned short two_fifths[] = { 102, 102, 102, 102 }; - __declspec(align(16)) unsigned short three_fifths[] = { 154, 154, 154, 154 }; - __declspec(align(16)) unsigned short four_fifths[] = { 205, 205, 205, 205 }; - __declspec(align(16)) unsigned short round_values[] = { 128, 128, 128, 128 }; - - __asm - { - mov esi, dest // Get the source and destination pointer - mov ecx, dest_pitch // Get the pitch size - - lea edi, [esi+ecx*2] // tow lines below - add edi, ecx // three lines below - - pxor mm7, mm7 // clear out mm7 - mov edx, dest_width // Loop counter - - last_vs_4_5_loop: - - movq mm0, QWORD ptr [esi] // src[0]; - movq mm1, QWORD ptr [esi+ecx] // src[1]; - - movq mm2, mm0 // Make a copy - punpcklbw mm0, mm7 // unpack low to word - - movq mm5, one_fifth - punpckhbw mm2, mm7 // unpack high to word - - pmullw mm0, mm5 // a * 1/5 - - movq mm3, mm1 // make a copy - punpcklbw mm1, mm7 // unpack low to word - - pmullw mm2, mm5 // a * 1/5 - movq mm6, four_fifths // constan - - movq mm4, mm1 // copy of low b - pmullw mm4, mm6 // b * 4/5 - - punpckhbw mm3, mm7 // unpack high to word - movq mm5, mm3 // copy of high b - - pmullw mm5, mm6 // b * 4/5 - paddw mm0, mm4 // a * 1/5 + b * 4/5 - - paddw mm2, mm5 // a * 1/5 + b * 4/5 - paddw mm0, round_values // + 128 - - paddw mm2, round_values // + 128 - psrlw mm0, 8 - - psrlw mm2, 8 - packuswb mm0, mm2 // des [1] - - movq QWORD ptr [esi+ecx], mm0 // write des[1] - movq mm0, [esi+ecx*2] // mm0 = src[2] - - // mm1, mm3 --- Src[1] - // mm0 --- Src[2] - // mm7 for unpacking - - movq mm5, two_fifths - movq mm2, mm0 // make a copy - - pmullw mm1, mm5 // b * 2/5 - movq mm6, three_fifths - - - punpcklbw mm0, mm7 // unpack low to 
word - pmullw mm3, mm5 // b * 2/5 - - movq mm4, mm0 // make copy of c - punpckhbw mm2, mm7 // unpack high to word - - pmullw mm4, mm6 // c * 3/5 - movq mm5, mm2 - - pmullw mm5, mm6 // c * 3/5 - paddw mm1, mm4 // b * 2/5 + c * 3/5 - - paddw mm3, mm5 // b * 2/5 + c * 3/5 - paddw mm1, round_values // + 128 - - paddw mm3, round_values // + 128 - psrlw mm1, 8 - - psrlw mm3, 8 - packuswb mm1, mm3 // des[2] - - movq QWORD ptr [esi+ecx*2], mm1 // write des[2] - movq mm1, [edi] // mm1=Src[3]; - - movq QWORD ptr [edi+ecx], mm1 // write des[4]; - - // mm0, mm2 --- Src[2] - // mm1 --- Src[3] - // mm6 --- 3/5 - // mm7 for unpacking - - pmullw mm0, mm6 // c * 3/5 - movq mm5, two_fifths // mm5 = 2/5 - - movq mm3, mm1 // make a copy - pmullw mm2, mm6 // c * 3/5 - - punpcklbw mm1, mm7 // unpack low - movq mm4, mm1 // make a copy - - punpckhbw mm3, mm7 // unpack high - pmullw mm4, mm5 // d * 2/5 - - movq mm6, mm3 // make a copy - pmullw mm6, mm5 // d * 2/5 - - paddw mm0, mm4 // c * 3/5 + d * 2/5 - paddw mm2, mm6 // c * 3/5 + d * 2/5 - - paddw mm0, round_values // + 128 - paddw mm2, round_values // + 128 - - psrlw mm0, 8 - psrlw mm2, 8 - - packuswb mm0, mm2 // des[3] - movq QWORD ptr [edi], mm0 // write des[3] - - // mm1, mm3 --- Src[3] - // mm7 -- cleared for unpacking - add edi, 8 - add esi, 8 - - sub edx, 8 - jg last_vs_4_5_loop - } -} - -/**************************************************************************** - * - * ROUTINE : vertical_band_3_5_scale_mmx - * - * INPUTS : unsigned char *dest : - * unsigned int dest_pitch : - * unsigned int dest_width : - * - * OUTPUTS : None. - * - * RETURNS : void - * - * FUNCTION : 3 to 5 up-scaling of a 3-pixel high band of pixels. - * - * SPECIAL NOTES : The routine uses the first line of the band below - * the current band. The function also has an "C" only - * version. 
- * - ****************************************************************************/ -static -void vertical_band_3_5_scale_mmx -( - unsigned char *dest, - unsigned int dest_pitch, - unsigned int dest_width -) -{ - __declspec(align(16)) unsigned short one_fifth[] = { 51, 51, 51, 51 }; - __declspec(align(16)) unsigned short two_fifths[] = { 102, 102, 102, 102 }; - __declspec(align(16)) unsigned short three_fifths[] = { 154, 154, 154, 154 }; - __declspec(align(16)) unsigned short four_fifths[] = { 205, 205, 205, 205 }; - __declspec(align(16)) unsigned short round_values[] = { 128, 128, 128, 128 }; - - __asm - { - mov esi, dest // Get the source and destination pointer - mov ecx, dest_pitch // Get the pitch size - - lea edi, [esi+ecx*2] // tow lines below - add edi, ecx // three lines below - - pxor mm7, mm7 // clear out mm7 - mov edx, dest_width // Loop counter - - vs_3_5_loop: - - movq mm0, QWORD ptr [esi] // src[0]; - movq mm1, QWORD ptr [esi+ecx] // src[1]; - - movq mm2, mm0 // Make a copy - punpcklbw mm0, mm7 // unpack low to word - - movq mm5, two_fifths // mm5 = 2/5 - punpckhbw mm2, mm7 // unpack high to word - - pmullw mm0, mm5 // a * 2/5 - - movq mm3, mm1 // make a copy - punpcklbw mm1, mm7 // unpack low to word - - pmullw mm2, mm5 // a * 2/5 - movq mm6, three_fifths // mm6 = 3/5 - - movq mm4, mm1 // copy of low b - pmullw mm4, mm6 // b * 3/5 - - punpckhbw mm3, mm7 // unpack high to word - movq mm5, mm3 // copy of high b - - pmullw mm5, mm6 // b * 3/5 - paddw mm0, mm4 // a * 2/5 + b * 3/5 - - paddw mm2, mm5 // a * 2/5 + b * 3/5 - paddw mm0, round_values // + 128 - - paddw mm2, round_values // + 128 - psrlw mm0, 8 - - psrlw mm2, 8 - packuswb mm0, mm2 // des [1] - - movq QWORD ptr [esi+ecx], mm0 // write des[1] - movq mm0, [esi+ecx*2] // mm0 = src[2] - - // mm1, mm3 --- Src[1] - // mm0 --- Src[2] - // mm7 for unpacking - - movq mm4, mm1 // b low - pmullw mm1, four_fifths // b * 4/5 low - - movq mm5, mm3 // b high - pmullw mm3, four_fifths // b * 4/5 high - - movq 
mm2, mm0 // c - pmullw mm4, one_fifth // b * 1/5 - - punpcklbw mm0, mm7 // c low - pmullw mm5, one_fifth // b * 1/5 - - movq mm6, mm0 // make copy of c low - punpckhbw mm2, mm7 // c high - - pmullw mm6, one_fifth // c * 1/5 low - movq mm7, mm2 // make copy of c high - - pmullw mm7, one_fifth // c * 1/5 high - paddw mm1, mm6 // b * 4/5 + c * 1/5 low - - paddw mm3, mm7 // b * 4/5 + c * 1/5 high - movq mm6, mm0 // make copy of c low - - pmullw mm6, four_fifths // c * 4/5 low - movq mm7, mm2 // make copy of c high - - pmullw mm7, four_fifths // c * 4/5 high - - paddw mm4, mm6 // b * 1/5 + c * 4/5 low - paddw mm5, mm7 // b * 1/5 + c * 4/5 high - - paddw mm1, round_values // + 128 - paddw mm3, round_values // + 128 - - psrlw mm1, 8 - psrlw mm3, 8 - - packuswb mm1, mm3 // des[2] - movq QWORD ptr [esi+ecx*2], mm1 // write des[2] - - paddw mm4, round_values // + 128 - paddw mm5, round_values // + 128 - - psrlw mm4, 8 - psrlw mm5, 8 - - packuswb mm4, mm5 // des[3] - movq QWORD ptr [edi], mm4 // write des[3] - - // mm0, mm2 --- Src[3] - - pxor mm7, mm7 // clear mm7 for unpacking - movq mm1, [edi+ecx*2] // mm1 = Src[0] of the next group - - movq mm5, three_fifths // mm5 = 3/5 - pmullw mm0, mm5 // d * 3/5 - - movq mm6, two_fifths // mm6 = 2/5 - movq mm3, mm1 // make a copy - - pmullw mm2, mm5 // d * 3/5 - punpcklbw mm1, mm7 // unpack low - - pmullw mm1, mm6 // an * 2/5 - punpckhbw mm3, mm7 // unpack high - - paddw mm0, mm1 // d * 3/5 + an * 2/5 - pmullw mm3, mm6 // an * 2/5 - - paddw mm2, mm3 // d * 3/5 + an * 2/5 - paddw mm0, round_values // + 128 - - paddw mm2, round_values // + 128 - psrlw mm0, 8 - - psrlw mm2, 8 - packuswb mm0, mm2 // des[4] - - movq QWORD ptr [edi+ecx], mm0 // write des[4] - - add edi, 8 - add esi, 8 - - sub edx, 8 - jg vs_3_5_loop - } -} - -/**************************************************************************** - * - * ROUTINE : last_vertical_band_3_5_scale_mmx - * - * INPUTS : unsigned char *dest : - * unsigned int dest_pitch : - * unsigned int 
dest_width : - * - * OUTPUTS : None. - * - * RETURNS : void - * - * FUNCTION : 3 to 5 up-scaling of a 3-pixel high band of pixels. - * - * SPECIAL NOTES : The routine uses the first line of the band below - * the current band. The function also has an "C" only - * version. - * - ****************************************************************************/ -static -void last_vertical_band_3_5_scale_mmx -( - unsigned char *dest, - unsigned int dest_pitch, - unsigned int dest_width -) -{ - __declspec(align(16)) unsigned short one_fifth[] = { 51, 51, 51, 51 }; - __declspec(align(16)) unsigned short two_fifths[] = { 102, 102, 102, 102 }; - __declspec(align(16)) unsigned short three_fifths[] = { 154, 154, 154, 154 }; - __declspec(align(16)) unsigned short four_fifths[] = { 205, 205, 205, 205 }; - __declspec(align(16)) unsigned short round_values[] = { 128, 128, 128, 128 }; - __asm - { - mov esi, dest // Get the source and destination pointer - mov ecx, dest_pitch // Get the pitch size - - lea edi, [esi+ecx*2] // tow lines below - add edi, ecx // three lines below - - pxor mm7, mm7 // clear out mm7 - mov edx, dest_width // Loop counter - - - last_vs_3_5_loop: - - movq mm0, QWORD ptr [esi] // src[0]; - movq mm1, QWORD ptr [esi+ecx] // src[1]; - - movq mm2, mm0 // Make a copy - punpcklbw mm0, mm7 // unpack low to word - - movq mm5, two_fifths // mm5 = 2/5 - punpckhbw mm2, mm7 // unpack high to word - - pmullw mm0, mm5 // a * 2/5 - - movq mm3, mm1 // make a copy - punpcklbw mm1, mm7 // unpack low to word - - pmullw mm2, mm5 // a * 2/5 - movq mm6, three_fifths // mm6 = 3/5 - - movq mm4, mm1 // copy of low b - pmullw mm4, mm6 // b * 3/5 - - punpckhbw mm3, mm7 // unpack high to word - movq mm5, mm3 // copy of high b - - pmullw mm5, mm6 // b * 3/5 - paddw mm0, mm4 // a * 2/5 + b * 3/5 - - paddw mm2, mm5 // a * 2/5 + b * 3/5 - paddw mm0, round_values // + 128 - - paddw mm2, round_values // + 128 - psrlw mm0, 8 - - psrlw mm2, 8 - packuswb mm0, mm2 // des [1] - - movq QWORD ptr 
[esi+ecx], mm0 // write des[1] - movq mm0, [esi+ecx*2] // mm0 = src[2] - - - - // mm1, mm3 --- Src[1] - // mm0 --- Src[2] - // mm7 for unpacking - - movq mm4, mm1 // b low - pmullw mm1, four_fifths // b * 4/5 low - - movq QWORD ptr [edi+ecx], mm0 // write des[4] - - movq mm5, mm3 // b high - pmullw mm3, four_fifths // b * 4/5 high - - movq mm2, mm0 // c - pmullw mm4, one_fifth // b * 1/5 - - punpcklbw mm0, mm7 // c low - pmullw mm5, one_fifth // b * 1/5 - - movq mm6, mm0 // make copy of c low - punpckhbw mm2, mm7 // c high - - pmullw mm6, one_fifth // c * 1/5 low - movq mm7, mm2 // make copy of c high - - pmullw mm7, one_fifth // c * 1/5 high - paddw mm1, mm6 // b * 4/5 + c * 1/5 low - - paddw mm3, mm7 // b * 4/5 + c * 1/5 high - movq mm6, mm0 // make copy of c low - - pmullw mm6, four_fifths // c * 4/5 low - movq mm7, mm2 // make copy of c high - - pmullw mm7, four_fifths // c * 4/5 high - - paddw mm4, mm6 // b * 1/5 + c * 4/5 low - paddw mm5, mm7 // b * 1/5 + c * 4/5 high - - paddw mm1, round_values // + 128 - paddw mm3, round_values // + 128 - - psrlw mm1, 8 - psrlw mm3, 8 - - packuswb mm1, mm3 // des[2] - movq QWORD ptr [esi+ecx*2], mm1 // write des[2] - - paddw mm4, round_values // + 128 - paddw mm5, round_values // + 128 - - psrlw mm4, 8 - psrlw mm5, 8 - - packuswb mm4, mm5 // des[3] - movq QWORD ptr [edi], mm4 // write des[3] - - // mm0, mm2 --- Src[3] - - add edi, 8 - add esi, 8 - - sub edx, 8 - jg last_vs_3_5_loop - } -} - -/**************************************************************************** - * - * ROUTINE : vertical_band_1_2_scale_mmx - * - * INPUTS : unsigned char *dest : - * unsigned int dest_pitch : - * unsigned int dest_width : - * - * OUTPUTS : None. - * - * RETURNS : void - * - * FUNCTION : 1 to 2 up-scaling of a band of pixels. - * - * SPECIAL NOTES : The routine uses the first line of the band below - * the current band. The function also has an "C" only - * version. 
- * - ****************************************************************************/ -static -void vertical_band_1_2_scale_mmx -( - unsigned char *dest, - unsigned int dest_pitch, - unsigned int dest_width -) -{ - __declspec(align(16))unsigned short four_ones[] = { 1, 1, 1, 1}; - - __asm - { - - mov esi, dest // Get the source and destination pointer - mov ecx, dest_pitch // Get the pitch size - - pxor mm7, mm7 // clear out mm7 - mov edx, dest_width // Loop counter - - vs_1_2_loop: - - movq mm0, [esi] // get Src[0] - movq mm1, [esi + ecx * 2] // get Src[1] - - movq mm2, mm0 // make copy before unpack - movq mm3, mm1 // make copy before unpack - - punpcklbw mm0, mm7 // low Src[0] - movq mm6, four_ones // mm6= 1, 1, 1, 1 - - punpcklbw mm1, mm7 // low Src[1] - paddw mm0, mm1 // low (a + b) - - punpckhbw mm2, mm7 // high Src[0] - paddw mm0, mm6 // low (a + b + 1) - - punpckhbw mm3, mm7 - paddw mm2, mm3 // high (a + b ) - - psraw mm0, 1 // low (a + b +1 )/2 - paddw mm2, mm6 // high (a + b + 1) - - psraw mm2, 1 // high (a + b + 1)/2 - packuswb mm0, mm2 // pack results - - movq [esi+ecx], mm0 // write out eight bytes - add esi, 8 - - sub edx, 8 - jg vs_1_2_loop - } - -} - -/**************************************************************************** - * - * ROUTINE : last_vertical_band_1_2_scale_mmx - * - * INPUTS : unsigned char *dest : - * unsigned int dest_pitch : - * unsigned int dest_width : - * - * OUTPUTS : None. - * - * RETURNS : void - * - * FUNCTION : 1 to 2 up-scaling of band of pixels. - * - * SPECIAL NOTES : The routine uses the first line of the band below - * the current band. The function also has an "C" only - * version. 
- * - ****************************************************************************/ -static -void last_vertical_band_1_2_scale_mmx -( - unsigned char *dest, - unsigned int dest_pitch, - unsigned int dest_width -) -{ - __asm - { - mov esi, dest // Get the source and destination pointer - mov ecx, dest_pitch // Get the pitch size - - mov edx, dest_width // Loop counter - - last_vs_1_2_loop: - - movq mm0, [esi] // get Src[0] - movq [esi+ecx], mm0 // write out eight bytes - - add esi, 8 - sub edx, 8 - - jg last_vs_1_2_loop - } -} - -/**************************************************************************** - * - * ROUTINE : horizontal_line_1_2_scale - * - * INPUTS : const unsigned char *source : - * unsigned int source_width : - * unsigned char *dest : - * unsigned int dest_width : - * - * OUTPUTS : None. - * - * RETURNS : void - * - * FUNCTION : 1 to 2 up-scaling of a horizontal line of pixels. - * - * SPECIAL NOTES : None. - * - ****************************************************************************/ -static -void horizontal_line_1_2_scale_mmx -( - const unsigned char *source, - unsigned int source_width, - unsigned char *dest, - unsigned int dest_width -) -{ - __declspec(align(16))unsigned short four_ones[] = { 1, 1, 1, 1}; - - (void) dest_width; - - __asm - { - mov esi, source - mov edi, dest - - pxor mm7, mm7 - movq mm6, four_ones - - mov ecx, source_width - - hs_1_2_loop: - - movq mm0, [esi] - movq mm1, [esi+1] - - movq mm2, mm0 - movq mm3, mm1 - - movq mm4, mm0 - punpcklbw mm0, mm7 - - punpcklbw mm1, mm7 - paddw mm0, mm1 - - paddw mm0, mm6 - punpckhbw mm2, mm7 - - punpckhbw mm3, mm7 - paddw mm2, mm3 - - paddw mm2, mm6 - psraw mm0, 1 - - psraw mm2, 1 - packuswb mm0, mm2 - - movq mm2, mm4 - punpcklbw mm2, mm0 - - movq [edi], mm2 - punpckhbw mm4, mm0 - - movq [edi+8], mm4 - add esi, 8 - - add edi, 16 - sub ecx, 8 - - cmp ecx, 8 - jg hs_1_2_loop - -// last eight pixel - - movq mm0, [esi] - movq mm1, mm0 - - movq mm2, mm0 - movq mm3, mm1 - - psrlq mm1, 8 - 
psrlq mm3, 56 - - psllq mm3, 56 - por mm1, mm3 - - movq mm3, mm1 - movq mm4, mm0 - - punpcklbw mm0, mm7 - punpcklbw mm1, mm7 - - paddw mm0, mm1 - paddw mm0, mm6 - - punpckhbw mm2, mm7 - punpckhbw mm3, mm7 - - paddw mm2, mm3 - paddw mm2, mm6 - - psraw mm0, 1 - psraw mm2, 1 - - packuswb mm0, mm2 - movq mm2, mm4 - - punpcklbw mm2, mm0 - movq [edi], mm2 - - punpckhbw mm4, mm0 - movq [edi+8], mm4 - } -} - - - - - - -/**************************************************************************** - * - * ROUTINE : horizontal_line_5_4_scale_mmx - * - * INPUTS : const unsigned char *source : Pointer to source data. - * unsigned int source_width : Stride of source. - * unsigned char *dest : Pointer to destination data. - * unsigned int dest_width : Stride of destination (NOT USED). - * - * OUTPUTS : None. - * - * RETURNS : void - * - * FUNCTION : Copies horizontal line of pixels from source to - * destination scaling up by 4 to 5. - * - * SPECIAL NOTES : None. - * - ****************************************************************************/ -static -void horizontal_line_5_4_scale_mmx -( - const unsigned char *source, - unsigned int source_width, - unsigned char *dest, - unsigned int dest_width -) -{ - - __declspec(align(16)) const unsigned short const54_2[] = { 0, 64, 128, 192 }; - __declspec(align(16)) const unsigned short const54_1[] = {256, 192, 128, 64 }; - __declspec(align(16)) unsigned short round_values[] = { 128, 128, 128, 128 }; - /* - unsigned i; - unsigned int a, b, c, d, e; - unsigned char *des = dest; - const unsigned char *src = source; - - (void) dest_width; - - for ( i=0; i>8); - des[2] = ((c*128 + d*128 + 128)>>8); - des[3] = ((d* 64 + e*192 + 128)>>8); - - src += 5; - des += 4; - } - */ - __asm - { - - mov esi, source ; - mov edi, dest ; - - mov ecx, source_width ; - movq mm5, const54_1 ; - - pxor mm7, mm7 ; - movq mm6, const54_2 ; - - movq mm4, round_values ; - lea edx, [esi+ecx] ; - horizontal_line_5_4_loop: - - movq mm0, QWORD PTR [esi] ; - 00 01 02 03 
04 05 06 07 - movq mm1, mm0 ; - 00 01 02 03 04 05 06 07 - - psrlq mm0, 8 ; - 01 02 03 04 05 06 07 xx - punpcklbw mm1, mm7 ; - xx 00 xx 01 xx 02 xx 03 - - punpcklbw mm0, mm7 ; - xx 01 xx 02 xx 03 xx 04 - pmullw mm1, mm5 - - pmullw mm0, mm6 - add esi, 5 - - add edi, 4 - paddw mm1, mm0 - - paddw mm1, mm4 - psrlw mm1, 8 - - cmp esi, edx - packuswb mm1, mm7 - - movd DWORD PTR [edi-4], mm1 - - jl horizontal_line_5_4_loop - - } - -} - -static -void vertical_band_5_4_scale_mmx(unsigned char *source, unsigned int src_pitch, unsigned char *dest, unsigned int dest_pitch, unsigned int dest_width) -{ - - __declspec(align(16)) const unsigned short one_fourths[] = { 64, 64, 64, 64 }; - __declspec(align(16)) const unsigned short two_fourths[] = { 128, 128, 128, 128 }; - __declspec(align(16)) const unsigned short three_fourths[] = { 192, 192, 192, 192 }; - - __declspec(align(16)) unsigned short round_values[] = { 128, 128, 128, 128 }; - __asm - { - push ebx - - mov esi, source // Get the source and destination pointer - mov ecx, src_pitch // Get the pitch size - - mov edi, dest // tow lines below - pxor mm7, mm7 // clear out mm7 - - mov edx, dest_pitch // Loop counter - mov ebx, dest_width - - vs_5_4_loop: - - movd mm0, DWORD ptr [esi] // src[0]; - movd mm1, DWORD ptr [esi+ecx] // src[1]; - - movd mm2, DWORD ptr [esi+ecx*2] - lea eax, [esi+ecx*2] // - - punpcklbw mm1, mm7 - punpcklbw mm2, mm7 - - movq mm3, mm2 - pmullw mm1, three_fourths - - pmullw mm2, one_fourths - movd mm4, [eax+ecx] - - pmullw mm3, two_fourths - punpcklbw mm4, mm7 - - movq mm5, mm4 - pmullw mm4, two_fourths - - paddw mm1, mm2 - movd mm6, [eax+ecx*2] - - pmullw mm5, one_fourths - paddw mm1, round_values; - - paddw mm3, mm4 - psrlw mm1, 8 - - punpcklbw mm6, mm7 - paddw mm3, round_values - - pmullw mm6, three_fourths - psrlw mm3, 8 - - packuswb mm1, mm7 - packuswb mm3, mm7 - - movd DWORD PTR [edi], mm0 - movd DWORD PTR [edi+edx], mm1 - - - paddw mm5, mm6 - movd DWORD PTR [edi+edx*2], mm3 - - lea eax, [edi+edx*2] - 
paddw mm5, round_values - - psrlw mm5, 8 - add edi, 4 - - packuswb mm5, mm7 - movd DWORD PTR [eax+edx], mm5 - - add esi, 4 - sub ebx, 4 - - jg vs_5_4_loop - - pop ebx - } -} - - - -static -void horizontal_line_5_3_scale_mmx -( - const unsigned char *source, - unsigned int source_width, - unsigned char *dest, - unsigned int dest_width -) -{ - __declspec(align(16)) const unsigned short const53_1[] = { 0, 85, 171, 0 }; - __declspec(align(16)) const unsigned short const53_2[] = {256, 171, 85, 0 }; - __declspec(align(16)) unsigned short round_values[] = { 128, 128, 128, 128 }; - __asm - { - - mov esi, source ; - mov edi, dest ; - - mov ecx, source_width ; - movq mm5, const53_1 ; - - pxor mm7, mm7 ; - movq mm6, const53_2 ; - - movq mm4, round_values ; - lea edx, [esi+ecx-5] ; - horizontal_line_5_3_loop: - - movq mm0, QWORD PTR [esi] ; - 00 01 02 03 04 05 06 07 - movq mm1, mm0 ; - 00 01 02 03 04 05 06 07 - - psllw mm0, 8 ; - xx 00 xx 02 xx 04 xx 06 - psrlw mm1, 8 ; - 01 xx 03 xx 05 xx 07 xx - - psrlw mm0, 8 ; - 00 xx 02 xx 04 xx 06 xx - psllq mm1, 16 ; - xx xx 01 xx 03 xx 05 xx - - pmullw mm0, mm6 - - pmullw mm1, mm5 - add esi, 5 - - add edi, 3 - paddw mm1, mm0 - - paddw mm1, mm4 - psrlw mm1, 8 - - cmp esi, edx - packuswb mm1, mm7 - - movd DWORD PTR [edi-3], mm1 - jl horizontal_line_5_3_loop - -//exit condition - movq mm0, QWORD PTR [esi] ; - 00 01 02 03 04 05 06 07 - movq mm1, mm0 ; - 00 01 02 03 04 05 06 07 - - psllw mm0, 8 ; - xx 00 xx 02 xx 04 xx 06 - psrlw mm1, 8 ; - 01 xx 03 xx 05 xx 07 xx - - psrlw mm0, 8 ; - 00 xx 02 xx 04 xx 06 xx - psllq mm1, 16 ; - xx xx 01 xx 03 xx 05 xx - - pmullw mm0, mm6 - - pmullw mm1, mm5 - paddw mm1, mm0 - - paddw mm1, mm4 - psrlw mm1, 8 - - packuswb mm1, mm7 - movd eax, mm1 - - mov edx, eax - shr edx, 16 - - mov WORD PTR[edi], ax - mov BYTE PTR[edi+2], dl - - } - -} - - -static -void vertical_band_5_3_scale_mmx(unsigned char *source, unsigned int src_pitch, unsigned char *dest, unsigned int dest_pitch, unsigned int dest_width) -{ - 
__declspec(align(16)) unsigned short round_values[] = { 128, 128, 128, 128 }; - __declspec(align(16)) const unsigned short one_thirds[] = { 85, 85, 85, 85 }; - __declspec(align(16)) const unsigned short two_thirds[] = { 171, 171, 171, 171 }; - - __asm - { - push ebx - - mov esi, source // Get the source and destination pointer - mov ecx, src_pitch // Get the pitch size - - mov edi, dest // tow lines below - pxor mm7, mm7 // clear out mm7 - - mov edx, dest_pitch // Loop counter - movq mm5, one_thirds - - movq mm6, two_thirds - mov ebx, dest_width; - - vs_5_3_loop: - - movd mm0, DWORD ptr [esi] // src[0]; - movd mm1, DWORD ptr [esi+ecx] // src[1]; - - movd mm2, DWORD ptr [esi+ecx*2] - lea eax, [esi+ecx*2] // - - punpcklbw mm1, mm7 - punpcklbw mm2, mm7 - - pmullw mm1, mm5 - pmullw mm2, mm6 - - movd mm3, DWORD ptr [eax+ecx] - movd mm4, DWORD ptr [eax+ecx*2] - - punpcklbw mm3, mm7 - punpcklbw mm4, mm7 - - pmullw mm3, mm6 - pmullw mm4, mm5 - - - movd DWORD PTR [edi], mm0 - paddw mm1, mm2 - - paddw mm1, round_values - psrlw mm1, 8 - - packuswb mm1, mm7 - paddw mm3, mm4 - - paddw mm3, round_values - movd DWORD PTR [edi+edx], mm1 - - psrlw mm3, 8 - packuswb mm3, mm7 - - movd DWORD PTR [edi+edx*2], mm3 - - - add edi, 4 - add esi, 4 - - sub ebx, 4 - jg vs_5_3_loop - - pop ebx - } -} - - - - -/**************************************************************************** - * - * ROUTINE : horizontal_line_2_1_scale - * - * INPUTS : const unsigned char *source : - * unsigned int source_width : - * unsigned char *dest : - * unsigned int dest_width : - * - * OUTPUTS : None. - * - * RETURNS : void - * - * FUNCTION : 1 to 2 up-scaling of a horizontal line of pixels. - * - * SPECIAL NOTES : None. 
- * - ****************************************************************************/ -static -void horizontal_line_2_1_scale_mmx -( - const unsigned char *source, - unsigned int source_width, - unsigned char *dest, - unsigned int dest_width -) -{ - (void) dest_width; - - __asm - { - mov esi, source - mov edi, dest - - pxor mm7, mm7 - mov ecx, dest_width - - xor edx, edx - hs_2_1_loop: - - movq mm0, [esi+edx*2] - psllw mm0, 8 - - psrlw mm0, 8 - packuswb mm0, mm7 - - movd DWORD Ptr [edi+edx], mm0; - add edx, 4 - - cmp edx, ecx - jl hs_2_1_loop - - } -} - - - -static -void vertical_band_2_1_scale_mmx(unsigned char *source, unsigned int src_pitch, unsigned char *dest, unsigned int dest_pitch, unsigned int dest_width) -{ - vpx_memcpy(dest, source, dest_width); -} - - - -static -void vertical_band_2_1_scale_i_mmx(unsigned char *source, unsigned int src_pitch, unsigned char *dest, unsigned int dest_pitch, unsigned int dest_width) -{ - - __declspec(align(16)) const unsigned short three_sixteenths[] = { 48, 48, 48, 48 }; - __declspec(align(16)) const unsigned short ten_sixteenths[] = { 160, 160, 160, 160 }; - __declspec(align(16)) unsigned short round_values[] = { 128, 128, 128, 128 }; - __asm - { - mov esi, source - mov edi, dest - - mov eax, src_pitch - mov edx, dest_width - - pxor mm7, mm7 - sub esi, eax //back one line - - - lea ecx, [esi+edx]; - movq mm6, round_values; - - movq mm5, three_sixteenths; - movq mm4, ten_sixteenths; - - vs_2_1_i_loop: - movd mm0, [esi] // - movd mm1, [esi+eax] // - - movd mm2, [esi+eax*2] // - punpcklbw mm0, mm7 - - pmullw mm0, mm5 - punpcklbw mm1, mm7 - - pmullw mm1, mm4 - punpcklbw mm2, mm7 - - pmullw mm2, mm5 - paddw mm0, round_values - - paddw mm1, mm2 - paddw mm0, mm1 - - psrlw mm0, 8 - packuswb mm0, mm7 - - movd DWORD PTR [edi], mm0 - add esi, 4 - - add edi, 4; - cmp esi, ecx - jl vs_2_1_i_loop - - } -} - -void -register_mmxscalers(void) -{ - vp8_horizontal_line_1_2_scale = horizontal_line_1_2_scale_mmx; - vp8_vertical_band_1_2_scale = 
vertical_band_1_2_scale_mmx; - vp8_last_vertical_band_1_2_scale = last_vertical_band_1_2_scale_mmx; - vp8_horizontal_line_3_5_scale = horizontal_line_3_5_scale_mmx; - vp8_vertical_band_3_5_scale = vertical_band_3_5_scale_mmx; - vp8_last_vertical_band_3_5_scale = last_vertical_band_3_5_scale_mmx; - vp8_horizontal_line_4_5_scale = horizontal_line_4_5_scale_mmx; - vp8_vertical_band_4_5_scale = vertical_band_4_5_scale_mmx; - vp8_last_vertical_band_4_5_scale = last_vertical_band_4_5_scale_mmx; - - vp8_horizontal_line_3_4_scale = vp8cx_horizontal_line_3_4_scale_c; - vp8_vertical_band_3_4_scale = vp8cx_vertical_band_3_4_scale_c; - vp8_last_vertical_band_3_4_scale = vp8cx_last_vertical_band_3_4_scale_c; - vp8_horizontal_line_2_3_scale = vp8cx_horizontal_line_2_3_scale_c; - vp8_vertical_band_2_3_scale = vp8cx_vertical_band_2_3_scale_c; - vp8_last_vertical_band_2_3_scale = vp8cx_last_vertical_band_2_3_scale_c; - - - - vp8_vertical_band_5_4_scale = vertical_band_5_4_scale_mmx; - vp8_vertical_band_5_3_scale = vertical_band_5_3_scale_mmx; - vp8_vertical_band_2_1_scale = vertical_band_2_1_scale_mmx; - vp8_vertical_band_2_1_scale_i = vertical_band_2_1_scale_i_mmx; - vp8_horizontal_line_2_1_scale = horizontal_line_2_1_scale_mmx; - vp8_horizontal_line_5_3_scale = horizontal_line_5_3_scale_mmx; - vp8_horizontal_line_5_4_scale = horizontal_line_5_4_scale_mmx; - -} diff -Nru libvpx-0.9.5/vpx_scale/intel_linux/scalesystemdependant.c libvpx-0.9.6/vpx_scale/intel_linux/scalesystemdependant.c --- libvpx-0.9.5/vpx_scale/intel_linux/scalesystemdependant.c 2010-10-28 13:14:14.000000000 +0000 +++ libvpx-0.9.6/vpx_scale/intel_linux/scalesystemdependant.c 1970-01-01 00:00:00.000000000 +0000 @@ -1,91 +0,0 @@ -/* - * Copyright (c) 2010 The WebM project authors. All Rights Reserved. - * - * Use of this source code is governed by a BSD-style license - * that can be found in the LICENSE file in the root of the source - * tree. 
An additional intellectual property rights grant can be found - * in the file PATENTS. All contributing project authors may - * be found in the AUTHORS file in the root of the source tree. - */ - - -/**************************************************************************** -* -* Module Title : system_dependant.c -* -* Description : Miscellaneous system dependant functions -* -****************************************************************************/ - -/**************************************************************************** -* Header Files -****************************************************************************/ -#include "vpx_scale/vpxscale.h" -#include "cpuidlib.h" - -/**************************************************************************** -* Imports -*****************************************************************************/ -extern void register_generic_scalers(void); -extern void register_mmxscalers(void); - -/**************************************************************************** - * - * ROUTINE : post_proc_machine_specific_config - * - * INPUTS : UINT32 Version : Codec version number. - * - * OUTPUTS : None. - * - * RETURNS : void - * - * FUNCTION : Checks for machine specifc features such as MMX support - * sets appropriate flags and function pointers. - * - * SPECIAL NOTES : None. - * - ****************************************************************************/ -void -vp8_scale_machine_specific_config(void) -{ - // If MMX supported then set to use MMX versions of functions else - // use original 'C' versions. 
- int mmx_enabled; - int xmm_enabled; - int wmt_enabled; - - vpx_get_processor_flags(&mmx_enabled, &xmm_enabled, &wmt_enabled); - - if (mmx_enabled || xmm_enabled || wmt_enabled) - { - register_mmxscalers(); - } - else - { - vp8_horizontal_line_1_2_scale = vp8cx_horizontal_line_1_2_scale_c; - vp8_vertical_band_1_2_scale = vp8cx_vertical_band_1_2_scale_c; - vp8_last_vertical_band_1_2_scale = vp8cx_last_vertical_band_1_2_scale_c; - vp8_horizontal_line_3_5_scale = vp8cx_horizontal_line_3_5_scale_c; - vp8_vertical_band_3_5_scale = vp8cx_vertical_band_3_5_scale_c; - vp8_last_vertical_band_3_5_scale = vp8cx_last_vertical_band_3_5_scale_c; - vp8_horizontal_line_3_4_scale = vp8cx_horizontal_line_3_4_scale_c; - vp8_vertical_band_3_4_scale = vp8cx_vertical_band_3_4_scale_c; - vp8_last_vertical_band_3_4_scale = vp8cx_last_vertical_band_3_4_scale_c; - vp8_horizontal_line_2_3_scale = vp8cx_horizontal_line_2_3_scale_c; - vp8_vertical_band_2_3_scale = vp8cx_vertical_band_2_3_scale_c; - vp8_last_vertical_band_2_3_scale = vp8cx_last_vertical_band_2_3_scale_c; - vp8_horizontal_line_4_5_scale = vp8cx_horizontal_line_4_5_scale_c; - vp8_vertical_band_4_5_scale = vp8cx_vertical_band_4_5_scale_c; - vp8_last_vertical_band_4_5_scale = vp8cx_last_vertical_band_4_5_scale_c; - - - vp8_vertical_band_5_4_scale = vp8cx_vertical_band_5_4_scale_c; - vp8_vertical_band_5_3_scale = vp8cx_vertical_band_5_3_scale_c; - vp8_vertical_band_2_1_scale = vp8cx_vertical_band_2_1_scale_c; - vp8_vertical_band_2_1_scale_i = vp8cx_vertical_band_2_1_scale_i_c; - vp8_horizontal_line_2_1_scale = vp8cx_horizontal_line_2_1_scale_c; - vp8_horizontal_line_5_3_scale = vp8cx_horizontal_line_5_3_scale_c; - vp8_horizontal_line_5_4_scale = vp8cx_horizontal_line_5_4_scale_c; - - } -} diff -Nru libvpx-0.9.5/vpx_scale/leapster/doptsystemdependant_lf.c libvpx-0.9.6/vpx_scale/leapster/doptsystemdependant_lf.c --- libvpx-0.9.5/vpx_scale/leapster/doptsystemdependant_lf.c 2010-10-28 13:14:14.000000000 +0000 +++ 
libvpx-0.9.6/vpx_scale/leapster/doptsystemdependant_lf.c 1970-01-01 00:00:00.000000000 +0000 @@ -1,72 +0,0 @@ -/* - * Copyright (c) 2010 The WebM project authors. All Rights Reserved. - * - * Use of this source code is governed by a BSD-style license - * that can be found in the LICENSE file in the root of the source - * tree. An additional intellectual property rights grant can be found - * in the file PATENTS. All contributing project authors may - * be found in the AUTHORS file in the root of the source tree. - */ - - -/**************************************************************************** -* -* Module Title : system_dependant.c -* -* Description : Miscellaneous system dependant functions -* -****************************************************************************/ - -/**************************************************************************** -* Header Files -****************************************************************************/ -#include "vpx_scale/vpxscale.h" - -/**************************************************************************** -* Imports -*****************************************************************************/ -extern int register_generic_scalers(void); -extern int de_register_generic_scalers(void); - -/**************************************************************************** - * - * ROUTINE : vp8_scale_machine_specific_config - * - * INPUTS : UINT32 Version : Codec version number. - * - * OUTPUTS : None. - * - * RETURNS : int - * - * FUNCTION : Checks for machine specifc features such as MMX support - * sets appropriate flags and function pointers. - * - * SPECIAL NOTES : None. 
- * - ****************************************************************************/ -int -vp8_scale_machine_specific_config() -{ - return register_generic_scalers(); -} - -/**************************************************************************** - * - * ROUTINE : vp8_scale_machine_specific_config - * - * INPUTS : UINT32 Version : Codec version number. - * - * OUTPUTS : None. - * - * RETURNS : int - * - * FUNCTION : Resets the funtion pointers and deallocates memory. - * - * SPECIAL NOTES : None. - * - ****************************************************************************/ -int -scale_machine_specific_de_config() -{ - return de_register_generic_scalers(); -} diff -Nru libvpx-0.9.5/vpx_scale/leapster/gen_scalers_lf.c libvpx-0.9.6/vpx_scale/leapster/gen_scalers_lf.c --- libvpx-0.9.5/vpx_scale/leapster/gen_scalers_lf.c 2010-10-28 13:14:14.000000000 +0000 +++ libvpx-0.9.6/vpx_scale/leapster/gen_scalers_lf.c 1970-01-01 00:00:00.000000000 +0000 @@ -1,522 +0,0 @@ -/* - * Copyright (c) 2010 The WebM project authors. All Rights Reserved. - * - * Use of this source code is governed by a BSD-style license - * that can be found in the LICENSE file in the root of the source - * tree. An additional intellectual property rights grant can be found - * in the file PATENTS. All contributing project authors may - * be found in the AUTHORS file in the root of the source tree. - */ - - -/**************************************************************************** - * - * Module Title : gen_scalers.c - * - * Description : Generic image scaling functions. 
- * - ***************************************************************************/ - -/**************************************************************************** -* Header Files -****************************************************************************/ -#include "vpx_scale/vpxscale.h" - -/**************************************************************************** -* Imports -****************************************************************************/ - -/**************************************************************************** - * - * ROUTINE : vp8cx_horizontal_line_4_5_scale_c - * - * INPUTS : const unsigned char *source : Pointer to source data. - * unsigned int source_width : Stride of source. - * unsigned char *dest : Pointer to destination data. - * unsigned int dest_width : Stride of destination (NOT USED). - * - * OUTPUTS : None. - * - * RETURNS : void - * - * FUNCTION : Copies horizontal line of pixels from source to - * destination scaling up by 4 to 5. - * - * SPECIAL NOTES : None. 
- * - ****************************************************************************/ -static -void vp8cx_horizontal_line_4_5_scale_c -( - const unsigned char *source, - unsigned int source_width, - unsigned char *dest, - unsigned int dest_width -) -{ - unsigned i; - unsigned int a, b, c; - unsigned char *des = dest; - const unsigned char *src = source; - - (void) dest_width; - - for (i = 0; i < source_width - 4; i += 4) - { - a = src[0]; - b = src[1]; - des [0] = (unsigned char) a; - des [1] = (unsigned char)((a * 51 + 205 * b + 128) >> 8); - c = src[2] * 154; - a = src[3]; - des [2] = (unsigned char)((b * 102 + c + 128) >> 8); - des [3] = (unsigned char)((c + 102 * a + 128) >> 8); - b = src[4]; - des [4] = (unsigned char)((a * 205 + 51 * b + 128) >> 8); - - src += 4; - des += 5; - } - - a = src[0]; - b = src[1]; - des [0] = (unsigned char)(a); - des [1] = (unsigned char)((a * 51 + 205 * b + 128) >> 8); - c = src[2] * 154; - a = src[3]; - des [2] = (unsigned char)((b * 102 + c + 128) >> 8); - des [3] = (unsigned char)((c + 102 * a + 128) >> 8); - des [4] = (unsigned char)(a); - -} - -/**************************************************************************** - * - * ROUTINE : vp8cx_vertical_band_4_5_scale_c - * - * INPUTS : unsigned char *dest : Pointer to destination data. - * unsigned int dest_pitch : Stride of destination data. - * unsigned int dest_width : Width of destination data. - * - * OUTPUTS : None. - * - * RETURNS : void - * - * FUNCTION : Scales vertical band of pixels by scale 4 to 5. The - * height of the band scaled is 4-pixels. - * - * SPECIAL NOTES : The routine uses the first line of the band below - * the current band. 
- * - ****************************************************************************/ -static -void vp8cx_vertical_band_4_5_scale_c(unsigned char *dest, unsigned int dest_pitch, unsigned int dest_width) -{ - unsigned int i; - unsigned int a, b, c, d; - unsigned char *des = dest; - - for (i = 0; i < dest_width; i++) - { - a = des [0]; - b = des [dest_pitch]; - - des[dest_pitch] = (unsigned char)((a * 51 + 205 * b + 128) >> 8); - - c = des[dest_pitch*2] * 154; - d = des[dest_pitch*3]; - - des [dest_pitch*2] = (unsigned char)((b * 102 + c + 128) >> 8); - des [dest_pitch*3] = (unsigned char)((c + 102 * d + 128) >> 8); - - // First line in next band - a = des [dest_pitch * 5]; - des [dest_pitch * 4] = (unsigned char)((d * 205 + 51 * a + 128) >> 8); - - des ++; - } -} - -/**************************************************************************** - * - * ROUTINE : vp8cx_last_vertical_band_4_5_scale_c - * - * INPUTS : unsigned char *dest : Pointer to destination data. - * unsigned int dest_pitch : Stride of destination data. - * unsigned int dest_width : Width of destination data. - * - * OUTPUTS : None. - * - * RETURNS : void - * - * FUNCTION : Scales last vertical band of pixels by scale 4 to 5. The - * height of the band scaled is 4-pixels. - * - * SPECIAL NOTES : The routine does not have available the first line of - * the band below the current band, since this is the - * last band. 
- * - ****************************************************************************/ -static -void vp8cx_last_vertical_band_4_5_scale_c(unsigned char *dest, unsigned int dest_pitch, unsigned int dest_width) -{ - unsigned int i; - unsigned int a, b, c, d; - unsigned char *des = dest; - - for (i = 0; i < dest_width; ++i) - { - a = des[0]; - b = des[dest_pitch]; - - des[dest_pitch] = (unsigned char)((a * 51 + 205 * b + 128) >> 8); - - c = des[dest_pitch*2] * 154; - d = des[dest_pitch*3]; - - des [dest_pitch*2] = (unsigned char)((b * 102 + c + 128) >> 8); - des [dest_pitch*3] = (unsigned char)((c + 102 * d + 128) >> 8); - - // No other line for interplation of this line, so .. - des[dest_pitch*4] = (unsigned char) d; - - des++; - } -} - -/**************************************************************************** - * - * ROUTINE : vp8cx_horizontal_line_3_5_scale_c - * - * INPUTS : const unsigned char *source : Pointer to source data. - * unsigned int source_width : Stride of source. - * unsigned char *dest : Pointer to destination data. - * unsigned int dest_width : Stride of destination (NOT USED). - * - * OUTPUTS : None. - * - * RETURNS : void - * - * FUNCTION : Copies horizontal line of pixels from source to - * destination scaling up by 3 to 5. - * - * SPECIAL NOTES : None. 
- * - * - ****************************************************************************/ -static -void vp8cx_horizontal_line_3_5_scale_c -( - const unsigned char *source, - unsigned int source_width, - unsigned char *dest, - unsigned int dest_width -) -{ - unsigned int i; - unsigned int a, b, c; - unsigned char *des = dest; - const unsigned char *src = source; - - (void) dest_width; - - for (i = 0; i < source_width - 3; i += 3) - { - a = src[0]; - b = src[1]; - des [0] = (unsigned char)(a); - des [1] = (unsigned char)((a * 102 + 154 * b + 128) >> 8); - - c = src[2] ; - des [2] = (unsigned char)((b * 205 + c * 51 + 128) >> 8); - des [3] = (unsigned char)((b * 51 + c * 205 + 128) >> 8); - - a = src[3]; - des [4] = (unsigned char)((c * 154 + a * 102 + 128) >> 8); - - src += 3; - des += 5; - } - - a = src[0]; - b = src[1]; - des [0] = (unsigned char)(a); - - des [1] = (unsigned char)((a * 102 + 154 * b + 128) >> 8); - c = src[2] ; - des [2] = (unsigned char)((b * 205 + c * 51 + 128) >> 8); - des [3] = (unsigned char)((b * 51 + c * 205 + 128) >> 8); - - des [4] = (unsigned char)(c); -} - -/**************************************************************************** - * - * ROUTINE : vp8cx_vertical_band_3_5_scale_c - * - * INPUTS : unsigned char *dest : Pointer to destination data. - * unsigned int dest_pitch : Stride of destination data. - * unsigned int dest_width : Width of destination data. - * - * OUTPUTS : None. - * - * RETURNS : void - * - * FUNCTION : Scales vertical band of pixels by scale 3 to 5. The - * height of the band scaled is 3-pixels. - * - * SPECIAL NOTES : The routine uses the first line of the band below - * the current band. 
- * - ****************************************************************************/ -static -void vp8cx_vertical_band_3_5_scale_c(unsigned char *dest, unsigned int dest_pitch, unsigned int dest_width) -{ - unsigned int i; - unsigned int a, b, c; - unsigned char *des = dest; - - for (i = 0; i < dest_width; i++) - { - a = des [0]; - b = des [dest_pitch]; - des [dest_pitch] = (unsigned char)((a * 102 + 154 * b + 128) >> 8); - - c = des[dest_pitch*2]; - des [dest_pitch*2] = (unsigned char)((b * 205 + c * 51 + 128) >> 8); - des [dest_pitch*3] = (unsigned char)((b * 51 + c * 205 + 128) >> 8); - - // First line in next band... - a = des [dest_pitch * 5]; - des [dest_pitch * 4] = (unsigned char)((c * 154 + a * 102 + 128) >> 8); - - des++; - } -} - -/**************************************************************************** - * - * ROUTINE : vp8cx_last_vertical_band_3_5_scale_c - * - * INPUTS : unsigned char *dest : Pointer to destination data. - * unsigned int dest_pitch : Stride of destination data. - * unsigned int dest_width : Width of destination data. - * - * OUTPUTS : None. - * - * RETURNS : void - * - * FUNCTION : Scales last vertical band of pixels by scale 3 to 5. The - * height of the band scaled is 3-pixels. - * - * SPECIAL NOTES : The routine does not have available the first line of - * the band below the current band, since this is the - * last band. 
- * - ****************************************************************************/ -static -void vp8cx_last_vertical_band_3_5_scale_c(unsigned char *dest, unsigned int dest_pitch, unsigned int dest_width) -{ - unsigned int i; - unsigned int a, b, c; - unsigned char *des = dest; - - for (i = 0; i < dest_width; ++i) - { - a = des [0]; - b = des [dest_pitch]; - - des [ dest_pitch ] = (unsigned char)((a * 102 + 154 * b + 128) >> 8); - - c = des[dest_pitch*2]; - des [dest_pitch*2] = (unsigned char)((b * 205 + c * 51 + 128) >> 8); - des [dest_pitch*3] = (unsigned char)((b * 51 + c * 205 + 128) >> 8); - - // No other line for interplation of this line, so .. - des [ dest_pitch * 4 ] = (unsigned char)(c) ; - - des++; - } -} - -/**************************************************************************** - * - * ROUTINE : vp8cx_horizontal_line_1_2_scale_c - * - * INPUTS : const unsigned char *source : Pointer to source data. - * unsigned int source_width : Stride of source. - * unsigned char *dest : Pointer to destination data. - * unsigned int dest_width : Stride of destination (NOT USED). - * - * OUTPUTS : None. - * - * RETURNS : void - * - * FUNCTION : Copies horizontal line of pixels from source to - * destination scaling up by 1 to 2. - * - * SPECIAL NOTES : None. 
- * - ****************************************************************************/ -static -void vp8cx_horizontal_line_1_2_scale_c -( - const unsigned char *source, - unsigned int source_width, - unsigned char *dest, - unsigned int dest_width -) -{ - unsigned int i; - unsigned int a, b; - unsigned char *des = dest; - const unsigned char *src = source; - - (void) dest_width; - - for (i = 0; i < source_width - 1; i += 1) - { - a = src[0]; - b = src[1]; - des [0] = (unsigned char)(a); - des [1] = (unsigned char)((a + b + 1) >> 1); - src += 1; - des += 2; - } - - a = src[0]; - des [0] = (unsigned char)(a); - des [1] = (unsigned char)(a); -} - -/**************************************************************************** - * - * ROUTINE : vp8cx_vertical_band_1_2_scale_c - * - * INPUTS : unsigned char *dest : Pointer to destination data. - * unsigned int dest_pitch : Stride of destination data. - * unsigned int dest_width : Width of destination data. - * - * OUTPUTS : None. - * - * RETURNS : void - * - * FUNCTION : Scales vertical band of pixels by scale 1 to 2. The - * height of the band scaled is 1-pixel. - * - * SPECIAL NOTES : The routine uses the first line of the band below - * the current band. - * - ****************************************************************************/ -static -void vp8cx_vertical_band_1_2_scale_c(unsigned char *dest, unsigned int dest_pitch, unsigned int dest_width) -{ - unsigned int i; - unsigned int a, b; - unsigned char *des = dest; - - for (i = 0; i < dest_width; i++) - { - a = des [0]; - b = des [dest_pitch * 2]; - - des[dest_pitch] = (unsigned char)((a + b + 1) >> 1); - - des++; - } -} - -/**************************************************************************** - * - * ROUTINE : vp8cx_last_vertical_band_1_2_scale_c - * - * INPUTS : unsigned char *dest : Pointer to destination data. - * unsigned int dest_pitch : Stride of destination data. - * unsigned int dest_width : Width of destination data. - * - * OUTPUTS : None. 
- * - * RETURNS : void - * - * FUNCTION : Scales last vertical band of pixels by scale 1 to 2. The - * height of the band scaled is 1-pixel. - * - * SPECIAL NOTES : The routine does not have available the first line of - * the band below the current band, since this is the - * last band. - * - ****************************************************************************/ -static -void vp8cx_last_vertical_band_1_2_scale_c(unsigned char *dest, unsigned int dest_pitch, unsigned int dest_width) -{ - unsigned int i; - unsigned char *des = dest; - - for (i = 0; i < dest_width; ++i) - { - des[dest_pitch] = des[0]; - des++; - } -} - -#include "vpx_scale/vpxscale.h" -#include "vpx_mem/vpx_mem.h" - -struct vpxglobal_scalling_ptrs_t *g_scaling_ptrs = 0; - -int -register_generic_scalers(void) -{ - int rv = 0; - - g_scaling_ptrs = (struct vpxglobal_scalling_ptrs_t *)vpx_malloc(sizeof(struct vpxglobal_scalling_ptrs_t)); - - if (g_scaling_ptrs) - { - g_scaling_ptrs->vpxhorizontal_line_1_2_scale_t = vp8cx_horizontal_line_1_2_scale_c; - g_scaling_ptrs->vpxvertical_band_1_2_scale_t = vp8cx_vertical_band_1_2_scale_c; - g_scaling_ptrs->vpxlast_vertical_band_1_2_scale_t = vp8cx_last_vertical_band_1_2_scale_c; - g_scaling_ptrs->vpxhorizontal_line_3_5_scale_t = vp8cx_horizontal_line_3_5_scale_c; - g_scaling_ptrs->vpxvertical_band_3_5_scale_t = vp8cx_vertical_band_3_5_scale_c; - g_scaling_ptrs->vpxlast_vertical_band_3_5_scale_t = vp8cx_last_vertical_band_3_5_scale_c; - g_scaling_ptrs->vpxhorizontal_line_4_5_scale_t = vp8cx_horizontal_line_4_5_scale_c; - g_scaling_ptrs->vpxvertical_band_4_5_scale_t = vp8cx_vertical_band_4_5_scale_c; - g_scaling_ptrs->vpxlast_vertical_band_4_5_scale_t = vp8cx_last_vertical_band_4_5_scale_c; - } - else - { - rv = -1; - } - - /* - vp8_horizontal_line_1_2_scale = vp8cx_horizontal_line_1_2_scale_c; - vp8_vertical_band_1_2_scale = vp8cx_vertical_band_1_2_scale_c; - vp8_last_vertical_band_1_2_scale = vp8cx_last_vertical_band_1_2_scale_c; - 
vp8_horizontal_line_3_5_scale = vp8cx_horizontal_line_3_5_scale_c; - vp8_vertical_band_3_5_scale = vp8cx_vertical_band_3_5_scale_c; - vp8_last_vertical_band_3_5_scale = vp8cx_last_vertical_band_3_5_scale_c; - vp8_horizontal_line_4_5_scale = vp8cx_horizontal_line_4_5_scale_c; - vp8_vertical_band_4_5_scale = vp8cx_vertical_band_4_5_scale_c; - vp8_last_vertical_band_4_5_scale = vp8cx_last_vertical_band_4_5_scale_c; - */ - - return rv; -} - -int -de_register_generic_scalers(void) -{ - int rv = 0; - - if (g_scaling_ptrs) - { - vpx_free(g_scaling_ptrs); - g_scaling_ptrs = 0; - } - else - { - rv = -1; - } - - return rv; -} diff -Nru libvpx-0.9.5/vpx_scale/leapster/vpxscale_lf.c libvpx-0.9.6/vpx_scale/leapster/vpxscale_lf.c --- libvpx-0.9.5/vpx_scale/leapster/vpxscale_lf.c 2010-10-28 13:14:14.000000000 +0000 +++ libvpx-0.9.6/vpx_scale/leapster/vpxscale_lf.c 1970-01-01 00:00:00.000000000 +0000 @@ -1,891 +0,0 @@ -/* - * Copyright (c) 2010 The WebM project authors. All Rights Reserved. - * - * Use of this source code is governed by a BSD-style license - * that can be found in the LICENSE file in the root of the source - * tree. An additional intellectual property rights grant can be found - * in the file PATENTS. All contributing project authors may - * be found in the AUTHORS file in the root of the source tree. - */ - - -/**************************************************************************** - * - * Module Title : scale.c - * - * Description : Image scaling functions. 
- * - ***************************************************************************/ - -/**************************************************************************** -* Header Files -****************************************************************************/ -#include "stdlib.h" -#include "vpx_scale/vpxscale.h" -#include "vpx_mem/vpx_mem.h" -#include "vpx_scale/yv12config.h" -#include "codec_common_interface.h" - -/**************************************************************************** -* Exports -****************************************************************************/ -/* -void (*vp8_vertical_band_4_5_scale)(unsigned char * dest,unsigned int dest_pitch,unsigned int dest_width); -void (*vp8_last_vertical_band_4_5_scale)(unsigned char * dest,unsigned int dest_pitch,unsigned int dest_width); -void (*vp8_vertical_band_3_5_scale)(unsigned char * dest,unsigned int dest_pitch,unsigned int dest_width); -void (*vp8_last_vertical_band_3_5_scale)(unsigned char * dest,unsigned int dest_pitch,unsigned int dest_width); -void (*vp8_horizontal_line_1_2_scale)(const unsigned char * source,unsigned int source_width,unsigned char * dest,unsigned int dest_width); -void (*vp8_horizontal_line_3_5_scale)(const unsigned char * source,unsigned int source_width,unsigned char * dest,unsigned int dest_width); -void (*vp8_horizontal_line_4_5_scale)(const unsigned char * source,unsigned int source_width,unsigned char * dest,unsigned int dest_width); -void (*vp8_vertical_band_1_2_scale)(unsigned char * dest,unsigned int dest_pitch,unsigned int dest_width); -void (*vp8_last_vertical_band_1_2_scale)(unsigned char * dest,unsigned int dest_pitch,unsigned int dest_width); -*/ - - -typedef struct -{ - int expanded_frame_width; - int expanded_frame_height; - - int HScale; - int HRatio; - int VScale; - int VRatio; - - YV12_BUFFER_CONFIG *src_yuv_config; - YV12_BUFFER_CONFIG *dst_yuv_config; - -} SCALE_VARS; - - -/**************************************************************************** - * 
- * ROUTINE : horizontal_line_copy - * - * INPUTS : None - * - * - * OUTPUTS : None. - * - * RETURNS : None - * - * FUNCTION : 1 to 1 scaling up for a horizontal line of pixles - * - * SPECIAL NOTES : None. - * - * ERRORS : None. - * - ****************************************************************************/ -static -void horizontal_line_copy( - const unsigned char *source, - unsigned int source_width, - unsigned char *dest, - unsigned int dest_width -) -{ - duck_memcpy(dest, source, source_width); -} -/**************************************************************************** - * - * ROUTINE : null_scale - * - * INPUTS : None - * - * - * OUTPUTS : None. - * - * RETURNS : None - * - * FUNCTION : 1 to 1 scaling up for a vertical band - * - * SPECIAL NOTES : None. - * - * ERRORS : None. - * - ****************************************************************************/ -static -void null_scale( - unsigned char *dest, - unsigned int dest_pitch, - unsigned int dest_width -) -{ - return; -} - -/**************************************************************************** - * - * ROUTINE : scale1d_2t1_i - * - * INPUTS : const unsigned char *source : Pointer to data to be scaled. - * int source_step : Number of pixels to step on in source. - * unsigned int source_scale : Scale for source (UNUSED). - * unsigned int source_length : Length of source (UNUSED). - * unsigned char *dest : Pointer to output data array. - * int dest_step : Number of pixels to step on in destination. - * unsigned int dest_scale : Scale for destination (UNUSED). - * unsigned int dest_length : Length of destination. - * - * OUTPUTS : None. - * - * RETURNS : void - * - * FUNCTION : Performs 2-to-1 interpolated scaling. - * - * SPECIAL NOTES : None. 
- * - ****************************************************************************/ -static -void scale1d_2t1_i -( - const unsigned char *source, - int source_step, - unsigned int source_scale, - unsigned int source_length, - unsigned char *dest, - int dest_step, - unsigned int dest_scale, - unsigned int dest_length -) -{ - unsigned int i, j; - unsigned int temp; - - (void) source_length; - (void) source_scale; - (void) dest_scale; - - source_step *= 2; - dest[0] = source[0]; - - for (i = dest_step, j = source_step; i < dest_length * dest_step; i += dest_step, j += source_step) - { - temp = 8; - temp += 3 * source[j-source_step]; - temp += 10 * source[j]; - temp += 3 * source[j+source_step]; - temp >>= 4; - dest[i] = (char)(temp); - } -} - -/**************************************************************************** - * - * ROUTINE : scale1d_2t1_ps - * - * INPUTS : const unsigned char *source : Pointer to data to be scaled. - * int source_step : Number of pixels to step on in source. - * unsigned int source_scale : Scale for source (UNUSED). - * unsigned int source_length : Length of source (UNUSED). - * unsigned char *dest : Pointer to output data array. - * int dest_step : Number of pixels to step on in destination. - * unsigned int dest_scale : Scale for destination (UNUSED). - * unsigned int dest_length : Length of destination. - * - * OUTPUTS : None. - * - * RETURNS : void - * - * FUNCTION : Performs 2-to-1 point subsampled scaling. - * - * SPECIAL NOTES : None. 
- * - ****************************************************************************/ -static -void scale1d_2t1_ps -( - const unsigned char *source, - int source_step, - unsigned int source_scale, - unsigned int source_length, - unsigned char *dest, - int dest_step, - unsigned int dest_scale, - unsigned int dest_length -) -{ - unsigned int i, j; - - (void) source_length; - (void) source_scale; - (void) dest_scale; - - source_step *= 2; - j = 0; - - for (i = 0; i < dest_length * dest_step; i += dest_step, j += source_step) - dest[i] = source[j]; -} -/**************************************************************************** - * - * ROUTINE : scale1d_c - * - * INPUTS : const unsigned char *source : Pointer to data to be scaled. - * int source_step : Number of pixels to step on in source. - * unsigned int source_scale : Scale for source. - * unsigned int source_length : Length of source (UNUSED). - * unsigned char *dest : Pointer to output data array. - * int dest_step : Number of pixels to step on in destination. - * unsigned int dest_scale : Scale for destination. - * unsigned int dest_length : Length of destination. - * - * OUTPUTS : None. - * - * RETURNS : void - * - * FUNCTION : Performs linear interpolation in one dimension. - * - * SPECIAL NOTES : None. - * - ****************************************************************************/ -static -void scale1d_c -( - const unsigned char *source, - int source_step, - unsigned int source_scale, - unsigned int source_length, - unsigned char *dest, - int dest_step, - unsigned int dest_scale, - unsigned int dest_length -) -{ - unsigned int i; - unsigned int round_value = dest_scale / 2; - unsigned int left_modifier = dest_scale; - unsigned int right_modifier = 0; - unsigned char left_pixel = *source; - unsigned char right_pixel = *(source + source_step); - - (void) source_length; - - // These asserts are needed if there are boundary issues... 
- //assert ( dest_scale > source_scale ); - //assert ( (source_length-1) * dest_scale >= (dest_length-1) * source_scale ); - - for (i = 0; i < dest_length * dest_step; i += dest_step) - { - dest[i] = (char)((left_modifier * left_pixel + right_modifier * right_pixel + round_value) / dest_scale); - - right_modifier += source_scale; - - while (right_modifier > dest_scale) - { - right_modifier -= dest_scale; - source += source_step; - left_pixel = *source; - right_pixel = *(source + source_step); - } - - left_modifier = dest_scale - right_modifier; - } -} - -/**************************************************************************** - * - * ROUTINE : Scale2D - * - * INPUTS : const unsigned char *source : Pointer to data to be scaled. - * int source_pitch : Stride of source image. - * unsigned int source_width : Width of input image. - * unsigned int source_height : Height of input image. - * unsigned char *dest : Pointer to output data array. - * int dest_pitch : Stride of destination image. - * unsigned int dest_width : Width of destination image. - * unsigned int dest_height : Height of destination image. - * unsigned char *temp_area : Pointer to temp work area. - * unsigned char temp_area_height : Height of temp work area. - * unsigned int hscale : Horizontal scale factor numerator. - * unsigned int hratio : Horizontal scale factor denominator. - * unsigned int vscale : Vertical scale factor numerator. - * unsigned int vratio : Vertical scale factor denominator. - * unsigned int interlaced : Interlace flag. - * - * OUTPUTS : None. - * - * RETURNS : void - * - * FUNCTION : Performs 2-tap linear interpolation in two dimensions. - * - * SPECIAL NOTES : Expansion is performed one band at a time to help with - * caching. 
- * - ****************************************************************************/ -static -void Scale2D -( - const unsigned char *source, - int source_pitch, - unsigned int source_width, - unsigned int source_height, - unsigned char *dest, - int dest_pitch, - unsigned int dest_width, - unsigned int dest_height, - unsigned char *temp_area, - unsigned char temp_area_height, - unsigned int hscale, - unsigned int hratio, - unsigned int vscale, - unsigned int vratio, - unsigned int interlaced -) -{ - unsigned int i, j, k; - unsigned int bands; - unsigned int dest_band_height; - unsigned int source_band_height; - - typedef void (*Scale1D)(const unsigned char * source, int source_step, unsigned int source_scale, unsigned int source_length, - unsigned char * dest, int dest_step, unsigned int dest_scale, unsigned int dest_length); - - Scale1D Scale1Dv = scale1d_c; - Scale1D Scale1Dh = scale1d_c; - - if (hscale == 2 && hratio == 1) - Scale1Dh = scale1d_2t1_ps; - - if (vscale == 2 && vratio == 1) - { - if (interlaced) - Scale1Dv = scale1d_2t1_ps; - else - Scale1Dv = scale1d_2t1_i; - } - - if (source_height == dest_height) - { - // for each band of the image - for (k = 0; k < dest_height; k++) - { - Scale1Dh(source, 1, hscale, source_width + 1, dest, 1, hratio, dest_width); - source += source_pitch; - dest += dest_pitch; - } - - return; - } - - if (dest_height > source_height) - { - dest_band_height = temp_area_height - 1; - source_band_height = dest_band_height * source_height / dest_height; - } - else - { - source_band_height = temp_area_height - 1; - dest_band_height = source_band_height * vratio / vscale; - } - - // first row needs to be done so that we can stay one row ahead for vertical zoom - Scale1Dh(source, 1, hscale, source_width + 1, temp_area, 1, hratio, dest_width); - - // for each band of the image - bands = (dest_height + dest_band_height - 1) / dest_band_height; - - for (k = 0; k < bands; k++) - { - // scale one band horizontally - for (i = 1; i < 
source_band_height + 1; i++) - { - if (k * source_band_height + i < source_height) - { - Scale1Dh(source + i * source_pitch, 1, hscale, source_width + 1, - temp_area + i * dest_pitch, 1, hratio, dest_width); - } - else // Duplicate the last row - { - // copy temp_area row 0 over from last row in the past - duck_memcpy(temp_area + i * dest_pitch, temp_area + (i - 1)*dest_pitch, dest_pitch); - } - } - - // scale one band vertically - for (j = 0; j < dest_width; j++) - { - Scale1Dv(&temp_area[j], dest_pitch, vscale, source_band_height + 1, - &dest[j], dest_pitch, vratio, dest_band_height); - } - - // copy temp_area row 0 over from last row in the past - duck_memcpy(temp_area, temp_area + source_band_height * dest_pitch, dest_pitch); - - // move to the next band - source += source_band_height * source_pitch; - dest += dest_band_height * dest_pitch; - } -} - -/**************************************************************************** - * - * ROUTINE : vp8_scale_frame - * - * INPUTS : YV12_BUFFER_CONFIG *src : Pointer to frame to be scaled. - * YV12_BUFFER_CONFIG *dst : Pointer to buffer to hold scaled frame. - * unsigned char *temp_area : Pointer to temp work area. - * unsigned char temp_area_height : Height of temp work area. - * unsigned int hscale : Horizontal scale factor numerator. - * unsigned int hratio : Horizontal scale factor denominator. - * unsigned int vscale : Vertical scale factor numerator. - * unsigned int vratio : Vertical scale factor denominator. - * unsigned int interlaced : Interlace flag. - * - * OUTPUTS : None. - * - * RETURNS : void - * - * FUNCTION : Performs 2-tap linear interpolation in two dimensions. - * - * SPECIAL NOTES : Expansion is performed one band at a time to help with - * caching. 
- * - ****************************************************************************/ -void vp8_scale_frame -( - YV12_BUFFER_CONFIG *src, - YV12_BUFFER_CONFIG *dst, - unsigned char *temp_area, - unsigned char temp_height, - unsigned int hscale, - unsigned int hratio, - unsigned int vscale, - unsigned int vratio, - unsigned int interlaced -) -{ - int i; - int dw = (hscale - 1 + src->y_width * hratio) / hscale; - int dh = (vscale - 1 + src->y_height * vratio) / vscale; - - // call our internal scaling routines!! - Scale2D((unsigned char *) src->y_buffer, src->y_stride, src->y_width, src->y_height, - (unsigned char *) dst->y_buffer, dst->y_stride, dw, dh, - temp_area, temp_height, hscale, hratio, vscale, vratio, interlaced); - - if (dw < (int)dst->y_width) - for (i = 0; i < dh; i++) - duck_memset(dst->y_buffer + i * dst->y_stride + dw - 1, dst->y_buffer[i*dst->y_stride+dw-2], dst->y_width - dw + 1); - - if (dh < (int)dst->y_height) - for (i = dh - 1; i < (int)dst->y_height; i++) - duck_memcpy(dst->y_buffer + i * dst->y_stride, dst->y_buffer + (dh - 2) * dst->y_stride, dst->y_width + 1); - - Scale2D((unsigned char *) src->u_buffer, src->uv_stride, src->uv_width, src->uv_height, - (unsigned char *) dst->u_buffer, dst->uv_stride, dw / 2, dh / 2, - temp_area, temp_height, hscale, hratio, vscale, vratio, interlaced); - - if (dw / 2 < (int)dst->uv_width) - for (i = 0; i < dst->uv_height; i++) - duck_memset(dst->u_buffer + i * dst->uv_stride + dw / 2 - 1, dst->u_buffer[i*dst->uv_stride+dw/2-2], dst->uv_width - dw / 2 + 1); - - if (dh / 2 < (int)dst->uv_height) - for (i = dh / 2 - 1; i < (int)dst->y_height / 2; i++) - duck_memcpy(dst->u_buffer + i * dst->uv_stride, dst->u_buffer + (dh / 2 - 2)*dst->uv_stride, dst->uv_width); - - Scale2D((unsigned char *) src->v_buffer, src->uv_stride, src->uv_width, src->uv_height, - (unsigned char *) dst->v_buffer, dst->uv_stride, dw / 2, dh / 2, - temp_area, temp_height, hscale, hratio, vscale, vratio, interlaced); - - if (dw / 2 < 
(int)dst->uv_width) - for (i = 0; i < dst->uv_height; i++) - duck_memset(dst->v_buffer + i * dst->uv_stride + dw / 2 - 1, dst->v_buffer[i*dst->uv_stride+dw/2-2], dst->uv_width - dw / 2 + 1); - - if (dh / 2 < (int) dst->uv_height) - for (i = dh / 2 - 1; i < (int)dst->y_height / 2; i++) - duck_memcpy(dst->v_buffer + i * dst->uv_stride, dst->v_buffer + (dh / 2 - 2)*dst->uv_stride, dst->uv_width); -} -/**************************************************************************** - * - * ROUTINE : any_ratio_2d_scale - * - * INPUTS : SCALE_INSTANCE *si : Pointer to post-processor instance (NOT USED). - * const unsigned char *source : Pointer to source image. - * unsigned int source_pitch : Stride of source image. - * unsigned int source_width : Width of source image. - * unsigned int source_height : Height of source image (NOT USED). - * unsigned char *dest : Pointer to destination image. - * unsigned int dest_pitch : Stride of destination image. - * unsigned int dest_width : Width of destination image. - * unsigned int dest_height : Height of destination image. - * - * OUTPUTS : None. - * - * RETURNS : int: 1 if image scaled, 0 if image could not be scaled. - * - * FUNCTION : Scale the image with changing apect ratio. - * - * SPECIAL NOTES : This scaling is a bi-linear scaling. Need to re-work the - * whole function for new scaling algorithm. 
- * - ****************************************************************************/ -static -int any_ratio_2d_scale -( - SCALE_VARS *si, - const unsigned char *source, - unsigned int source_pitch, - unsigned int source_width, - unsigned int source_height, - unsigned char *dest, - unsigned int dest_pitch, - unsigned int dest_width, - unsigned int dest_height -) -{ - unsigned int i, k; - unsigned int src_band_height = 0; - unsigned int dest_band_height = 0; - - // suggested scale factors - int hs = si->HScale; - int hr = si->HRatio; - int vs = si->VScale; - int vr = si->VRatio; - - // assume the ratios are scalable instead of should be centered - int ratio_scalable = 1; - - void (*horiz_line_scale)(const unsigned char *, unsigned int, unsigned char *, unsigned int) = NULL; - void (*vert_band_scale)(unsigned char *, unsigned int, unsigned int) = NULL; - void (*last_vert_band_scale)(unsigned char *, unsigned int, unsigned int) = NULL; - - (void) si; - - // find out the ratio for each direction - switch (hr * 10 / hs) - { - case 8: - // 4-5 Scale in Width direction - horiz_line_scale = g_scaling_ptrs->vpxhorizontal_line_4_5_scale_t; - break; - case 6: - // 3-5 Scale in Width direction - horiz_line_scale = g_scaling_ptrs->vpxhorizontal_line_3_5_scale_t; - break; - case 5: - // 1-2 Scale in Width direction - horiz_line_scale = g_scaling_ptrs->vpxhorizontal_line_1_2_scale_t; - break; - case 10: - // no scale in Width direction - horiz_line_scale = horizontal_line_copy; - break; - default: - // The ratio is not acceptable now - // throw("The ratio is not acceptable for now!"); - ratio_scalable = 0; - break; - } - - switch (vr * 10 / vs) - { - case 8: - // 4-5 Scale in vertical direction - vert_band_scale = g_scaling_ptrs->vpxvertical_band_4_5_scale_t; - last_vert_band_scale = g_scaling_ptrs->vpxlast_vertical_band_4_5_scale_t; - src_band_height = 4; - dest_band_height = 5; - break; - case 6: - // 3-5 Scale in vertical direction - vert_band_scale = 
g_scaling_ptrs->vpxvertical_band_3_5_scale_t; - last_vert_band_scale = g_scaling_ptrs->vpxlast_vertical_band_3_5_scale_t; - src_band_height = 3; - dest_band_height = 5; - break; - case 5: - // 1-2 Scale in vertical direction - vert_band_scale = g_scaling_ptrs->vpxvertical_band_1_2_scale_t; - last_vert_band_scale = g_scaling_ptrs->vpxlast_vertical_band_1_2_scale_t; - src_band_height = 1; - dest_band_height = 2; - break; - case 10: - // no scale in Width direction - vert_band_scale = null_scale; - last_vert_band_scale = null_scale; - src_band_height = 4; - dest_band_height = 4; - break; - default: - // The ratio is not acceptable now - // throw("The ratio is not acceptable for now!"); - ratio_scalable = 0; - break; - } - - if (ratio_scalable == 0) - return ratio_scalable; - - horiz_line_scale(source, source_width, dest, dest_width); - - // except last band - for (k = 0; k < (dest_height + dest_band_height - 1) / dest_band_height - 1; k++) - { - // scale one band horizontally - for (i = 1; i < src_band_height; i++) - { - horiz_line_scale(source + i * source_pitch, - source_width, - dest + i * dest_pitch, - dest_width); - } - - // first line of next band - horiz_line_scale(source + src_band_height * source_pitch, - source_width, - dest + dest_band_height * dest_pitch, - dest_width); - - // Vertical scaling is in place - vert_band_scale(dest, dest_pitch, dest_width); - - // Next band... 
- source += src_band_height * source_pitch; - dest += dest_band_height * dest_pitch; - } - - // scale one band horizontally - for (i = 1; i < src_band_height; i++) - { - horiz_line_scale(source + i * source_pitch, - source_width, - dest + i * dest_pitch, - dest_width); - } - - // Vertical scaling is in place - last_vert_band_scale(dest, dest_pitch, dest_width); - - return ratio_scalable; -} - -/**************************************************************************** - * - * ROUTINE : any_ratio_frame_scale - * - * INPUTS : SCALE_INSTANCE *si : Pointer to post-processor instance (NOT USED). - * unsigned char *frame_buffer : Pointer to source image. - * int YOffset : Offset from start of buffer to Y samples. - * int UVOffset : Offset from start of buffer to UV samples. - * - * OUTPUTS : None. - * - * RETURNS : int: 1 if image scaled, 0 if image could not be scaled. - * - * FUNCTION : Scale the image with changing apect ratio. - * - * SPECIAL NOTES : None. - * - ****************************************************************************/ -static -int any_ratio_frame_scale(SCALE_VARS *scale_vars, int YOffset, int UVOffset) -{ - int i; - int ew; - int eh; - - // suggested scale factors - int hs = scale_vars->HScale; - int hr = scale_vars->HRatio; - int vs = scale_vars->VScale; - int vr = scale_vars->VRatio; - - int ratio_scalable = 1; - - int sw = (scale_vars->expanded_frame_width * hr + hs - 1) / hs; - int sh = (scale_vars->expanded_frame_height * vr + vs - 1) / vs; - int dw = scale_vars->expanded_frame_width; - int dh = scale_vars->expanded_frame_height; - YV12_BUFFER_CONFIG *src_yuv_config = scale_vars->src_yuv_config; - YV12_BUFFER_CONFIG *dst_yuv_config = scale_vars->dst_yuv_config; - - if (hr == 3) - ew = (sw + 2) / 3 * 3 * hs / hr; - else - ew = (sw + 7) / 8 * 8 * hs / hr; - - if (vr == 3) - eh = (sh + 2) / 3 * 3 * vs / vr; - else - eh = (sh + 7) / 8 * 8 * vs / vr; - - ratio_scalable = any_ratio_2d_scale(scale_vars, - (const unsigned char 
*)src_yuv_config->y_buffer, - src_yuv_config->y_stride, sw, sh, - (unsigned char *) dst_yuv_config->y_buffer + YOffset, - dst_yuv_config->y_stride, dw, dh); - - for (i = 0; i < eh; i++) - duck_memset(dst_yuv_config->y_buffer + YOffset + i * dst_yuv_config->y_stride + dw, 0, ew - dw); - - for (i = dh; i < eh; i++) - duck_memset(dst_yuv_config->y_buffer + YOffset + i * dst_yuv_config->y_stride, 0, ew); - - if (ratio_scalable == 0) - return ratio_scalable; - - sw = (sw + 1) >> 1; - sh = (sh + 1) >> 1; - dw = (dw + 1) >> 1; - dh = (dh + 1) >> 1; - - any_ratio_2d_scale(scale_vars, - (const unsigned char *)src_yuv_config->u_buffer, - src_yuv_config->y_stride / 2, sw, sh, - (unsigned char *)dst_yuv_config->u_buffer + UVOffset, - dst_yuv_config->uv_stride, dw, dh); - - any_ratio_2d_scale(scale_vars, - (const unsigned char *)src_yuv_config->v_buffer, - src_yuv_config->y_stride / 2, sw, sh, - (unsigned char *)dst_yuv_config->v_buffer + UVOffset, - dst_yuv_config->uv_stride, dw, dh); - - return ratio_scalable; -} - -/**************************************************************************** - * - * ROUTINE : center_image - * - * INPUTS : SCALE_INSTANCE *si : Pointer to post-processor instance. - * - * OUTPUTS : None. - * - * RETURNS : void - * - * FUNCTION : Centers the image without scaling in the output buffer. - * - * SPECIAL NOTES : None. 
- * - ****************************************************************************/ -static void -center_image(YV12_BUFFER_CONFIG *src_yuv_config, YV12_BUFFER_CONFIG *dst_yuv_config) -{ - int i; - int row_offset, col_offset; - char *src_data_pointer; - char *dst_data_pointer; - - // center values - row_offset = (dst_yuv_config->y_height - src_yuv_config->y_height) / 2; - col_offset = (dst_yuv_config->y_width - src_yuv_config->y_width) / 2; - - // Y's - src_data_pointer = src_yuv_config->y_buffer; - dst_data_pointer = (char *)dst_yuv_config->y_buffer + (row_offset * dst_yuv_config->y_stride) + col_offset; - - for (i = 0; i < src_yuv_config->y_height; i++) - { - duck_memcpy(dst_data_pointer, src_data_pointer, src_yuv_config->y_width); - dst_data_pointer += dst_yuv_config->y_stride; - src_data_pointer += src_yuv_config->y_stride; - } - - row_offset /= 2; - col_offset /= 2; - - // U's - src_data_pointer = src_yuv_config->u_buffer; - dst_data_pointer = (char *)dst_yuv_config->u_buffer + (row_offset * dst_yuv_config->uv_stride) + col_offset; - - for (i = 0; i < src_yuv_config->uv_height; i++) - { - duck_memcpy(dst_data_pointer, src_data_pointer, src_yuv_config->uv_width); - dst_data_pointer += dst_yuv_config->uv_stride; - src_data_pointer += src_yuv_config->uv_stride; - } - - // V's - src_data_pointer = src_yuv_config->v_buffer; - dst_data_pointer = (char *)dst_yuv_config->v_buffer + (row_offset * dst_yuv_config->uv_stride) + col_offset; - - for (i = 0; i < src_yuv_config->uv_height; i++) - { - duck_memcpy(dst_data_pointer, src_data_pointer, src_yuv_config->uv_width); - dst_data_pointer += dst_yuv_config->uv_stride; - src_data_pointer += src_yuv_config->uv_stride; - } -} - -/**************************************************************************** - * - * ROUTINE : scale_or_center - * - * INPUTS : SCALE_INSTANCE *si : Pointer to post-processor instance. - * - * - * - * OUTPUTS : None. 
- * - * RETURNS : void - * - * FUNCTION : Decides to scale or center image in scale buffer for blit - * - * SPECIAL NOTES : None. - * - ****************************************************************************/ -void -vp8_yv12_scale_or_center -( - YV12_BUFFER_CONFIG *src_yuv_config, - YV12_BUFFER_CONFIG *dst_yuv_config, - int expanded_frame_width, - int expanded_frame_height, - int scaling_mode, - int HScale, - int HRatio, - int VScale, - int VRatio -) -{ -// if ( ppi->post_processing_level ) - // update_umvborder ( ppi, frame_buffer ); - - - switch (scaling_mode) - { - case SCALE_TO_FIT: - case MAINTAIN_ASPECT_RATIO: - { - SCALE_VARS scale_vars; - // center values -#if 1 - int row = (dst_yuv_config->y_height - expanded_frame_height) / 2; - int col = (dst_yuv_config->y_width - expanded_frame_width) / 2; -// int YOffset = row * dst_yuv_config->y_width + col; -// int UVOffset = (row>>1) * dst_yuv_config->uv_width + (col>>1); - int YOffset = row * dst_yuv_config->y_stride + col; - int UVOffset = (row >> 1) * dst_yuv_config->uv_stride + (col >> 1); -#else - int row = (src_yuv_config->y_height - expanded_frame_height) / 2; - int col = (src_yuv_config->y_width - expanded_frame_width) / 2; - int YOffset = row * src_yuv_config->y_width + col; - int UVOffset = (row >> 1) * src_yuv_config->uv_width + (col >> 1); -#endif - - scale_vars.dst_yuv_config = dst_yuv_config; - scale_vars.src_yuv_config = src_yuv_config; - scale_vars.HScale = HScale; - scale_vars.HRatio = HRatio; - scale_vars.VScale = VScale; - scale_vars.VRatio = VRatio; - scale_vars.expanded_frame_width = expanded_frame_width; - scale_vars.expanded_frame_height = expanded_frame_height; - - // perform center and scale - any_ratio_frame_scale(&scale_vars, YOffset, UVOffset); - - break; - } - case CENTER: - center_image(src_yuv_config, dst_yuv_config); - break; - - default: - break; - } -} diff -Nru libvpx-0.9.5/vpx_scale/leapster/yv12extend.c libvpx-0.9.6/vpx_scale/leapster/yv12extend.c --- 
libvpx-0.9.5/vpx_scale/leapster/yv12extend.c 2010-10-28 13:14:14.000000000 +0000 +++ libvpx-0.9.6/vpx_scale/leapster/yv12extend.c 1970-01-01 00:00:00.000000000 +0000 @@ -1,232 +0,0 @@ -/* - * Copyright (c) 2010 The WebM project authors. All Rights Reserved. - * - * Use of this source code is governed by a BSD-style license - * that can be found in the LICENSE file in the root of the source - * tree. An additional intellectual property rights grant can be found - * in the file PATENTS. All contributing project authors may - * be found in the AUTHORS file in the root of the source tree. - */ - - -/**************************************************************************** - * - * Module Title : yv12extend.c - * - * Description : - * - ***************************************************************************/ - -/**************************************************************************** -* Header Files -****************************************************************************/ -//#include -#include "vpx_scale/yv12config.h" -#include "vpx_mem/vpx_mem.h" - -/**************************************************************************** -* Exports -****************************************************************************/ - -/**************************************************************************** - * - ****************************************************************************/ -void -vp8_yv12_extend_frame_borders(YV12_BUFFER_CONFIG *ybf) -{ - int i; - char *src_ptr1, *src_ptr2; - char *dest_ptr1, *dest_ptr2; - - unsigned int Border; - int plane_stride; - int plane_height; - int plane_width; - - /***********/ - /* Y Plane */ - /***********/ - Border = ybf->border; - plane_stride = ybf->y_stride; - plane_height = ybf->y_height; - plane_width = ybf->y_width; - - // copy the left and right most columns out - src_ptr1 = ybf->y_buffer; - src_ptr2 = src_ptr1 + plane_width - 1; - dest_ptr1 = src_ptr1 - Border; - dest_ptr2 = src_ptr2 + 1; - - for (i = 0; i < 
plane_height; i++) - { - memset(dest_ptr1, src_ptr1[0], Border); - memset(dest_ptr2, src_ptr2[0], Border); - src_ptr1 += plane_stride; - src_ptr2 += plane_stride; - dest_ptr1 += plane_stride; - dest_ptr2 += plane_stride; - } - - // Now copy the top and bottom source lines into each line of the respective borders - src_ptr1 = ybf->y_buffer - Border; - src_ptr2 = src_ptr1 + (plane_height * plane_stride) - plane_stride; - dest_ptr1 = src_ptr1 - (Border * plane_stride); - dest_ptr2 = src_ptr2 + plane_stride; - - for (i = 0; i < (int)Border; i++) - { - memcpy(dest_ptr1, src_ptr1, plane_stride); - memcpy(dest_ptr2, src_ptr2, plane_stride); - dest_ptr1 += plane_stride; - dest_ptr2 += plane_stride; - } - - plane_stride /= 2; - plane_height /= 2; - plane_width /= 2; - Border /= 2; - - /***********/ - /* U Plane */ - /***********/ - - // copy the left and right most columns out - src_ptr1 = ybf->u_buffer; - src_ptr2 = src_ptr1 + plane_width - 1; - dest_ptr1 = src_ptr1 - Border; - dest_ptr2 = src_ptr2 + 1; - - for (i = 0; i < plane_height; i++) - { - memset(dest_ptr1, src_ptr1[0], Border); - memset(dest_ptr2, src_ptr2[0], Border); - src_ptr1 += plane_stride; - src_ptr2 += plane_stride; - dest_ptr1 += plane_stride; - dest_ptr2 += plane_stride; - } - - // Now copy the top and bottom source lines into each line of the respective borders - src_ptr1 = ybf->u_buffer - Border; - src_ptr2 = src_ptr1 + (plane_height * plane_stride) - plane_stride; - dest_ptr1 = src_ptr1 - (Border * plane_stride); - dest_ptr2 = src_ptr2 + plane_stride; - - for (i = 0; i < (int)(Border); i++) - { - memcpy(dest_ptr1, src_ptr1, plane_stride); - memcpy(dest_ptr2, src_ptr2, plane_stride); - dest_ptr1 += plane_stride; - dest_ptr2 += plane_stride; - } - - /***********/ - /* V Plane */ - /***********/ - - // copy the left and right most columns out - src_ptr1 = ybf->v_buffer; - src_ptr2 = src_ptr1 + plane_width - 1; - dest_ptr1 = src_ptr1 - Border; - dest_ptr2 = src_ptr2 + 1; - - for (i = 0; i < plane_height; 
i++) - { - memset(dest_ptr1, src_ptr1[0], Border); - memset(dest_ptr2, src_ptr2[0], Border); - src_ptr1 += plane_stride; - src_ptr2 += plane_stride; - dest_ptr1 += plane_stride; - dest_ptr2 += plane_stride; - } - - // Now copy the top and bottom source lines into each line of the respective borders - src_ptr1 = ybf->v_buffer - Border; - src_ptr2 = src_ptr1 + (plane_height * plane_stride) - plane_stride; - dest_ptr1 = src_ptr1 - (Border * plane_stride); - dest_ptr2 = src_ptr2 + plane_stride; - - for (i = 0; i < (int)(Border); i++) - { - memcpy(dest_ptr1, src_ptr1, plane_stride); - memcpy(dest_ptr2, src_ptr2, plane_stride); - dest_ptr1 += plane_stride; - dest_ptr2 += plane_stride; - } -} -/**************************************************************************** - * - * ROUTINE : vp8_yv12_copy_frame - * - * INPUTS : - * - * OUTPUTS : None. - * - * RETURNS : void - * - * FUNCTION : Copies the source image into the destination image and - * updates the destination's UMV borders. - * - * SPECIAL NOTES : The frames are assumed to be identical in size. 
- * - ****************************************************************************/ -void -vp8_yv12_copy_frame(YV12_BUFFER_CONFIG *src_ybc, YV12_BUFFER_CONFIG *dst_ybc) -{ - int row; - int i; - unsigned int *source; - _Uncached unsigned int *dest; - int height; - int width; - - height = src_ybc->y_height + (src_ybc->border * 2); - width = src_ybc->y_width + (src_ybc->border * 2); - width /= 4; - source = (unsigned int *)(src_ybc->y_buffer - (src_ybc->border * src_ybc->y_stride) - src_ybc->border); - dest = (_Uncached unsigned int *)(dst_ybc->y_buffer - (dst_ybc->border * dst_ybc->y_stride) - dst_ybc->border); - - for (row = 0; row < height; row++) - { - for (i = 0; i < width; i++) - { - dest[i] = source[i]; - } - - source += width; - dest += width; - } - - height = src_ybc->uv_height + (src_ybc->border); - width = src_ybc->uv_width + (src_ybc->border); - width /= 4; - - source = (unsigned int *)(src_ybc->u_buffer - (src_ybc->border / 2 * src_ybc->uv_stride) - src_ybc->border / 2); - dest = (_Uncached unsigned int *)(dst_ybc->u_buffer - (dst_ybc->border / 2 * dst_ybc->uv_stride) - dst_ybc->border / 2); - - for (row = 0; row < height; row++) - { - for (i = 0; i < width; i++) - { - dest[i] = source[i]; - } - - source += width; - dest += width; - } - - source = (unsigned int *)(src_ybc->v_buffer - (src_ybc->border / 2 * src_ybc->uv_stride) - src_ybc->border / 2); - dest = (_Uncached unsigned int *)(dst_ybc->v_buffer - (dst_ybc->border / 2 * dst_ybc->uv_stride) - dst_ybc->border / 2); - - for (row = 0; row < height; row++) - { - for (i = 0; i < width; i++) - { - dest[i] = source[i]; - } - - source += width; - dest += width; - } - -} diff -Nru libvpx-0.9.5/vpx_scale/symbian/gen_scalers_armv4.asm libvpx-0.9.6/vpx_scale/symbian/gen_scalers_armv4.asm --- libvpx-0.9.5/vpx_scale/symbian/gen_scalers_armv4.asm 2010-10-28 13:14:14.000000000 +0000 +++ libvpx-0.9.6/vpx_scale/symbian/gen_scalers_armv4.asm 1970-01-01 00:00:00.000000000 +0000 @@ -1,774 +0,0 @@ -; -; Copyright (c) 
2010 The WebM project authors. All Rights Reserved. -; -; Use of this source code is governed by a BSD-style license -; that can be found in the LICENSE file in the root of the source -; tree. An additional intellectual property rights grant can be found -; in the file PATENTS. All contributing project authors may -; be found in the AUTHORS file in the root of the source tree. -; - - - EXPORT |horizontal_line_4_5_scale_armv4| - EXPORT |vertical_band_4_5_scale_armv4| - EXPORT |horizontal_line_2_3_scale_armv4| - EXPORT |vertical_band_2_3_scale_armv4| - EXPORT |horizontal_line_3_5_scale_armv4| - EXPORT |vertical_band_3_5_scale_armv4| - EXPORT |horizontal_line_3_4_scale_armv4| - EXPORT |vertical_band_3_4_scale_armv4| - EXPORT |horizontal_line_1_2_scale_armv4| - EXPORT |vertical_band_1_2_scale_armv4| - - AREA |.text|, CODE, READONLY ; name this block of code - -src RN r0 -srcw RN r1 -dest RN r2 -mask RN r12 -c51_205 RN r10 -c102_154 RN r11 -;/**************************************************************************** -; * -; * ROUTINE : horizontal_line_4_5_scale_armv4 -; * -; * INPUTS : const unsigned char *source : Pointer to source data. -; * unsigned int source_width : Stride of source. -; * unsigned char *dest : Pointer to destination data. -; * unsigned int dest_width : Stride of destination (NOT USED). -; * -; * OUTPUTS : None. -; * -; * RETURNS : void -; * -; * FUNCTION : Copies horizontal line of pixels from source to -; * destination scaling up by 4 to 5. -; * -; * SPECIAL NOTES : None. 
-; * -; ****************************************************************************/ -;void horizontal_line_4_5_scale_armv4 -;( -; r0 = UINT8 *source -; r1 = UINT32 source_width -; r2 = UINT8 *dest -; r3 = UINT32 dest_width -;) -|horizontal_line_4_5_scale_armv4| PROC - stmdb sp!, {r4 - r11, lr} - - mov mask, #255 ; mask for selection - ldr c51_205, =0x3300cd - ldr c102_154, =0x66009a - - ldr r3, [src], #4 - -hl45_loop - - and r4, r3, mask ; a = src[0] - and r5, mask, r3, lsr #8 ; b = src[1] - strb r4, [dest], #1 - - orr r6, r4, r5, lsl #16 ; b | a - and r7, mask, r3, lsr #16 ; c = src[2] - mul r6, c51_205, r6 ; a * 51 + 205 * b - - orr r5, r5, r7, lsl #16 ; c | b - mul r5, c102_154, r5 ; b * 102 + 154 * c - add r6, r6, #0x8000 - and r8, mask, r3, lsr #24 ; d = src[3] - mov r6, r6, lsr #24 - strb r6, [dest], #1 - - orr r7, r8, r7, lsl #16 ; c | d - mul r7, c102_154, r7 ; c * 154 + 102 * d - add r5, r5, #0x8000 - ldr r3, [src], #4 - mov r5, r5, lsr #24 - strb r5, [dest], #1 - - add r7, r7, #0x8000 - and r9, mask, r3 ; e = src[4] - orr r9, r9, r8, lsl #16 ; d | e - mul r9, c51_205, r9 ; d * 205 + 51 * e - mov r7, r7, lsr #24 - strb r7, [dest], #1 - - add r9, r9, #0x8000 - subs srcw, srcw, #4 - mov r9, r9, lsr #24 - strb r9, [dest], #1 - - bne hl45_loop - - and r4, r3, mask - and r5, mask, r3, lsl #8 - strb r4, [dest], #1 - - orr r6, r4, r5, lsl #16 ; b | a - mul r6, c51_205, r6 - - and r7, mask, r3, lsl #16 - orr r5, r5, r7, lsl #16 ; c | b - mul r5, c102_154, r5 - add r6, r6, #0x8000 - and r8, mask, r3, lsl #24 - mov r6, r6, lsr #24 - strb r6, [dest], #1 - - orr r7, r8, r7, lsl #16 ; c | d - mul r7, c102_154, r7 - add r5, r5, #0x8000 - mov r5, r5, lsr #24 - strb r5, [dest], #1 - - add r7, r7, #0x8000 - mov r7, r7, lsr #24 - strb r7, [dest], #1 - - ldrb r3, [src] - strb r3, [dest], #1 - - ldmia sp!, {r4 - r11, pc} - ENDP ;|vp8cx_horizontal_line_4_5_scale_c| - -;/**************************************************************************** -; * -; * ROUTINE : 
vertical_band_4_5_scale_armv4 -; * -; * INPUTS : unsigned char *dest : Pointer to destination data. -; * unsigned int dest_pitch : Stride of destination data. -; * unsigned int dest_width : Width of destination data. -; * -; * OUTPUTS : None. -; * -; * RETURNS : void -; * -; * FUNCTION : Scales vertical band of pixels by scale 4 to 5. The -; * height of the band scaled is 4-pixels. -; * -; * SPECIAL NOTES : The routine uses the first line of the band below -; * the current band. -; * -; ****************************************************************************/ -;void vertical_band_4_5_scale_armv4 -;( -; r0 = UINT8 *dest -; r1 = UINT32 dest_pitch -; r2 = UINT32 dest_width -;) -|vertical_band_4_5_scale_armv4| PROC - stmdb sp!, {r4 - r11, lr} - - ldr c51_205, =0x3300cd - ldr c102_154, =0x66009a - -vl45_loop - mov r3, src - ldrb r4, [r3], r1 ; a = des [0] - ldrb r5, [r3], r1 ; b = des [dest_pitch] - ldrb r7, [r3], r1 ; c = des[dest_pitch*2] - add lr, src, r1 - - orr r6, r4, r5, lsl #16 ; b | a - mul r6, c51_205, r6 ; a * 51 + 205 * b - - ldrb r8, [r3], r1 ; d = des[dest_pitch*3] - orr r5, r5, r7, lsl #16 ; c | b - mul r5, c102_154, r5 ; b * 102 + 154 * c - add r6, r6, #0x8000 - orr r7, r8, r7, lsl #16 ; c | d - mov r6, r6, lsr #24 - strb r6, [lr], r1 - - ldrb r9, [r3, r1] ; e = des [dest_pitch * 5] - mul r7, c102_154, r7 ; c * 154 + 102 * d - add r5, r5, #0x8000 - orr r9, r9, r8, lsl #16 ; d | e - mov r5, r5, lsr #24 - strb r5, [lr], r1 - - mul r9, c51_205, r9 ; d * 205 + 51 * e - add r7, r7, #0x8000 - add src, src, #1 - mov r7, r7, lsr #24 - strb r7, [lr], r1 - - add r9, r9, #0x8000 - subs r2, r2, #1 - mov r9, r9, lsr #24 - strb r9, [lr], r1 - - bne vl45_loop - - ldmia sp!, {r4 - r11, pc} - ENDP ;|vertical_band_4_5_scale_armv4| - -;/**************************************************************************** -; * -; * ROUTINE : horizontal_line_2_3_scale_armv4 -; * -; * INPUTS : const unsigned char *source : Pointer to source data. 
-; * unsigned int source_width : Stride of source. -; * unsigned char *dest : Pointer to destination data. -; * unsigned int dest_width : Stride of destination (NOT USED). -; * -; * OUTPUTS : None. -; * -; * RETURNS : void -; * -; * FUNCTION : Copies horizontal line of pixels from source to -; * destination scaling up by 2 to 3. -; * -; * SPECIAL NOTES : None. -; * -; * -; ****************************************************************************/ -;void horizontal_line_2_3_scale_armv4 -;( -; const unsigned char *source, -; unsigned int source_width, -; unsigned char *dest, -; unsigned int dest_width -;) -|horizontal_line_2_3_scale_armv4| PROC - stmdb sp!, {r4 - r11, lr} - ldr lr, =85 - ldr r12, =171 - -hl23_loop - - ldrb r3, [src], #1 ; a - ldrb r4, [src], #1 ; b - ldrb r5, [src] ; c - - strb r3, [dest], #1 - mul r4, r12, r4 ; b * 171 - mla r6, lr, r3, r4 ; a * 85 - mla r7, lr, r5, r4 ; c * 85 - - add r6, r6, #128 - mov r6, r6, lsr #8 - strb r6, [dest], #1 - - add r7, r7, #128 - mov r7, r7, lsr #8 - strb r7, [dest], #1 - - subs srcw, srcw, #2 - bne hl23_loop - - ldrb r4, [src, #1] ; b - strb r5, [dest], #1 - strb r4, [dest, #1] - - mul r4, r12, r4 ; b * 171 - mla r6, lr, r5, r4 ; a * 85 + b *171 - - add r6, r6, #128 - mov r6, r6, lsr #8 - strb r6, [dest] - - ldmia sp!, {r4 - r11, pc} - ENDP ;|horizontal_line_2_3_scale_armv4| - -;/**************************************************************************** -; * -; * ROUTINE : vertical_band_2_3_scale_armv4 -; * -; * INPUTS : unsigned char *dest : Pointer to destination data. -; * unsigned int dest_pitch : Stride of destination data. -; * unsigned int dest_width : Width of destination data. -; * -; * OUTPUTS : None. -; * -; * RETURNS : void -; * -; * FUNCTION : Scales vertical band of pixels by scale 2 to 3. The -; * height of the band scaled is 2-pixels. -; * -; * SPECIAL NOTES : The routine uses the first line of the band below -; * the current band. 
-; * -; ****************************************************************************/ -;void vertical_band_2_3_scale_armv4 -;( -; r0 = UINT8 *dest -; r1 = UINT32 dest_pitch -; r2 = UINT32 dest_width -;) -|vertical_band_2_3_scale_armv4| PROC - stmdb sp!, {r4 - r8, lr} - ldr lr, =85 - ldr r12, =171 - add r3, r1, r1, lsl #1 ; 3 * dest_pitch - -vl23_loop - ldrb r4, [src] ; a = des [0] - ldrb r5, [src, r1] ; b = des [dest_pitch] - ldrb r7, [src, r3] ; c = des [dest_pitch*3] - subs r2, r2, #1 - - mul r5, r12, r5 ; b * 171 - mla r6, lr, r4, r5 ; a * 85 - mla r8, lr, r7, r5 ; c * 85 - - add r6, r6, #128 - mov r6, r6, lsr #8 - strb r6, [src, r1] - - add r8, r8, #128 - mov r8, r8, lsr #8 - strb r8, [src, r1, lsl #1] - - add src, src, #1 - - bne vl23_loop - - ldmia sp!, {r4 - r8, pc} - ENDP ;|vertical_band_2_3_scale_armv4| - -;/**************************************************************************** -; * -; * ROUTINE : vp8cx_horizontal_line_3_5_scale_c -; * -; * INPUTS : const unsigned char *source : Pointer to source data. -; * unsigned int source_width : Stride of source. -; * unsigned char *dest : Pointer to destination data. -; * unsigned int dest_width : Stride of destination (NOT USED). -; * -; * OUTPUTS : None. -; * -; * RETURNS : void -; * -; * FUNCTION : Copies horizontal line of pixels from source to -; * destination scaling up by 3 to 5. -; * -; * SPECIAL NOTES : None. 
-; * -; * -; ****************************************************************************/ -;void vp8cx_horizontal_line_3_5_scale_c -;( -; const unsigned char *source, -; unsigned int source_width, -; unsigned char *dest, -; unsigned int dest_width -;) -|horizontal_line_3_5_scale_armv4| PROC - stmdb sp!, {r4 - r11, lr} - - ldr c51_205, =0x3300cd - ldr c102_154, =0x66009a - - ldrb r4, [src], #1 ; a = src[0] - -hl35_loop - - ldrb r8, [src], #1 ; b = src[1] - strb r4, [dest], #1 - - orr r6, r4, r8, lsl #16 ; b | a - ldrb r9, [src], #1 ; c = src[2] - mul r6, c102_154, r6 ; a * 102 + 154 * b - - orr r5, r9, r8, lsl #16 ; b | c - mul r5, c51_205, r5 ; b * 205 + 51 * c - add r6, r6, #0x8000 - ldrb r4, [src], #1 ; d = src[3] - mov r6, r6, lsr #24 - strb r6, [dest], #1 - - orr r7, r8, r9, lsl #16 ; c | b - mul r7, c51_205, r7 ; c * 205 + 154 * b - add r5, r5, #0x8000 - mov r5, r5, lsr #24 - strb r5, [dest], #1 - - orr r9, r4, r9, lsl #16 ; c | d - mul r9, c102_154, r9 ; c * 154 + 102 * d - add r7, r7, #0x8000 - mov r7, r7, lsr #24 - strb r7, [dest], #1 - - add r9, r9, #0x8000 - subs srcw, srcw, #3 - mov r9, r9, lsr #24 - strb r9, [dest], #1 - - bpl hl35_loop - - ldrb r5, [src], #1 ; b = src[1] - strb r4, [dest], #1 - - orr r6, r4, r8, lsl #16 ; b | a - ldrb r9, [src], #1 ; c = src[2] - mul r6, c102_154, r6 ; a * 102 + 154 * b - - orr r5, r9, r8, lsl #16 ; b | c - mul r5, c51_205, r5 ; b * 205 + 51 * c - add r6, r6, #0x8000 - mov r6, r6, lsr #24 - strb r6, [dest], #1 - - orr r7, r8, r9, lsl #16 ; c | b - mul r7, c51_205, r7 ; c * 205 + 154 * b - add r5, r5, #0x8000 - mov r5, r5, lsr #24 - strb r5, [dest], #1 - - add r7, r7, #0x8000 - mov r7, r7, lsr #24 - strb r7, [dest], #1 - strb r9, [dest], #1 - - ldmia sp!, {r4 - r11, pc} - ENDP ;|vp8cx_horizontal_line_3_5_scale_c| - - -;/**************************************************************************** -; * -; * ROUTINE : vp8cx_vertical_band_3_5_scale_c -; * -; * INPUTS : unsigned char *dest : Pointer to destination data. 
-; * unsigned int dest_pitch : Stride of destination data. -; * unsigned int dest_width : Width of destination data. -; * -; * OUTPUTS : None. -; * -; * RETURNS : void -; * -; * FUNCTION : Scales vertical band of pixels by scale 3 to 5. The -; * height of the band scaled is 3-pixels. -; * -; * SPECIAL NOTES : The routine uses the first line of the band below -; * the current band. -; * -; ****************************************************************************/ -;void vertical_band_4_5_scale_armv4 -;( -; r0 = UINT8 *dest -; r1 = UINT32 dest_pitch -; r2 = UINT32 dest_width -;) -|vertical_band_3_5_scale_armv4| PROC - stmdb sp!, {r4 - r11, lr} - - ldr c51_205, =0x3300cd - ldr c102_154, =0x66009a - -vl35_loop - mov r3, src - ldrb r4, [r3], r1 ; a = des [0] - ldrb r5, [r3], r1 ; b = des [dest_pitch] - ldrb r7, [r3], r1 ; c = des[dest_pitch*2] - add lr, src, r1 - - orr r8, r4, r5, lsl #16 ; b | a - mul r6, c102_154, r8 ; a * 102 + 154 * b - - ldrb r8, [r3, r1, lsl #1] ; d = des[dest_pitch*5] - orr r3, r7, r5, lsl #16 ; b | c - mul r9, c51_205, r3 ; b * 205 + 51 * c - add r6, r6, #0x8000 - orr r3, r5, r7, lsl #16 ; c | b - mov r6, r6, lsr #24 - strb r6, [lr], r1 - - mul r5, c51_205, r3 ; c * 205 + 154 * b - add r9, r9, #0x8000 - orr r3, r8, r7, lsl #16 ; c | d - mov r9, r9, lsr #24 - strb r9, [lr], r1 - - mul r7, c102_154, r3 ; c * 154 + 102 * d - add r5, r5, #0x8000 - add src, src, #1 - mov r5, r5, lsr #24 - strb r5, [lr], r1 - - add r7, r7, #0x8000 - subs r2, r2, #1 - mov r7, r7, lsr #24 - strb r7, [lr], r1 - - - bne vl35_loop - - ldmia sp!, {r4 - r11, pc} - ENDP ;|vertical_band_3_5_scale_armv4| - -;/**************************************************************************** -; * -; * ROUTINE : horizontal_line_3_4_scale_armv4 -; * -; * INPUTS : const unsigned char *source : Pointer to source data. -; * unsigned int source_width : Stride of source. -; * unsigned char *dest : Pointer to destination data. 
-; * unsigned int dest_width : Stride of destination (NOT USED). -; * -; * OUTPUTS : None. -; * -; * RETURNS : void -; * -; * FUNCTION : Copies horizontal line of pixels from source to -; * destination scaling up by 3 to 4. -; * -; * SPECIAL NOTES : None. -; * -; * -; ****************************************************************************/ -;void horizontal_line_3_4_scale_armv4 -;( -; const unsigned char *source, -; unsigned int source_width, -; unsigned char *dest, -; unsigned int dest_width -;) -|horizontal_line_3_4_scale_armv4| PROC - stmdb sp!, {r4 - r11, lr} - - ldr r10, =64 - ldr r11, =192 - mov r9, #128 - - ldrb r4, [src], #1 ; a = src[0] - -hl34_loop - - ldrb r8, [src], #1 ; b = src[1] - ldrb r7, [src], #1 ; c = src[2] - strb r4, [dest], #1 - - mla r4, r10, r4, r9 ; a*64 + 128 - mla r4, r11, r8, r4 ; a*64 + b*192 + 1 - - add r8, r8, #1 ; b + 1 - add r8, r8, r7 ; b + c + 1 - mov r8, r8, asr #1 ; (b + c + 1) >> 1 - - mov r4, r4, asr #8 ; (a*64 + b*192 + 1) >> 8 - strb r4, [dest], #1 - - strb r8, [dest], #1 - - ldrb r4, [src], #1 ; [a+1] - - mla r7, r11, r7, r9 ; c*192 + 128 - mla r7, r4, r10, r7 ; a*64 + b*192 + 128 - - subs srcw, srcw, #3 - - mov r7, r7, asr #8 ; (a*64 + b*192 + 128) >> 8 - strb r7, [dest], #1 - - bpl hl34_loop - - ldrb r8, [src], #1 ; b = src[1] - ldrb r7, [src], #1 ; c = src[2] - strb r4, [dest], #1 - - mla r4, r10, r4, r9 ; a*64 + 128 - mla r4, r11, r8, r4 ; a*64 + b*192 + 1 - mov r4, r4, asr #8 ; (a*64 + b*192 + 1) >> 8 - strb r4, [dest], #1 - - add r8, r8, #1 ; b + 1 - add r8, r8, r7 ; b + c + 1 - mov r8, r8, asr #1 ; (b + c + 1) >> 1 - strb r8, [dest], #1 - strb r7, [dest], #1 - - ldmia sp!, {r4 - r11, pc} - ENDP ;|vp8cx_horizontal_line_3_4_scale_c| - - -;/**************************************************************************** -; * -; * ROUTINE : vertical_band_3_4_scale_armv4 -; * -; * INPUTS : unsigned char *dest : Pointer to destination data. -; * unsigned int dest_pitch : Stride of destination data. 
-; * unsigned int dest_width : Width of destination data. -; * -; * OUTPUTS : None. -; * -; * RETURNS : void -; * -; * FUNCTION : Scales vertical band of pixels by scale 3 to 4. The -; * height of the band scaled is 3-pixels. -; * -; * SPECIAL NOTES : The routine uses the first line of the band below -; * the current band. -; * -; ****************************************************************************/ -;void vertical_band_3_4_scale_armv4 -;( -; r0 = UINT8 *dest -; r1 = UINT32 dest_pitch -; r2 = UINT32 dest_width -;) -|vertical_band_3_4_scale_armv4| PROC - stmdb sp!, {r4 - r11, lr} - - ldr r10, =64 - ldr r11, =192 - mov r9, #128 - -; ldr r1,[r1] -vl34_loop - mov r3, src - ldrb r4, [r3], r1 ; a = des [0] - ldrb r5, [r3], r1 ; b = des [dest_pitch] - ldrb r7, [r3], r1 ; c = des [dest_pitch*2] - add lr, src, r1 - - mla r4, r10, r4, r9 ; a*64 + 128 - mla r4, r11, r5, r4 ; a*64 + b*192 + 1 - - add r5, r5, #1 ; b + 1 - add r5, r5, r7 ; b + c + 1 - mov r5, r5, asr #1 ; (b + c + 1) >> 1 - - mov r4, r4, asr #8 ; (a*64 + b*192 + 1) >> 8 - strb r4, [lr], r1 - - ldrb r4, [r3, r1] ; a = des [dest_pitch*4] - - strb r5, [lr], r1 - - mla r7, r11, r7, r9 ; c*192 + 128 - mla r7, r4, r10, r7 ; a*64 + b*192 + 128 - mov r7, r7, asr #8 ; (a*64 + b*192 + 128) >> 8 - - add src, src, #1 - subs r2, r2, #1 - - strb r7, [lr] - - bne vl34_loop - - ldmia sp!, {r4 - r11, pc} - ENDP ;|vertical_band_3_4_scale_armv4| - -;/**************************************************************************** -; * -; * ROUTINE : vp8cx_horizontal_line_1_2_scale_c -; * -; * INPUTS : const unsigned char *source : Pointer to source data. -; * unsigned int source_width : Stride of source. -; * unsigned char *dest : Pointer to destination data. -; * unsigned int dest_width : Stride of destination (NOT USED). -; * -; * OUTPUTS : None. -; * -; * RETURNS : void -; * -; * FUNCTION : Copies horizontal line of pixels from source to -; * destination scaling up by 1 to 2. -; * -; * SPECIAL NOTES : None. 
-; * -; ****************************************************************************/ -;void vp8cx_horizontal_line_1_2_scale_c -;( -; const unsigned char *source, -; unsigned int source_width, -; unsigned char *dest, -; unsigned int dest_width -;) -|horizontal_line_1_2_scale_armv4| PROC - stmdb sp!, {r4 - r5, lr} - - sub srcw, srcw, #1 - - ldrb r3, [src], #1 - ldrb r4, [src], #1 -hl12_loop - subs srcw, srcw, #1 - - add r5, r3, r4 - add r5, r5, #1 - mov r5, r5, lsr #1 - - orr r5, r3, r5, lsl #8 - strh r5, [dest], #2 - - mov r3, r4 - - ldrneb r4, [src], #1 - bne hl12_loop - - orr r5, r4, r4, lsl #8 - strh r5, [dest] - - ldmia sp!, {r4 - r5, pc} - ENDP ;|vertical_band_3_5_scale_armv4| - -;/**************************************************************************** -; * -; * ROUTINE : vp8cx_vertical_band_1_2_scale_c -; * -; * INPUTS : unsigned char *dest : Pointer to destination data. -; * unsigned int dest_pitch : Stride of destination data. -; * unsigned int dest_width : Width of destination data. -; * -; * OUTPUTS : None. -; * -; * RETURNS : void -; * -; * FUNCTION : Scales vertical band of pixels by scale 1 to 2. The -; * height of the band scaled is 1-pixel. -; * -; * SPECIAL NOTES : The routine uses the first line of the band below -; * the current band. 
-; * -; ****************************************************************************/ -;void vp8cx_vertical_band_1_2_scale_c -;( -; r0 = UINT8 *dest -; r1 = UINT32 dest_pitch -; r2 = UINT32 dest_width -;) -|vertical_band_1_2_scale_armv4| PROC - stmdb sp!, {r4 - r7, lr} - - ldr mask, =0xff00ff ; mask for selection - ldr lr, = 0x010001 - -vl12_loop - mov r3, src - ldr r4, [r3], r1 - ldr r5, [r3, r1] - - add src, src, #4 - subs r2, r2, #4 - - and r6, r4, mask - and r7, r5, mask - - add r6, r7, r6 - add r6, r6, lr - - and r4, mask, r4, lsr #8 - and r5, mask, r5, lsr #8 - - mov r6, r6, lsr #1 - and r6, r6, mask - - add r4, r5, r4 - add r4, r4, lr - - mov r4, r4, lsr #1 - and r4, r4, mask - - orr r5, r6, r4, lsl #8 - - str r5, [r3] - - bpl vl12_loop - - ldmia sp!, {r4 - r7, pc} - ENDP ;|vertical_band_3_5_scale_armv4| - - END diff -Nru libvpx-0.9.5/vpx_scale/symbian/gen_scalers_armv4.s libvpx-0.9.6/vpx_scale/symbian/gen_scalers_armv4.s --- libvpx-0.9.5/vpx_scale/symbian/gen_scalers_armv4.s 2010-10-28 13:14:14.000000000 +0000 +++ libvpx-0.9.6/vpx_scale/symbian/gen_scalers_armv4.s 1970-01-01 00:00:00.000000000 +0000 @@ -1,808 +0,0 @@ -@ This file was created from a .asm file -@ using the ads2gas.pl script. 
- - .equ WIDE_REFERENCE, 0 - .ifndef ARCHITECTURE - .equ ARCHITECTURE, 5 - .endif - .global horizontal_line_4_5_scale_armv4 - .ifndef NO_TYPE_PSEUDO_OP - .type horizontal_line_4_5_scale_armv4, function - .endif - .global vertical_band_4_5_scale_armv4 - .ifndef NO_TYPE_PSEUDO_OP - .type vertical_band_4_5_scale_armv4, function - .endif - .global horizontal_line_2_3_scale_armv4 - .ifndef NO_TYPE_PSEUDO_OP - .type horizontal_line_2_3_scale_armv4, function - .endif - .global vertical_band_2_3_scale_armv4 - .ifndef NO_TYPE_PSEUDO_OP - .type vertical_band_2_3_scale_armv4, function - .endif - .global horizontal_line_3_5_scale_armv4 - .ifndef NO_TYPE_PSEUDO_OP - .type horizontal_line_3_5_scale_armv4, function - .endif - .global vertical_band_3_5_scale_armv4 - .ifndef NO_TYPE_PSEUDO_OP - .type vertical_band_3_5_scale_armv4, function - .endif - .global horizontal_line_3_4_scale_armv4 - .ifndef NO_TYPE_PSEUDO_OP - .type horizontal_line_3_4_scale_armv4, function - .endif - .global vertical_band_3_4_scale_armv4 - .ifndef NO_TYPE_PSEUDO_OP - .type vertical_band_3_4_scale_armv4, function - .endif - .global horizontal_line_1_2_scale_armv4 - .ifndef NO_TYPE_PSEUDO_OP - .type horizontal_line_1_2_scale_armv4, function - .endif - .global vertical_band_1_2_scale_armv4 - .ifndef NO_TYPE_PSEUDO_OP - .type vertical_band_1_2_scale_armv4, function - .endif - -.text - -src .req r0 -srcw .req r1 -dest .req r2 -mask .req r12 -c51_205 .req r10 -c102_154 .req r11 -@/**************************************************************************** -@ * -@ * ROUTINE : horizontal_line_4_5_scale_armv4 -@ * -@ * INPUTS : const unsigned char *source : Pointer to source data. -@ * unsigned int source_width : Stride of source. -@ * unsigned char *dest : Pointer to destination data. -@ * unsigned int dest_width : Stride of destination (NOT USED). -@ * -@ * OUTPUTS : None. -@ * -@ * RETU.req_s : void -@ * -@ * FUNCTION : Copies horizontal line of pixels from source to -@ * destination scaling up by 4 to 5. 
-@ * -@ * SPECIAL NOTES : None. -@ * -@ ****************************************************************************/ -@void horizontal_line_4_5_scale_armv4 -@( -@ r0 = UINT8 *source -@ r1 = UINT32 source_width -@ r2 = UINT8 *dest -@ r3 = UINT32 dest_width -@) -_HorizontalLine_4_5_Scale_ARMv4: - horizontal_line_4_5_scale_armv4: @ - stmdb sp!, {r4 - r11, lr} - - mov mask, #255 @ mask for selection - ldr c51_205, =0x3300cd - ldr c102_154, =0x66009a - - ldr r3, [src], #4 - -hl45_loop: - - and r4, r3, mask @ a = src[0] - and r5, mask, r3, lsr #8 @ b = src[1] - strb r4, [dest], #1 - - orr r6, r4, r5, lsl #16 @ b | a - and r7, mask, r3, lsr #16 @ c = src[2] - mul r6, c51_205, r6 @ a * 51 + 205 * b - - orr r5, r5, r7, lsl #16 @ c | b - mul r5, c102_154, r5 @ b * 102 + 154 * c - add r6, r6, #0x8000 - and r8, mask, r3, lsr #24 @ d = src[3] - mov r6, r6, lsr #24 - strb r6, [dest], #1 - - orr r7, r8, r7, lsl #16 @ c | d - mul r7, c102_154, r7 @ c * 154 + 102 * d - add r5, r5, #0x8000 - ldr r3, [src], #4 - mov r5, r5, lsr #24 - strb r5, [dest], #1 - - add r7, r7, #0x8000 - and r9, mask, r3 @ e = src[4] - orr r9, r9, r8, lsl #16 @ d | e - mul r9, c51_205, r9 @ d * 205 + 51 * e - mov r7, r7, lsr #24 - strb r7, [dest], #1 - - add r9, r9, #0x8000 - subs srcw, srcw, #4 - mov r9, r9, lsr #24 - strb r9, [dest], #1 - - bne hl45_loop - - and r4, r3, mask - and r5, mask, r3, lsl #8 - strb r4, [dest], #1 - - orr r6, r4, r5, lsl #16 @ b | a - mul r6, c51_205, r6 - - and r7, mask, r3, lsl #16 - orr r5, r5, r7, lsl #16 @ c | b - mul r5, c102_154, r5 - add r6, r6, #0x8000 - and r8, mask, r3, lsl #24 - mov r6, r6, lsr #24 - strb r6, [dest], #1 - - orr r7, r8, r7, lsl #16 @ c | d - mul r7, c102_154, r7 - add r5, r5, #0x8000 - mov r5, r5, lsr #24 - strb r5, [dest], #1 - - add r7, r7, #0x8000 - mov r7, r7, lsr #24 - strb r7, [dest], #1 - - ldrb r3, [src] - strb r3, [dest], #1 - - ldmia sp!, {r4 - r11, pc} - @ @|vp8cx_horizontal_line_4_5_scale_c| - 
-@/**************************************************************************** -@ * -@ * ROUTINE : vertical_band_4_5_scale_armv4 -@ * -@ * INPUTS : unsigned char *dest : Pointer to destination data. -@ * unsigned int dest_pitch : Stride of destination data. -@ * unsigned int dest_width : Width of destination data. -@ * -@ * OUTPUTS : None. -@ * -@ * RETU.req_s : void -@ * -@ * FUNCTION : Scales vertical band of pixels by scale 4 to 5. The -@ * height of the band scaled is 4-pixels. -@ * -@ * SPECIAL NOTES : The routine uses the first line of the band below -@ * the current band. -@ * -@ ****************************************************************************/ -@void vertical_band_4_5_scale_armv4 -@( -@ r0 = UINT8 *dest -@ r1 = UINT32 dest_pitch -@ r2 = UINT32 dest_width -@) -_VerticalBand_4_5_Scale_ARMv4: - vertical_band_4_5_scale_armv4: @ - stmdb sp!, {r4 - r11, lr} - - ldr c51_205, =0x3300cd - ldr c102_154, =0x66009a - -vl45_loop: - mov r3, src - ldrb r4, [r3], r1 @ a = des [0] - ldrb r5, [r3], r1 @ b = des [dest_pitch] - ldrb r7, [r3], r1 @ c = des[dest_pitch*2] - add lr, src, r1 - - orr r6, r4, r5, lsl #16 @ b | a - mul r6, c51_205, r6 @ a * 51 + 205 * b - - ldrb r8, [r3], r1 @ d = des[dest_pitch*3] - orr r5, r5, r7, lsl #16 @ c | b - mul r5, c102_154, r5 @ b * 102 + 154 * c - add r6, r6, #0x8000 - orr r7, r8, r7, lsl #16 @ c | d - mov r6, r6, lsr #24 - strb r6, [lr], r1 - - ldrb r9, [r3, r1] @ e = des [dest_pitch * 5] - mul r7, c102_154, r7 @ c * 154 + 102 * d - add r5, r5, #0x8000 - orr r9, r9, r8, lsl #16 @ d | e - mov r5, r5, lsr #24 - strb r5, [lr], r1 - - mul r9, c51_205, r9 @ d * 205 + 51 * e - add r7, r7, #0x8000 - add src, src, #1 - mov r7, r7, lsr #24 - strb r7, [lr], r1 - - add r9, r9, #0x8000 - subs r2, r2, #1 - mov r9, r9, lsr #24 - strb r9, [lr], r1 - - bne vl45_loop - - ldmia sp!, {r4 - r11, pc} - @ @|vertical_band_4_5_scale_armv4| - -@/**************************************************************************** -@ * -@ * ROUTINE : 
horizontal_line_2_3_scale_armv4 -@ * -@ * INPUTS : const unsigned char *source : Pointer to source data. -@ * unsigned int source_width : Stride of source. -@ * unsigned char *dest : Pointer to destination data. -@ * unsigned int dest_width : Stride of destination (NOT USED). -@ * -@ * OUTPUTS : None. -@ * -@ * RETU.req_s : void -@ * -@ * FUNCTION : Copies horizontal line of pixels from source to -@ * destination scaling up by 2 to 3. -@ * -@ * SPECIAL NOTES : None. -@ * -@ * -@ ****************************************************************************/ -@void horizontal_line_2_3_scale_armv4 -@( -@ const unsigned char *source, -@ unsigned int source_width, -@ unsigned char *dest, -@ unsigned int dest_width -@) -_HorizontalLine_2_3_Scale_ARMv4: - horizontal_line_2_3_scale_armv4: @ - stmdb sp!, {r4 - r11, lr} - ldr lr, =85 - ldr r12, =171 - -hl23_loop: - - ldrb r3, [src], #1 @ a - ldrb r4, [src], #1 @ b - ldrb r5, [src] @ c - - strb r3, [dest], #1 - mul r4, r12, r4 @ b * 171 - mla r6, lr, r3, r4 @ a * 85 - mla r7, lr, r5, r4 @ c * 85 - - add r6, r6, #128 - mov r6, r6, lsr #8 - strb r6, [dest], #1 - - add r7, r7, #128 - mov r7, r7, lsr #8 - strb r7, [dest], #1 - - subs srcw, srcw, #2 - bne hl23_loop - - ldrb r4, [src, #1] @ b - strb r5, [dest], #1 - strb r4, [dest, #1] - - mul r4, r12, r4 @ b * 171 - mla r6, lr, r5, r4 @ a * 85 + b *171 - - add r6, r6, #128 - mov r6, r6, lsr #8 - strb r6, [dest] - - ldmia sp!, {r4 - r11, pc} - @ @|horizontal_line_2_3_scale_armv4| - -@/**************************************************************************** -@ * -@ * ROUTINE : vertical_band_2_3_scale_armv4 -@ * -@ * INPUTS : unsigned char *dest : Pointer to destination data. -@ * unsigned int dest_pitch : Stride of destination data. -@ * unsigned int dest_width : Width of destination data. -@ * -@ * OUTPUTS : None. -@ * -@ * RETU.req_s : void -@ * -@ * FUNCTION : Scales vertical band of pixels by scale 2 to 3. The -@ * height of the band scaled is 2-pixels. 
-@ * -@ * SPECIAL NOTES : The routine uses the first line of the band below -@ * the current band. -@ * -@ ****************************************************************************/ -@void vertical_band_2_3_scale_armv4 -@( -@ r0 = UINT8 *dest -@ r1 = UINT32 dest_pitch -@ r2 = UINT32 dest_width -@) -_VerticalBand_2_3_Scale_ARMv4: - vertical_band_2_3_scale_armv4: @ - stmdb sp!, {r4 - r8, lr} - ldr lr, =85 - ldr r12, =171 - add r3, r1, r1, lsl #1 @ 3 * dest_pitch - -vl23_loop: - ldrb r4, [src] @ a = des [0] - ldrb r5, [src, r1] @ b = des [dest_pitch] - ldrb r7, [src, r3] @ c = des [dest_pitch*3] - subs r2, r2, #1 - - mul r5, r12, r5 @ b * 171 - mla r6, lr, r4, r5 @ a * 85 - mla r8, lr, r7, r5 @ c * 85 - - add r6, r6, #128 - mov r6, r6, lsr #8 - strb r6, [src, r1] - - add r8, r8, #128 - mov r8, r8, lsr #8 - strb r8, [src, r1, lsl #1] - - add src, src, #1 - - bne vl23_loop - - ldmia sp!, {r4 - r8, pc} - @ @|vertical_band_2_3_scale_armv4| - -@/**************************************************************************** -@ * -@ * ROUTINE : vp8cx_horizontal_line_3_5_scale_c -@ * -@ * INPUTS : const unsigned char *source : Pointer to source data. -@ * unsigned int source_width : Stride of source. -@ * unsigned char *dest : Pointer to destination data. -@ * unsigned int dest_width : Stride of destination (NOT USED). -@ * -@ * OUTPUTS : None. -@ * -@ * RETU.req_s : void -@ * -@ * FUNCTION : Copies horizontal line of pixels from source to -@ * destination scaling up by 3 to 5. -@ * -@ * SPECIAL NOTES : None. 
-@ * -@ * -@ ****************************************************************************/ -@void vp8cx_horizontal_line_3_5_scale_c -@( -@ const unsigned char *source, -@ unsigned int source_width, -@ unsigned char *dest, -@ unsigned int dest_width -@) -_HorizontalLine_3_5_Scale_ARMv4: - horizontal_line_3_5_scale_armv4: @ - stmdb sp!, {r4 - r11, lr} - - ldr c51_205, =0x3300cd - ldr c102_154, =0x66009a - - ldrb r4, [src], #1 @ a = src[0] - -hl35_loop: - - ldrb r8, [src], #1 @ b = src[1] - strb r4, [dest], #1 - - orr r6, r4, r8, lsl #16 @ b | a - ldrb r9, [src], #1 @ c = src[2] - mul r6, c102_154, r6 @ a * 102 + 154 * b - - orr r5, r9, r8, lsl #16 @ b | c - mul r5, c51_205, r5 @ b * 205 + 51 * c - add r6, r6, #0x8000 - ldrb r4, [src], #1 @ d = src[3] - mov r6, r6, lsr #24 - strb r6, [dest], #1 - - orr r7, r8, r9, lsl #16 @ c | b - mul r7, c51_205, r7 @ c * 205 + 154 * b - add r5, r5, #0x8000 - mov r5, r5, lsr #24 - strb r5, [dest], #1 - - orr r9, r4, r9, lsl #16 @ c | d - mul r9, c102_154, r9 @ c * 154 + 102 * d - add r7, r7, #0x8000 - mov r7, r7, lsr #24 - strb r7, [dest], #1 - - add r9, r9, #0x8000 - subs srcw, srcw, #3 - mov r9, r9, lsr #24 - strb r9, [dest], #1 - - bpl hl35_loop - - ldrb r5, [src], #1 @ b = src[1] - strb r4, [dest], #1 - - orr r6, r4, r8, lsl #16 @ b | a - ldrb r9, [src], #1 @ c = src[2] - mul r6, c102_154, r6 @ a * 102 + 154 * b - - orr r5, r9, r8, lsl #16 @ b | c - mul r5, c51_205, r5 @ b * 205 + 51 * c - add r6, r6, #0x8000 - mov r6, r6, lsr #24 - strb r6, [dest], #1 - - orr r7, r8, r9, lsl #16 @ c | b - mul r7, c51_205, r7 @ c * 205 + 154 * b - add r5, r5, #0x8000 - mov r5, r5, lsr #24 - strb r5, [dest], #1 - - add r7, r7, #0x8000 - mov r7, r7, lsr #24 - strb r7, [dest], #1 - strb r9, [dest], #1 - - ldmia sp!, {r4 - r11, pc} - @ @|vp8cx_horizontal_line_3_5_scale_c| - - -@/**************************************************************************** -@ * -@ * ROUTINE : vp8cx_vertical_band_3_5_scale_c -@ * -@ * INPUTS : unsigned char *dest : 
Pointer to destination data. -@ * unsigned int dest_pitch : Stride of destination data. -@ * unsigned int dest_width : Width of destination data. -@ * -@ * OUTPUTS : None. -@ * -@ * RETU.req_s : void -@ * -@ * FUNCTION : Scales vertical band of pixels by scale 3 to 5. The -@ * height of the band scaled is 3-pixels. -@ * -@ * SPECIAL NOTES : The routine uses the first line of the band below -@ * the current band. -@ * -@ ****************************************************************************/ -@void vertical_band_4_5_scale_armv4 -@( -@ r0 = UINT8 *dest -@ r1 = UINT32 dest_pitch -@ r2 = UINT32 dest_width -@) -_VerticalBand_3_5_Scale_ARMv4: - vertical_band_3_5_scale_armv4: @ - stmdb sp!, {r4 - r11, lr} - - ldr c51_205, =0x3300cd - ldr c102_154, =0x66009a - -vl35_loop: - mov r3, src - ldrb r4, [r3], r1 @ a = des [0] - ldrb r5, [r3], r1 @ b = des [dest_pitch] - ldrb r7, [r3], r1 @ c = des[dest_pitch*2] - add lr, src, r1 - - orr r8, r4, r5, lsl #16 @ b | a - mul r6, c102_154, r8 @ a * 102 + 154 * b - - ldrb r8, [r3, r1, lsl #1] @ d = des[dest_pitch*5] - orr r3, r7, r5, lsl #16 @ b | c - mul r9, c51_205, r3 @ b * 205 + 51 * c - add r6, r6, #0x8000 - orr r3, r5, r7, lsl #16 @ c | b - mov r6, r6, lsr #24 - strb r6, [lr], r1 - - mul r5, c51_205, r3 @ c * 205 + 154 * b - add r9, r9, #0x8000 - orr r3, r8, r7, lsl #16 @ c | d - mov r9, r9, lsr #24 - strb r9, [lr], r1 - - mul r7, c102_154, r3 @ c * 154 + 102 * d - add r5, r5, #0x8000 - add src, src, #1 - mov r5, r5, lsr #24 - strb r5, [lr], r1 - - add r7, r7, #0x8000 - subs r2, r2, #1 - mov r7, r7, lsr #24 - strb r7, [lr], r1 - - - bne vl35_loop - - ldmia sp!, {r4 - r11, pc} - @ @|vertical_band_3_5_scale_armv4| - -@/**************************************************************************** -@ * -@ * ROUTINE : horizontal_line_3_4_scale_armv4 -@ * -@ * INPUTS : const unsigned char *source : Pointer to source data. -@ * unsigned int source_width : Stride of source. -@ * unsigned char *dest : Pointer to destination data. 
-@ * unsigned int dest_width : Stride of destination (NOT USED). -@ * -@ * OUTPUTS : None. -@ * -@ * RETU.req_s : void -@ * -@ * FUNCTION : Copies horizontal line of pixels from source to -@ * destination scaling up by 3 to 4. -@ * -@ * SPECIAL NOTES : None. -@ * -@ * -@ ****************************************************************************/ -@void horizontal_line_3_4_scale_armv4 -@( -@ const unsigned char *source, -@ unsigned int source_width, -@ unsigned char *dest, -@ unsigned int dest_width -@) -_HorizontalLine_3_4_Scale_ARMv4: - horizontal_line_3_4_scale_armv4: @ - stmdb sp!, {r4 - r11, lr} - - ldr r10, =64 - ldr r11, =192 - mov r9, #128 - - ldrb r4, [src], #1 @ a = src[0] - -hl34_loop: - - ldrb r8, [src], #1 @ b = src[1] - ldrb r7, [src], #1 @ c = src[2] - strb r4, [dest], #1 - - mla r4, r10, r4, r9 @ a*64 + 128 - mla r4, r11, r8, r4 @ a*64 + b*192 + 1 - - add r8, r8, #1 @ b + 1 - add r8, r8, r7 @ b + c + 1 - mov r8, r8, asr #1 @ (b + c + 1) >> 1 - - mov r4, r4, asr #8 @ (a*64 + b*192 + 1) >> 8 - strb r4, [dest], #1 - - strb r8, [dest], #1 - - ldrb r4, [src], #1 @ [a+1] - - mla r7, r11, r7, r9 @ c*192 + 128 - mla r7, r4, r10, r7 @ a*64 + b*192 + 128 - - subs srcw, srcw, #3 - - mov r7, r7, asr #8 @ (a*64 + b*192 + 128) >> 8 - strb r7, [dest], #1 - - bpl hl34_loop - - ldrb r8, [src], #1 @ b = src[1] - ldrb r7, [src], #1 @ c = src[2] - strb r4, [dest], #1 - - mla r4, r10, r4, r9 @ a*64 + 128 - mla r4, r11, r8, r4 @ a*64 + b*192 + 1 - mov r4, r4, asr #8 @ (a*64 + b*192 + 1) >> 8 - strb r4, [dest], #1 - - add r8, r8, #1 @ b + 1 - add r8, r8, r7 @ b + c + 1 - mov r8, r8, asr #1 @ (b + c + 1) >> 1 - strb r8, [dest], #1 - strb r7, [dest], #1 - - ldmia sp!, {r4 - r11, pc} - @ @|vp8cx_horizontal_line_3_4_scale_c| - - -@/**************************************************************************** -@ * -@ * ROUTINE : vertical_band_3_4_scale_armv4 -@ * -@ * INPUTS : unsigned char *dest : Pointer to destination data. 
-@ * unsigned int dest_pitch : Stride of destination data. -@ * unsigned int dest_width : Width of destination data. -@ * -@ * OUTPUTS : None. -@ * -@ * RETU.req_s : void -@ * -@ * FUNCTION : Scales vertical band of pixels by scale 3 to 4. The -@ * height of the band scaled is 3-pixels. -@ * -@ * SPECIAL NOTES : The routine uses the first line of the band below -@ * the current band. -@ * -@ ****************************************************************************/ -@void vertical_band_3_4_scale_armv4 -@( -@ r0 = UINT8 *dest -@ r1 = UINT32 dest_pitch -@ r2 = UINT32 dest_width -@) -_VerticalBand_3_4_Scale_ARMv4: - vertical_band_3_4_scale_armv4: @ - stmdb sp!, {r4 - r11, lr} - - ldr r10, =64 - ldr r11, =192 - mov r9, #128 - -@ ldr r1,[r1] -vl34_loop: - mov r3, src - ldrb r4, [r3], r1 @ a = des [0] - ldrb r5, [r3], r1 @ b = des [dest_pitch] - ldrb r7, [r3], r1 @ c = des [dest_pitch*2] - add lr, src, r1 - - mla r4, r10, r4, r9 @ a*64 + 128 - mla r4, r11, r5, r4 @ a*64 + b*192 + 1 - - add r5, r5, #1 @ b + 1 - add r5, r5, r7 @ b + c + 1 - mov r5, r5, asr #1 @ (b + c + 1) >> 1 - - mov r4, r4, asr #8 @ (a*64 + b*192 + 1) >> 8 - strb r4, [lr], r1 - - ldrb r4, [r3, r1] @ a = des [dest_pitch*4] - - strb r5, [lr], r1 - - mla r7, r11, r7, r9 @ c*192 + 128 - mla r7, r4, r10, r7 @ a*64 + b*192 + 128 - mov r7, r7, asr #8 @ (a*64 + b*192 + 128) >> 8 - - add src, src, #1 - subs r2, r2, #1 - - strb r7, [lr] - - bne vl34_loop - - ldmia sp!, {r4 - r11, pc} - @ @|vertical_band_3_4_scale_armv4| - -@/**************************************************************************** -@ * -@ * ROUTINE : vp8cx_horizontal_line_1_2_scale_c -@ * -@ * INPUTS : const unsigned char *source : Pointer to source data. -@ * unsigned int source_width : Stride of source. -@ * unsigned char *dest : Pointer to destination data. -@ * unsigned int dest_width : Stride of destination (NOT USED). -@ * -@ * OUTPUTS : None. 
-@ * -@ * RETU.req_s : void -@ * -@ * FUNCTION : Copies horizontal line of pixels from source to -@ * destination scaling up by 1 to 2. -@ * -@ * SPECIAL NOTES : None. -@ * -@ ****************************************************************************/ -@void vp8cx_horizontal_line_1_2_scale_c -@( -@ const unsigned char *source, -@ unsigned int source_width, -@ unsigned char *dest, -@ unsigned int dest_width -@) -_HorizontalLine_1_2_Scale_ARMv4: - horizontal_line_1_2_scale_armv4: @ - stmdb sp!, {r4 - r5, lr} - - sub srcw, srcw, #1 - - ldrb r3, [src], #1 - ldrb r4, [src], #1 -hl12_loop: - subs srcw, srcw, #1 - - add r5, r3, r4 - add r5, r5, #1 - mov r5, r5, lsr #1 - - orr r5, r3, r5, lsl #8 - strh r5, [dest], #2 - - mov r3, r4 - - ldrneb r4, [src], #1 - bne hl12_loop - - orr r5, r4, r4, lsl #8 - strh r5, [dest] - - ldmia sp!, {r4 - r5, pc} - @ @|vertical_band_3_5_scale_armv4| - -@/**************************************************************************** -@ * -@ * ROUTINE : vp8cx_vertical_band_1_2_scale_c -@ * -@ * INPUTS : unsigned char *dest : Pointer to destination data. -@ * unsigned int dest_pitch : Stride of destination data. -@ * unsigned int dest_width : Width of destination data. -@ * -@ * OUTPUTS : None. -@ * -@ * RETU.req_s : void -@ * -@ * FUNCTION : Scales vertical band of pixels by scale 1 to 2. The -@ * height of the band scaled is 1-pixel. -@ * -@ * SPECIAL NOTES : The routine uses the first line of the band below -@ * the current band. 
-@ * -@ ****************************************************************************/ -@void vp8cx_vertical_band_1_2_scale_c -@( -@ r0 = UINT8 *dest -@ r1 = UINT32 dest_pitch -@ r2 = UINT32 dest_width -@) -_VerticalBand_1_2_Scale_ARMv4: - vertical_band_1_2_scale_armv4: @ - stmdb sp!, {r4 - r7, lr} - - ldr mask, =0xff00ff @ mask for selection - ldr lr, = 0x010001 - -vl12_loop: - mov r3, src - ldr r4, [r3], r1 - ldr r5, [r3, r1] - - add src, src, #4 - subs r2, r2, #4 - - and r6, r4, mask - and r7, r5, mask - - add r6, r7, r6 - add r6, r6, lr - - and r4, mask, r4, lsr #8 - and r5, mask, r5, lsr #8 - - mov r6, r6, lsr #1 - and r6, r6, mask - - add r4, r5, r4 - add r4, r4, lr - - mov r4, r4, lsr #1 - and r4, r4, mask - - orr r5, r6, r4, lsl #8 - - str r5, [r3] - - bpl vl12_loop - - ldmia sp!, {r4 - r7, pc} - @ @|vertical_band_3_5_scale_armv4| diff -Nru libvpx-0.9.5/vpx_scale/symbian/scalesystemdependant.c libvpx-0.9.6/vpx_scale/symbian/scalesystemdependant.c --- libvpx-0.9.5/vpx_scale/symbian/scalesystemdependant.c 2010-10-28 13:14:14.000000000 +0000 +++ libvpx-0.9.6/vpx_scale/symbian/scalesystemdependant.c 1970-01-01 00:00:00.000000000 +0000 @@ -1,58 +0,0 @@ -/* - * Copyright (c) 2010 The WebM project authors. All Rights Reserved. - * - * Use of this source code is governed by a BSD-style license - * that can be found in the LICENSE file in the root of the source - * tree. An additional intellectual property rights grant can be found - * in the file PATENTS. All contributing project authors may - * be found in the AUTHORS file in the root of the source tree. - */ - - -#include "vpx_scale/vpxscale.h" - -/**************************************************************************** - * - * ROUTINE : vp8_scale_machine_specific_config - * - * INPUTS : UINT32 Version : Codec version number. - * - * OUTPUTS : None. - * - * RETURNS : void - * - * FUNCTION : Checks for machine specifc features such as MMX support - * sets appropriate flags and function pointers. 
- * - * SPECIAL NOTES : None. - * - ****************************************************************************/ -void vp8_scale_machine_specific_config() -{ -#ifndef VPX_NO_GLOBALS - vp8_horizontal_line_1_2_scale = horizontal_line_1_2_scale_armv4; - vp8_vertical_band_1_2_scale = vertical_band_1_2_scale_armv4; - vp8_last_vertical_band_1_2_scale = vp8cx_last_vertical_band_1_2_scale_c; - vp8_horizontal_line_3_5_scale = horizontal_line_3_5_scale_armv4; - vp8_vertical_band_3_5_scale = vertical_band_3_5_scale_armv4; - vp8_last_vertical_band_3_5_scale = vp8cx_last_vertical_band_3_5_scale_c; - vp8_horizontal_line_3_4_scale = horizontal_line_3_4_scale_armv4; - vp8_vertical_band_3_4_scale = vertical_band_3_4_scale_armv4; - vp8_last_vertical_band_3_4_scale = vp8cx_last_vertical_band_3_4_scale_c; - vp8_horizontal_line_2_3_scale = horizontal_line_2_3_scale_armv4; - vp8_vertical_band_2_3_scale = vertical_band_2_3_scale_armv4; - vp8_last_vertical_band_2_3_scale = vp8cx_last_vertical_band_2_3_scale_c; - vp8_horizontal_line_4_5_scale = horizontal_line_4_5_scale_armv4; - vp8_vertical_band_4_5_scale = vertical_band_4_5_scale_armv4; - vp8_last_vertical_band_4_5_scale = vp8cx_last_vertical_band_4_5_scale_c; - - - vp8_vertical_band_5_4_scale = vp8cx_vertical_band_5_4_scale_c; - vp8_vertical_band_5_3_scale = vp8cx_vertical_band_5_3_scale_c; - vp8_vertical_band_2_1_scale = vp8cx_vertical_band_2_1_scale_c; - vp8_vertical_band_2_1_scale_i = vp8cx_vertical_band_2_1_scale_i_c; - vp8_horizontal_line_2_1_scale = vp8cx_horizontal_line_2_1_scale_c; - vp8_horizontal_line_5_3_scale = vp8cx_horizontal_line_5_3_scale_c; - vp8_horizontal_line_5_4_scale = vp8cx_horizontal_line_5_4_scale_c; -#endif -} diff -Nru libvpx-0.9.5/vpx_scale/wce/gen_scalers_armv4.asm libvpx-0.9.6/vpx_scale/wce/gen_scalers_armv4.asm --- libvpx-0.9.5/vpx_scale/wce/gen_scalers_armv4.asm 2010-10-28 13:14:14.000000000 +0000 +++ libvpx-0.9.6/vpx_scale/wce/gen_scalers_armv4.asm 1970-01-01 00:00:00.000000000 +0000 @@ -1,774 +0,0 @@ 
-; -; Copyright (c) 2010 The WebM project authors. All Rights Reserved. -; -; Use of this source code is governed by a BSD-style license -; that can be found in the LICENSE file in the root of the source -; tree. An additional intellectual property rights grant can be found -; in the file PATENTS. All contributing project authors may -; be found in the AUTHORS file in the root of the source tree. -; - - - EXPORT |horizontal_line_4_5_scale_armv4| - EXPORT |vertical_band_4_5_scale_armv4| - EXPORT |horizontal_line_2_3_scale_armv4| - EXPORT |vertical_band_2_3_scale_armv4| - EXPORT |horizontal_line_3_5_scale_armv4| - EXPORT |vertical_band_3_5_scale_armv4| - EXPORT |horizontal_line_3_4_scale_armv4| - EXPORT |vertical_band_3_4_scale_armv4| - EXPORT |horizontal_line_1_2_scale_armv4| - EXPORT |vertical_band_1_2_scale_armv4| - - AREA |.text|, CODE, READONLY ; name this block of code - -src RN r0 -srcw RN r1 -dest RN r2 -mask RN r12 -c51_205 RN r10 -c102_154 RN r11 -;/**************************************************************************** -; * -; * ROUTINE : horizontal_line_4_5_scale_armv4 -; * -; * INPUTS : const unsigned char *source : Pointer to source data. -; * unsigned int source_width : Stride of source. -; * unsigned char *dest : Pointer to destination data. -; * unsigned int dest_width : Stride of destination (NOT USED). -; * -; * OUTPUTS : None. -; * -; * RETURNS : void -; * -; * FUNCTION : Copies horizontal line of pixels from source to -; * destination scaling up by 4 to 5. -; * -; * SPECIAL NOTES : None. 
-; * -; ****************************************************************************/ -;void horizontal_line_4_5_scale_armv4 -;( -; r0 = UINT8 *source -; r1 = UINT32 source_width -; r2 = UINT8 *dest -; r3 = UINT32 dest_width -;) -|horizontal_line_4_5_scale_armv4| PROC - stmdb sp!, {r4 - r11, lr} - - mov mask, #255 ; mask for selection - ldr c51_205, =0x3300cd - ldr c102_154, =0x66009a - - ldr r3, [src], #4 - -hl45_loop - - and r4, r3, mask ; a = src[0] - and r5, mask, r3, lsr #8 ; b = src[1] - strb r4, [dest], #1 - - orr r6, r4, r5, lsl #16 ; b | a - and r7, mask, r3, lsr #16 ; c = src[2] - mul r6, c51_205, r6 ; a * 51 + 205 * b - - orr r5, r5, r7, lsl #16 ; c | b - mul r5, c102_154, r5 ; b * 102 + 154 * c - add r6, r6, #0x8000 - and r8, mask, r3, lsr #24 ; d = src[3] - mov r6, r6, lsr #24 - strb r6, [dest], #1 - - orr r7, r8, r7, lsl #16 ; c | d - mul r7, c102_154, r7 ; c * 154 + 102 * d - add r5, r5, #0x8000 - ldr r3, [src], #4 - mov r5, r5, lsr #24 - strb r5, [dest], #1 - - add r7, r7, #0x8000 - and r9, mask, r3 ; e = src[4] - orr r9, r9, r8, lsl #16 ; d | e - mul r9, c51_205, r9 ; d * 205 + 51 * e - mov r7, r7, lsr #24 - strb r7, [dest], #1 - - add r9, r9, #0x8000 - subs srcw, srcw, #4 - mov r9, r9, lsr #24 - strb r9, [dest], #1 - - bne hl45_loop - - and r4, r3, mask - and r5, mask, r3, lsl #8 - strb r4, [dest], #1 - - orr r6, r4, r5, lsl #16 ; b | a - mul r6, c51_205, r6 - - and r7, mask, r3, lsl #16 - orr r5, r5, r7, lsl #16 ; c | b - mul r5, c102_154, r5 - add r6, r6, #0x8000 - and r8, mask, r3, lsl #24 - mov r6, r6, lsr #24 - strb r6, [dest], #1 - - orr r7, r8, r7, lsl #16 ; c | d - mul r7, c102_154, r7 - add r5, r5, #0x8000 - mov r5, r5, lsr #24 - strb r5, [dest], #1 - - add r7, r7, #0x8000 - mov r7, r7, lsr #24 - strb r7, [dest], #1 - - ldrb r3, [src] - strb r3, [dest], #1 - - ldmia sp!, {r4 - r11, pc} - ENDP ;|vp8cx_horizontal_line_4_5_scale_c| - -;/**************************************************************************** -; * -; * ROUTINE : 
vertical_band_4_5_scale_armv4 -; * -; * INPUTS : unsigned char *dest : Pointer to destination data. -; * unsigned int dest_pitch : Stride of destination data. -; * unsigned int dest_width : Width of destination data. -; * -; * OUTPUTS : None. -; * -; * RETURNS : void -; * -; * FUNCTION : Scales vertical band of pixels by scale 4 to 5. The -; * height of the band scaled is 4-pixels. -; * -; * SPECIAL NOTES : The routine uses the first line of the band below -; * the current band. -; * -; ****************************************************************************/ -;void vertical_band_4_5_scale_armv4 -;( -; r0 = UINT8 *dest -; r1 = UINT32 dest_pitch -; r2 = UINT32 dest_width -;) -|vertical_band_4_5_scale_armv4| PROC - stmdb sp!, {r4 - r11, lr} - - ldr c51_205, =0x3300cd - ldr c102_154, =0x66009a - -vl45_loop - mov r3, src - ldrb r4, [r3], r1 ; a = des [0] - ldrb r5, [r3], r1 ; b = des [dest_pitch] - ldrb r7, [r3], r1 ; c = des[dest_pitch*2] - add lr, src, r1 - - orr r6, r4, r5, lsl #16 ; b | a - mul r6, c51_205, r6 ; a * 51 + 205 * b - - ldrb r8, [r3], r1 ; d = des[dest_pitch*3] - orr r5, r5, r7, lsl #16 ; c | b - mul r5, c102_154, r5 ; b * 102 + 154 * c - add r6, r6, #0x8000 - orr r7, r8, r7, lsl #16 ; c | d - mov r6, r6, lsr #24 - strb r6, [lr], r1 - - ldrb r9, [r3, r1] ; e = des [dest_pitch * 5] - mul r7, c102_154, r7 ; c * 154 + 102 * d - add r5, r5, #0x8000 - orr r9, r9, r8, lsl #16 ; d | e - mov r5, r5, lsr #24 - strb r5, [lr], r1 - - mul r9, c51_205, r9 ; d * 205 + 51 * e - add r7, r7, #0x8000 - add src, src, #1 - mov r7, r7, lsr #24 - strb r7, [lr], r1 - - add r9, r9, #0x8000 - subs r2, r2, #1 - mov r9, r9, lsr #24 - strb r9, [lr], r1 - - bne vl45_loop - - ldmia sp!, {r4 - r11, pc} - ENDP ;|vertical_band_4_5_scale_armv4| - -;/**************************************************************************** -; * -; * ROUTINE : horizontal_line_2_3_scale_armv4 -; * -; * INPUTS : const unsigned char *source : Pointer to source data. 
-; * unsigned int source_width : Stride of source. -; * unsigned char *dest : Pointer to destination data. -; * unsigned int dest_width : Stride of destination (NOT USED). -; * -; * OUTPUTS : None. -; * -; * RETURNS : void -; * -; * FUNCTION : Copies horizontal line of pixels from source to -; * destination scaling up by 2 to 3. -; * -; * SPECIAL NOTES : None. -; * -; * -; ****************************************************************************/ -;void horizontal_line_2_3_scale_armv4 -;( -; const unsigned char *source, -; unsigned int source_width, -; unsigned char *dest, -; unsigned int dest_width -;) -|horizontal_line_2_3_scale_armv4| PROC - stmdb sp!, {r4 - r11, lr} - ldr lr, =85 - ldr r12, =171 - -hl23_loop - - ldrb r3, [src], #1 ; a - ldrb r4, [src], #1 ; b - ldrb r5, [src] ; c - - strb r3, [dest], #1 - mul r4, r12, r4 ; b * 171 - mla r6, lr, r3, r4 ; a * 85 - mla r7, lr, r5, r4 ; c * 85 - - add r6, r6, #128 - mov r6, r6, lsr #8 - strb r6, [dest], #1 - - add r7, r7, #128 - mov r7, r7, lsr #8 - strb r7, [dest], #1 - - subs srcw, srcw, #2 - bne hl23_loop - - ldrb r4, [src, #1] ; b - strb r5, [dest], #1 - strb r4, [dest, #1] - - mul r4, r12, r4 ; b * 171 - mla r6, lr, r5, r4 ; a * 85 + b *171 - - add r6, r6, #128 - mov r6, r6, lsr #8 - strb r6, [dest] - - ldmia sp!, {r4 - r11, pc} - ENDP ;|horizontal_line_2_3_scale_armv4| - -;/**************************************************************************** -; * -; * ROUTINE : vertical_band_2_3_scale_armv4 -; * -; * INPUTS : unsigned char *dest : Pointer to destination data. -; * unsigned int dest_pitch : Stride of destination data. -; * unsigned int dest_width : Width of destination data. -; * -; * OUTPUTS : None. -; * -; * RETURNS : void -; * -; * FUNCTION : Scales vertical band of pixels by scale 2 to 3. The -; * height of the band scaled is 2-pixels. -; * -; * SPECIAL NOTES : The routine uses the first line of the band below -; * the current band. 
-; * -; ****************************************************************************/ -;void vertical_band_2_3_scale_armv4 -;( -; r0 = UINT8 *dest -; r1 = UINT32 dest_pitch -; r2 = UINT32 dest_width -;) -|vertical_band_2_3_scale_armv4| PROC - stmdb sp!, {r4 - r8, lr} - ldr lr, =85 - ldr r12, =171 - add r3, r1, r1, lsl #1 ; 3 * dest_pitch - -vl23_loop - ldrb r4, [src] ; a = des [0] - ldrb r5, [src, r1] ; b = des [dest_pitch] - ldrb r7, [src, r3] ; c = des [dest_pitch*3] - subs r2, r2, #1 - - mul r5, r12, r5 ; b * 171 - mla r6, lr, r4, r5 ; a * 85 - mla r8, lr, r7, r5 ; c * 85 - - add r6, r6, #128 - mov r6, r6, lsr #8 - strb r6, [src, r1] - - add r8, r8, #128 - mov r8, r8, lsr #8 - strb r8, [src, r1, lsl #1] - - add src, src, #1 - - bne vl23_loop - - ldmia sp!, {r4 - r8, pc} - ENDP ;|vertical_band_2_3_scale_armv4| - -;/**************************************************************************** -; * -; * ROUTINE : vp8cx_horizontal_line_3_5_scale_c -; * -; * INPUTS : const unsigned char *source : Pointer to source data. -; * unsigned int source_width : Stride of source. -; * unsigned char *dest : Pointer to destination data. -; * unsigned int dest_width : Stride of destination (NOT USED). -; * -; * OUTPUTS : None. -; * -; * RETURNS : void -; * -; * FUNCTION : Copies horizontal line of pixels from source to -; * destination scaling up by 3 to 5. -; * -; * SPECIAL NOTES : None. 
-; * -; * -; ****************************************************************************/ -;void vp8cx_horizontal_line_3_5_scale_c -;( -; const unsigned char *source, -; unsigned int source_width, -; unsigned char *dest, -; unsigned int dest_width -;) -|horizontal_line_3_5_scale_armv4| PROC - stmdb sp!, {r4 - r11, lr} - - ldr c51_205, =0x3300cd - ldr c102_154, =0x66009a - - ldrb r4, [src], #1 ; a = src[0] - -hl35_loop - - ldrb r8, [src], #1 ; b = src[1] - strb r4, [dest], #1 - - orr r6, r4, r8, lsl #16 ; b | a - ldrb r9, [src], #1 ; c = src[2] - mul r6, c102_154, r6 ; a * 102 + 154 * b - - orr r5, r9, r8, lsl #16 ; b | c - mul r5, c51_205, r5 ; b * 205 + 51 * c - add r6, r6, #0x8000 - ldrb r4, [src], #1 ; d = src[3] - mov r6, r6, lsr #24 - strb r6, [dest], #1 - - orr r7, r8, r9, lsl #16 ; c | b - mul r7, c51_205, r7 ; c * 205 + 154 * b - add r5, r5, #0x8000 - mov r5, r5, lsr #24 - strb r5, [dest], #1 - - orr r9, r4, r9, lsl #16 ; c | d - mul r9, c102_154, r9 ; c * 154 + 102 * d - add r7, r7, #0x8000 - mov r7, r7, lsr #24 - strb r7, [dest], #1 - - add r9, r9, #0x8000 - subs srcw, srcw, #3 - mov r9, r9, lsr #24 - strb r9, [dest], #1 - - bpl hl35_loop - - ldrb r5, [src], #1 ; b = src[1] - strb r4, [dest], #1 - - orr r6, r4, r8, lsl #16 ; b | a - ldrb r9, [src], #1 ; c = src[2] - mul r6, c102_154, r6 ; a * 102 + 154 * b - - orr r5, r9, r8, lsl #16 ; b | c - mul r5, c51_205, r5 ; b * 205 + 51 * c - add r6, r6, #0x8000 - mov r6, r6, lsr #24 - strb r6, [dest], #1 - - orr r7, r8, r9, lsl #16 ; c | b - mul r7, c51_205, r7 ; c * 205 + 154 * b - add r5, r5, #0x8000 - mov r5, r5, lsr #24 - strb r5, [dest], #1 - - add r7, r7, #0x8000 - mov r7, r7, lsr #24 - strb r7, [dest], #1 - strb r9, [dest], #1 - - ldmia sp!, {r4 - r11, pc} - ENDP ;|vp8cx_horizontal_line_3_5_scale_c| - - -;/**************************************************************************** -; * -; * ROUTINE : vp8cx_vertical_band_3_5_scale_c -; * -; * INPUTS : unsigned char *dest : Pointer to destination data. 
-; * unsigned int dest_pitch : Stride of destination data. -; * unsigned int dest_width : Width of destination data. -; * -; * OUTPUTS : None. -; * -; * RETURNS : void -; * -; * FUNCTION : Scales vertical band of pixels by scale 3 to 5. The -; * height of the band scaled is 3-pixels. -; * -; * SPECIAL NOTES : The routine uses the first line of the band below -; * the current band. -; * -; ****************************************************************************/ -;void vertical_band_4_5_scale_armv4 -;( -; r0 = UINT8 *dest -; r1 = UINT32 dest_pitch -; r2 = UINT32 dest_width -;) -|vertical_band_3_5_scale_armv4| PROC - stmdb sp!, {r4 - r11, lr} - - ldr c51_205, =0x3300cd - ldr c102_154, =0x66009a - -vl35_loop - mov r3, src - ldrb r4, [r3], r1 ; a = des [0] - ldrb r5, [r3], r1 ; b = des [dest_pitch] - ldrb r7, [r3], r1 ; c = des[dest_pitch*2] - add lr, src, r1 - - orr r8, r4, r5, lsl #16 ; b | a - mul r6, c102_154, r8 ; a * 102 + 154 * b - - ldrb r8, [r3, r1, lsl #1] ; d = des[dest_pitch*5] - orr r3, r7, r5, lsl #16 ; b | c - mul r9, c51_205, r3 ; b * 205 + 51 * c - add r6, r6, #0x8000 - orr r3, r5, r7, lsl #16 ; c | b - mov r6, r6, lsr #24 - strb r6, [lr], r1 - - mul r5, c51_205, r3 ; c * 205 + 154 * b - add r9, r9, #0x8000 - orr r3, r8, r7, lsl #16 ; c | d - mov r9, r9, lsr #24 - strb r9, [lr], r1 - - mul r7, c102_154, r3 ; c * 154 + 102 * d - add r5, r5, #0x8000 - add src, src, #1 - mov r5, r5, lsr #24 - strb r5, [lr], r1 - - add r7, r7, #0x8000 - subs r2, r2, #1 - mov r7, r7, lsr #24 - strb r7, [lr], r1 - - - bne vl35_loop - - ldmia sp!, {r4 - r11, pc} - ENDP ;|vertical_band_3_5_scale_armv4| - -;/**************************************************************************** -; * -; * ROUTINE : horizontal_line_3_4_scale_armv4 -; * -; * INPUTS : const unsigned char *source : Pointer to source data. -; * unsigned int source_width : Stride of source. -; * unsigned char *dest : Pointer to destination data. 
-; * unsigned int dest_width : Stride of destination (NOT USED). -; * -; * OUTPUTS : None. -; * -; * RETURNS : void -; * -; * FUNCTION : Copies horizontal line of pixels from source to -; * destination scaling up by 3 to 4. -; * -; * SPECIAL NOTES : None. -; * -; * -; ****************************************************************************/ -;void horizontal_line_3_4_scale_armv4 -;( -; const unsigned char *source, -; unsigned int source_width, -; unsigned char *dest, -; unsigned int dest_width -;) -|horizontal_line_3_4_scale_armv4| PROC - stmdb sp!, {r4 - r11, lr} - - ldr r10, =64 - ldr r11, =192 - mov r9, #128 - - ldrb r4, [src], #1 ; a = src[0] - -hl34_loop - - ldrb r8, [src], #1 ; b = src[1] - ldrb r7, [src], #1 ; c = src[2] - strb r4, [dest], #1 - - mla r4, r10, r4, r9 ; a*64 + 128 - mla r4, r11, r8, r4 ; a*64 + b*192 + 1 - - add r8, r8, #1 ; b + 1 - add r8, r8, r7 ; b + c + 1 - mov r8, r8, asr #1 ; (b + c + 1) >> 1 - - mov r4, r4, asr #8 ; (a*64 + b*192 + 1) >> 8 - strb r4, [dest], #1 - - strb r8, [dest], #1 - - ldrb r4, [src], #1 ; [a+1] - - mla r7, r11, r7, r9 ; c*192 + 128 - mla r7, r4, r10, r7 ; a*64 + b*192 + 128 - - subs srcw, srcw, #3 - - mov r7, r7, asr #8 ; (a*64 + b*192 + 128) >> 8 - strb r7, [dest], #1 - - bpl hl34_loop - - ldrb r8, [src], #1 ; b = src[1] - ldrb r7, [src], #1 ; c = src[2] - strb r4, [dest], #1 - - mla r4, r10, r4, r9 ; a*64 + 128 - mla r4, r11, r8, r4 ; a*64 + b*192 + 1 - mov r4, r4, asr #8 ; (a*64 + b*192 + 1) >> 8 - strb r4, [dest], #1 - - add r8, r8, #1 ; b + 1 - add r8, r8, r7 ; b + c + 1 - mov r8, r8, asr #1 ; (b + c + 1) >> 1 - strb r8, [dest], #1 - strb r7, [dest], #1 - - ldmia sp!, {r4 - r11, pc} - ENDP ;|vp8cx_horizontal_line_3_4_scale_c| - - -;/**************************************************************************** -; * -; * ROUTINE : vertical_band_3_4_scale_armv4 -; * -; * INPUTS : unsigned char *dest : Pointer to destination data. -; * unsigned int dest_pitch : Stride of destination data. 
-; * unsigned int dest_width : Width of destination data. -; * -; * OUTPUTS : None. -; * -; * RETURNS : void -; * -; * FUNCTION : Scales vertical band of pixels by scale 3 to 4. The -; * height of the band scaled is 3-pixels. -; * -; * SPECIAL NOTES : The routine uses the first line of the band below -; * the current band. -; * -; ****************************************************************************/ -;void vertical_band_3_4_scale_armv4 -;( -; r0 = UINT8 *dest -; r1 = UINT32 dest_pitch -; r2 = UINT32 dest_width -;) -|vertical_band_3_4_scale_armv4| PROC - stmdb sp!, {r4 - r11, lr} - - ldr r10, =64 - ldr r11, =192 - mov r9, #128 - -; ldr r1,[r1] -vl34_loop - mov r3, src - ldrb r4, [r3], r1 ; a = des [0] - ldrb r5, [r3], r1 ; b = des [dest_pitch] - ldrb r7, [r3], r1 ; c = des [dest_pitch*2] - add lr, src, r1 - - mla r4, r10, r4, r9 ; a*64 + 128 - mla r4, r11, r5, r4 ; a*64 + b*192 + 1 - - add r5, r5, #1 ; b + 1 - add r5, r5, r7 ; b + c + 1 - mov r5, r5, asr #1 ; (b + c + 1) >> 1 - - mov r4, r4, asr #8 ; (a*64 + b*192 + 1) >> 8 - strb r4, [lr], r1 - - ldrb r4, [r3, r1] ; a = des [dest_pitch*4] - - strb r5, [lr], r1 - - mla r7, r11, r7, r9 ; c*192 + 128 - mla r7, r4, r10, r7 ; a*64 + b*192 + 128 - mov r7, r7, asr #8 ; (a*64 + b*192 + 128) >> 8 - - add src, src, #1 - subs r2, r2, #1 - - strb r7, [lr] - - bne vl34_loop - - ldmia sp!, {r4 - r11, pc} - ENDP ;|vertical_band_3_4_scale_armv4| - -;/**************************************************************************** -; * -; * ROUTINE : vp8cx_horizontal_line_1_2_scale_c -; * -; * INPUTS : const unsigned char *source : Pointer to source data. -; * unsigned int source_width : Stride of source. -; * unsigned char *dest : Pointer to destination data. -; * unsigned int dest_width : Stride of destination (NOT USED). -; * -; * OUTPUTS : None. -; * -; * RETURNS : void -; * -; * FUNCTION : Copies horizontal line of pixels from source to -; * destination scaling up by 1 to 2. -; * -; * SPECIAL NOTES : None. 
-; * -; ****************************************************************************/ -;void vp8cx_horizontal_line_1_2_scale_c -;( -; const unsigned char *source, -; unsigned int source_width, -; unsigned char *dest, -; unsigned int dest_width -;) -|horizontal_line_1_2_scale_armv4| PROC - stmdb sp!, {r4 - r5, lr} - - sub srcw, srcw, #1 - - ldrb r3, [src], #1 - ldrb r4, [src], #1 -hl12_loop - subs srcw, srcw, #1 - - add r5, r3, r4 - add r5, r5, #1 - mov r5, r5, lsr #1 - - orr r5, r3, r5, lsl #8 - strh r5, [dest], #2 - - mov r3, r4 - - ldrneb r4, [src], #1 - bne hl12_loop - - orr r5, r4, r4, lsl #8 - strh r5, [dest] - - ldmia sp!, {r4 - r5, pc} - ENDP ;|vertical_band_3_5_scale_armv4| - -;/**************************************************************************** -; * -; * ROUTINE : vp8cx_vertical_band_1_2_scale_c -; * -; * INPUTS : unsigned char *dest : Pointer to destination data. -; * unsigned int dest_pitch : Stride of destination data. -; * unsigned int dest_width : Width of destination data. -; * -; * OUTPUTS : None. -; * -; * RETURNS : void -; * -; * FUNCTION : Scales vertical band of pixels by scale 1 to 2. The -; * height of the band scaled is 1-pixel. -; * -; * SPECIAL NOTES : The routine uses the first line of the band below -; * the current band. 
-; * -; ****************************************************************************/ -;void vp8cx_vertical_band_1_2_scale_c -;( -; r0 = UINT8 *dest -; r1 = UINT32 dest_pitch -; r2 = UINT32 dest_width -;) -|vertical_band_1_2_scale_armv4| PROC - stmdb sp!, {r4 - r7, lr} - - ldr mask, =0xff00ff ; mask for selection - ldr lr, = 0x010001 - -vl12_loop - mov r3, src - ldr r4, [r3], r1 - ldr r5, [r3, r1] - - add src, src, #4 - subs r2, r2, #4 - - and r6, r4, mask - and r7, r5, mask - - add r6, r7, r6 - add r6, r6, lr - - and r4, mask, r4, lsr #8 - and r5, mask, r5, lsr #8 - - mov r6, r6, lsr #1 - and r6, r6, mask - - add r4, r5, r4 - add r4, r4, lr - - mov r4, r4, lsr #1 - and r4, r4, mask - - orr r5, r6, r4, lsl #8 - - str r5, [r3] - - bpl vl12_loop - - ldmia sp!, {r4 - r7, pc} - ENDP ;|vertical_band_3_5_scale_armv4| - - END diff -Nru libvpx-0.9.5/vpx_scale/wce/scalesystemdependant.c libvpx-0.9.6/vpx_scale/wce/scalesystemdependant.c --- libvpx-0.9.5/vpx_scale/wce/scalesystemdependant.c 2010-10-28 13:14:14.000000000 +0000 +++ libvpx-0.9.6/vpx_scale/wce/scalesystemdependant.c 1970-01-01 00:00:00.000000000 +0000 @@ -1,60 +0,0 @@ -/* - * Copyright (c) 2010 The WebM project authors. All Rights Reserved. - * - * Use of this source code is governed by a BSD-style license - * that can be found in the LICENSE file in the root of the source - * tree. An additional intellectual property rights grant can be found - * in the file PATENTS. All contributing project authors may - * be found in the AUTHORS file in the root of the source tree. - */ - - -#include "vpx_scale/vpxscale.h" - -/**************************************************************************** -* Imports -*****************************************************************************/ - -/**************************************************************************** - * - * ROUTINE : vp8_scale_machine_specific_config - * - * INPUTS : UINT32 Version : Codec version number. - * - * OUTPUTS : None. 
- * - * RETURNS : void - * - * FUNCTION : Checks for machine specifc features such as MMX support - * sets appropriate flags and function pointers. - * - * SPECIAL NOTES : None. - * - ****************************************************************************/ -void vp8_scale_machine_specific_config() -{ - vp8_horizontal_line_1_2_scale = horizontal_line_1_2_scale_armv4; - vp8_vertical_band_1_2_scale = vertical_band_1_2_scale_armv4; - vp8_last_vertical_band_1_2_scale = vp8cx_last_vertical_band_1_2_scale_c; - vp8_horizontal_line_3_5_scale = horizontal_line_3_5_scale_armv4; - vp8_vertical_band_3_5_scale = vertical_band_3_5_scale_armv4; - vp8_last_vertical_band_3_5_scale = vp8cx_last_vertical_band_3_5_scale_c; - vp8_horizontal_line_3_4_scale = horizontal_line_3_4_scale_armv4; - vp8_vertical_band_3_4_scale = vertical_band_3_4_scale_armv4; - vp8_last_vertical_band_3_4_scale = vp8cx_last_vertical_band_3_4_scale_c; - vp8_horizontal_line_2_3_scale = horizontal_line_2_3_scale_armv4; - vp8_vertical_band_2_3_scale = vertical_band_2_3_scale_armv4; - vp8_last_vertical_band_2_3_scale = vp8cx_last_vertical_band_2_3_scale_c; - vp8_horizontal_line_4_5_scale = horizontal_line_4_5_scale_armv4; - vp8_vertical_band_4_5_scale = vertical_band_4_5_scale_armv4; - vp8_last_vertical_band_4_5_scale = vp8cx_last_vertical_band_4_5_scale_c; - - - vp8_vertical_band_5_4_scale = vp8cx_vertical_band_5_4_scale_c; - vp8_vertical_band_5_3_scale = vp8cx_vertical_band_5_3_scale_c; - vp8_vertical_band_2_1_scale = vp8cx_vertical_band_2_1_scale_c; - vp8_vertical_band_2_1_scale_i = vp8cx_vertical_band_2_1_scale_i_c; - vp8_horizontal_line_2_1_scale = vp8cx_horizontal_line_2_1_scale_c; - vp8_horizontal_line_5_3_scale = vp8cx_horizontal_line_5_3_scale_c; - vp8_horizontal_line_5_4_scale = vp8cx_horizontal_line_5_4_scale_c; -} diff -Nru libvpx-0.9.5/vpx_scale/x86_64/scaleopt.c libvpx-0.9.6/vpx_scale/x86_64/scaleopt.c --- libvpx-0.9.5/vpx_scale/x86_64/scaleopt.c 2010-10-28 13:14:14.000000000 +0000 +++ 
libvpx-0.9.6/vpx_scale/x86_64/scaleopt.c 1970-01-01 00:00:00.000000000 +0000 @@ -1,1750 +0,0 @@ -/* - * Copyright (c) 2010 The WebM project authors. All Rights Reserved. - * - * Use of this source code is governed by a BSD-style license - * that can be found in the LICENSE file in the root of the source - * tree. An additional intellectual property rights grant can be found - * in the file PATENTS. All contributing project authors may - * be found in the AUTHORS file in the root of the source tree. - */ - - -/**************************************************************************** -* -* Module Title : scaleopt.cpp -* -* Description : Optimized scaling functions -* -****************************************************************************/ -#include "pragmas.h" - - - -/**************************************************************************** -* Module Statics -****************************************************************************/ -__declspec(align(16)) const static unsigned short one_fifth[] = { 51, 51, 51, 51 }; -__declspec(align(16)) const static unsigned short two_fifths[] = { 102, 102, 102, 102 }; -__declspec(align(16)) const static unsigned short three_fifths[] = { 154, 154, 154, 154 }; -__declspec(align(16)) const static unsigned short four_fifths[] = { 205, 205, 205, 205 }; -__declspec(align(16)) const static unsigned short round_values[] = { 128, 128, 128, 128 }; -__declspec(align(16)) const static unsigned short four_ones[] = { 1, 1, 1, 1}; -__declspec(align(16)) const static unsigned short const45_2[] = {205, 154, 102, 51 }; -__declspec(align(16)) const static unsigned short const45_1[] = { 51, 102, 154, 205 }; -__declspec(align(16)) const static unsigned char mask45[] = { 0, 0, 0, 0, 0, 0, 255, 0}; -__declspec(align(16)) const static unsigned short const35_2[] = { 154, 51, 205, 102 }; -__declspec(align(16)) const static unsigned short const35_1[] = { 102, 205, 51, 154 }; - - - -#include "vpx_scale/vpxscale.h" -#include "vpx_mem/vpx_mem.h" 
- -/**************************************************************************** -* -* ROUTINE : horizontal_line_3_5_scale_mmx -* -* INPUTS : const unsigned char *source : -* unsigned int source_width : -* unsigned char *dest : -* unsigned int dest_width : -* -* OUTPUTS : None. -* -* RETURNS : void -* -* FUNCTION : 3 to 5 up-scaling of a horizontal line of pixels. -* -* SPECIAL NOTES : None. -* -****************************************************************************/ -static -void horizontal_line_3_5_scale_mmx -( - const unsigned char *source, - unsigned int source_width, - unsigned char *dest, - unsigned int dest_width -) -{ - (void) dest_width; - - __asm - { - - push rbx - - mov rsi, source - mov rdi, dest - - mov ecx, source_width - lea rdx, [rsi+rcx-3]; - - movq mm5, const35_1 // mm5 = 66 xx cd xx 33 xx 9a xx - movq mm6, const35_2 // mm6 = 9a xx 33 xx cd xx 66 xx - - movq mm4, round_values // mm4 = 80 xx 80 xx 80 xx 80 xx - pxor mm7, mm7 // clear mm7 - - horiz_line_3_5_loop: - - mov eax, DWORD PTR [rsi] // eax = 00 01 02 03 - mov ebx, eax - - and ebx, 0xffff00 // ebx = xx 01 02 xx - mov ecx, eax // ecx = 00 01 02 03 - - and eax, 0xffff0000 // eax = xx xx 02 03 - xor ecx, eax // ecx = 00 01 xx xx - - shr ebx, 8 // ebx = 01 02 xx xx - or eax, ebx // eax = 01 02 02 03 - - shl ebx, 16 // ebx = xx xx 01 02 - movd mm1, eax // mm1 = 01 02 02 03 xx xx xx xx - - or ebx, ecx // ebx = 00 01 01 02 - punpcklbw mm1, mm7 // mm1 = 01 xx 02 xx 02 xx 03 xx - - movd mm0, ebx // mm0 = 00 01 01 02 - pmullw mm1, mm6 // - - punpcklbw mm0, mm7 // mm0 = 00 xx 01 xx 01 xx 02 xx - pmullw mm0, mm5 // - - mov [rdi], ebx // writeoutput 00 xx xx xx - add rsi, 3 - - add rdi, 5 - paddw mm0, mm1 - - paddw mm0, mm4 - psrlw mm0, 8 - - cmp rsi, rdx - packuswb mm0, mm7 - - movd DWORD Ptr [rdi-4], mm0 - jl horiz_line_3_5_loop - -//Exit: - mov eax, DWORD PTR [rsi] // eax = 00 01 02 03 - mov ebx, eax - - and ebx, 0xffff00 // ebx = xx 01 02 xx - mov ecx, eax // ecx = 00 01 02 03 - - and eax, 
0xffff0000 // eax = xx xx 02 03 - xor ecx, eax // ecx = 00 01 xx xx - - shr ebx, 8 // ebx = 01 02 xx xx - or eax, ebx // eax = 01 02 02 03 - - shl eax, 8 // eax = xx 01 02 02 - and eax, 0xffff0000 // eax = xx xx 02 02 - - or eax, ebx // eax = 01 02 02 02 - - shl ebx, 16 // ebx = xx xx 01 02 - movd mm1, eax // mm1 = 01 02 02 02 xx xx xx xx - - or ebx, ecx // ebx = 00 01 01 02 - punpcklbw mm1, mm7 // mm1 = 01 xx 02 xx 02 xx 02 xx - - movd mm0, ebx // mm0 = 00 01 01 02 - pmullw mm1, mm6 // - - punpcklbw mm0, mm7 // mm0 = 00 xx 01 xx 01 xx 02 xx - pmullw mm0, mm5 // - - mov [rdi], ebx // writeoutput 00 xx xx xx - paddw mm0, mm1 - - paddw mm0, mm4 - psrlw mm0, 8 - - packuswb mm0, mm7 - movd DWORD Ptr [rdi+1], mm0 - - pop rbx - - } - -} - - -/**************************************************************************** -* -* ROUTINE : horizontal_line_4_5_scale_mmx -* -* INPUTS : const unsigned char *source : -* unsigned int source_width : -* unsigned char *dest : -* unsigned int dest_width : -* -* OUTPUTS : None. -* -* RETURNS : void -* -* FUNCTION : 4 to 5 up-scaling of a horizontal line of pixels. -* -* SPECIAL NOTES : None. 
-* -****************************************************************************/ -static -void horizontal_line_4_5_scale_mmx -( - const unsigned char *source, - unsigned int source_width, - unsigned char *dest, - unsigned int dest_width -) -{ - (void)dest_width; - - __asm - { - - mov rsi, source - mov rdi, dest - - mov ecx, source_width - lea rdx, [rsi+rcx-8]; - - movq mm5, const45_1 // mm5 = 33 xx 66 xx 9a xx cd xx - movq mm6, const45_2 // mm6 = cd xx 9a xx 66 xx 33 xx - - movq mm4, round_values // mm4 = 80 xx 80 xx 80 xx 80 xx - pxor mm7, mm7 // clear mm7 - - horiz_line_4_5_loop: - - movq mm0, QWORD PTR [rsi] // mm0 = 00 01 02 03 04 05 06 07 - movq mm1, QWORD PTR [rsi+1]; // mm1 = 01 02 03 04 05 06 07 08 - - movq mm2, mm0 // mm2 = 00 01 02 03 04 05 06 07 - movq mm3, mm1 // mm3 = 01 02 03 04 05 06 07 08 - - movd DWORD PTR [rdi], mm0 // write output 00 xx xx xx - punpcklbw mm0, mm7 // mm0 = 00 xx 01 xx 02 xx 03 xx - - punpcklbw mm1, mm7 // mm1 = 01 xx 02 xx 03 xx 04 xx - pmullw mm0, mm5 // 00* 51 01*102 02*154 03*205 - - pmullw mm1, mm6 // 01*205 02*154 03*102 04* 51 - punpckhbw mm2, mm7 // mm2 = 04 xx 05 xx 06 xx 07 xx - - movd DWORD PTR [rdi+5], mm2 // write ouput 05 xx xx xx - pmullw mm2, mm5 // 04* 51 05*102 06*154 07*205 - - punpckhbw mm3, mm7 // mm3 = 05 xx 06 xx 07 xx 08 xx - pmullw mm3, mm6 // 05*205 06*154 07*102 08* 51 - - paddw mm0, mm1 // added round values - paddw mm0, mm4 - - psrlw mm0, 8 // output: 01 xx 02 xx 03 xx 04 xx - packuswb mm0, mm7 - - movd DWORD PTR [rdi+1], mm0 // write output 01 02 03 04 - add rdi, 10 - - add rsi, 8 - paddw mm2, mm3 // - - paddw mm2, mm4 // added round values - cmp rsi, rdx - - psrlw mm2, 8 - packuswb mm2, mm7 - - movd DWORD PTR [rdi-4], mm2 // writeoutput 06 07 08 09 - jl horiz_line_4_5_loop - -//Exit: - movq mm0, [rsi] // mm0 = 00 01 02 03 04 05 06 07 - movq mm1, mm0 // mm1 = 00 01 02 03 04 05 06 07 - - movq mm2, mm0 // mm2 = 00 01 02 03 04 05 06 07 - psrlq mm1, 8 // mm1 = 01 02 03 04 05 06 07 00 - - movq mm3, mask45 
// mm3 = 00 00 00 00 00 00 ff 00 - pand mm3, mm1 // mm3 = 00 00 00 00 00 00 07 00 - - psllq mm3, 8 // mm3 = 00 00 00 00 00 00 00 07 - por mm1, mm3 // mm1 = 01 02 03 04 05 06 07 07 - - movq mm3, mm1 - - movd DWORD PTR [rdi], mm0 // write output 00 xx xx xx - punpcklbw mm0, mm7 // mm0 = 00 xx 01 xx 02 xx 03 xx - - punpcklbw mm1, mm7 // mm1 = 01 xx 02 xx 03 xx 04 xx - pmullw mm0, mm5 // 00* 51 01*102 02*154 03*205 - - pmullw mm1, mm6 // 01*205 02*154 03*102 04* 51 - punpckhbw mm2, mm7 // mm2 = 04 xx 05 xx 06 xx 07 xx - - movd DWORD PTR [rdi+5], mm2 // write ouput 05 xx xx xx - pmullw mm2, mm5 // 04* 51 05*102 06*154 07*205 - - punpckhbw mm3, mm7 // mm3 = 05 xx 06 xx 07 xx 08 xx - pmullw mm3, mm6 // 05*205 06*154 07*102 07* 51 - - paddw mm0, mm1 // added round values - paddw mm0, mm4 - - psrlw mm0, 8 // output: 01 xx 02 xx 03 xx 04 xx - packuswb mm0, mm7 // 01 02 03 04 xx xx xx xx - - movd DWORD PTR [rdi+1], mm0 // write output 01 02 03 04 - paddw mm2, mm3 // - - paddw mm2, mm4 // added round values - psrlw mm2, 8 - - packuswb mm2, mm7 - movd DWORD PTR [rdi+6], mm2 // writeoutput 06 07 08 09 - - - } -} - -/**************************************************************************** -* -* ROUTINE : vertical_band_4_5_scale_mmx -* -* INPUTS : unsigned char *dest : -* unsigned int dest_pitch : -* unsigned int dest_width : -* -* OUTPUTS : None. -* -* RETURNS : void -* -* FUNCTION : 4 to 5 up-scaling of a 4 pixel high band of pixels. -* -* SPECIAL NOTES : The routine uses the first line of the band below -* the current band. The function also has a "C" only -* version. 
-* -****************************************************************************/ -static -void vertical_band_4_5_scale_mmx -( - unsigned char *dest, - unsigned int dest_pitch, - unsigned int dest_width -) -{ - __asm - { - - mov rsi, dest // Get the source and destination pointer - mov ecx, dest_pitch // Get the pitch size - - lea rdi, [rsi+rcx*2] // tow lines below - add rdi, rcx // three lines below - - pxor mm7, mm7 // clear out mm7 - mov edx, dest_width // Loop counter - - vs_4_5_loop: - - movq mm0, QWORD ptr [rsi] // src[0]; - movq mm1, QWORD ptr [rsi+rcx] // src[1]; - - movq mm2, mm0 // Make a copy - punpcklbw mm0, mm7 // unpack low to word - - movq mm5, one_fifth - punpckhbw mm2, mm7 // unpack high to word - - pmullw mm0, mm5 // a * 1/5 - - movq mm3, mm1 // make a copy - punpcklbw mm1, mm7 // unpack low to word - - pmullw mm2, mm5 // a * 1/5 - movq mm6, four_fifths // constan - - movq mm4, mm1 // copy of low b - pmullw mm4, mm6 // b * 4/5 - - punpckhbw mm3, mm7 // unpack high to word - movq mm5, mm3 // copy of high b - - pmullw mm5, mm6 // b * 4/5 - paddw mm0, mm4 // a * 1/5 + b * 4/5 - - paddw mm2, mm5 // a * 1/5 + b * 4/5 - paddw mm0, round_values // + 128 - - paddw mm2, round_values // + 128 - psrlw mm0, 8 - - psrlw mm2, 8 - packuswb mm0, mm2 // des [1] - - movq QWORD ptr [rsi+rcx], mm0 // write des[1] - movq mm0, [rsi+rcx*2] // mm0 = src[2] - - // mm1, mm3 --- Src[1] - // mm0 --- Src[2] - // mm7 for unpacking - - movq mm5, two_fifths - movq mm2, mm0 // make a copy - - pmullw mm1, mm5 // b * 2/5 - movq mm6, three_fifths - - - punpcklbw mm0, mm7 // unpack low to word - pmullw mm3, mm5 // b * 2/5 - - movq mm4, mm0 // make copy of c - punpckhbw mm2, mm7 // unpack high to word - - pmullw mm4, mm6 // c * 3/5 - movq mm5, mm2 - - pmullw mm5, mm6 // c * 3/5 - paddw mm1, mm4 // b * 2/5 + c * 3/5 - - paddw mm3, mm5 // b * 2/5 + c * 3/5 - paddw mm1, round_values // + 128 - - paddw mm3, round_values // + 128 - psrlw mm1, 8 - - psrlw mm3, 8 - packuswb mm1, mm3 // 
des[2] - - movq QWORD ptr [rsi+rcx*2], mm1 // write des[2] - movq mm1, [rdi] // mm1=Src[3]; - - // mm0, mm2 --- Src[2] - // mm1 --- Src[3] - // mm6 --- 3/5 - // mm7 for unpacking - - pmullw mm0, mm6 // c * 3/5 - movq mm5, two_fifths // mm5 = 2/5 - - movq mm3, mm1 // make a copy - pmullw mm2, mm6 // c * 3/5 - - punpcklbw mm1, mm7 // unpack low - movq mm4, mm1 // make a copy - - punpckhbw mm3, mm7 // unpack high - pmullw mm4, mm5 // d * 2/5 - - movq mm6, mm3 // make a copy - pmullw mm6, mm5 // d * 2/5 - - paddw mm0, mm4 // c * 3/5 + d * 2/5 - paddw mm2, mm6 // c * 3/5 + d * 2/5 - - paddw mm0, round_values // + 128 - paddw mm2, round_values // + 128 - - psrlw mm0, 8 - psrlw mm2, 8 - - packuswb mm0, mm2 // des[3] - movq QWORD ptr [rdi], mm0 // write des[3] - - // mm1, mm3 --- Src[3] - // mm7 -- cleared for unpacking - - movq mm0, [rdi+rcx*2] // mm0, Src[0] of the next group - - movq mm5, four_fifths // mm5 = 4/5 - pmullw mm1, mm5 // d * 4/5 - - movq mm6, one_fifth // mm6 = 1/5 - movq mm2, mm0 // make a copy - - pmullw mm3, mm5 // d * 4/5 - punpcklbw mm0, mm7 // unpack low - - pmullw mm0, mm6 // an * 1/5 - punpckhbw mm2, mm7 // unpack high - - paddw mm1, mm0 // d * 4/5 + an * 1/5 - pmullw mm2, mm6 // an * 1/5 - - paddw mm3, mm2 // d * 4/5 + an * 1/5 - paddw mm1, round_values // + 128 - - paddw mm3, round_values // + 128 - psrlw mm1, 8 - - psrlw mm3, 8 - packuswb mm1, mm3 // des[4] - - movq QWORD ptr [rdi+rcx], mm1 // write des[4] - - add rdi, 8 - add rsi, 8 - - sub rdx, 8 - jg vs_4_5_loop - } -} - -/**************************************************************************** -* -* ROUTINE : last_vertical_band_4_5_scale_mmx -* -* INPUTS : unsigned char *dest : -* unsigned int dest_pitch : -* unsigned int dest_width : -* -* OUTPUTS : None. -* -* RETURNS : None -* -* FUNCTION : 4 to 5 up-scaling of the last 4-pixel high band in an image. -* -* SPECIAL NOTES : The routine uses the first line of the band below -* the current band. 
The function also has an "C" only -* version. -* -****************************************************************************/ -static -void last_vertical_band_4_5_scale_mmx -( - unsigned char *dest, - unsigned int dest_pitch, - unsigned int dest_width -) -{ - __asm - { - mov rsi, dest // Get the source and destination pointer - mov ecx, dest_pitch // Get the pitch size - - lea rdi, [rsi+rcx*2] // tow lines below - add rdi, rcx // three lines below - - pxor mm7, mm7 // clear out mm7 - mov edx, dest_width // Loop counter - - last_vs_4_5_loop: - - movq mm0, QWORD ptr [rsi] // src[0]; - movq mm1, QWORD ptr [rsi+rcx] // src[1]; - - movq mm2, mm0 // Make a copy - punpcklbw mm0, mm7 // unpack low to word - - movq mm5, one_fifth - punpckhbw mm2, mm7 // unpack high to word - - pmullw mm0, mm5 // a * 1/5 - - movq mm3, mm1 // make a copy - punpcklbw mm1, mm7 // unpack low to word - - pmullw mm2, mm5 // a * 1/5 - movq mm6, four_fifths // constan - - movq mm4, mm1 // copy of low b - pmullw mm4, mm6 // b * 4/5 - - punpckhbw mm3, mm7 // unpack high to word - movq mm5, mm3 // copy of high b - - pmullw mm5, mm6 // b * 4/5 - paddw mm0, mm4 // a * 1/5 + b * 4/5 - - paddw mm2, mm5 // a * 1/5 + b * 4/5 - paddw mm0, round_values // + 128 - - paddw mm2, round_values // + 128 - psrlw mm0, 8 - - psrlw mm2, 8 - packuswb mm0, mm2 // des [1] - - movq QWORD ptr [rsi+rcx], mm0 // write des[1] - movq mm0, [rsi+rcx*2] // mm0 = src[2] - - // mm1, mm3 --- Src[1] - // mm0 --- Src[2] - // mm7 for unpacking - - movq mm5, two_fifths - movq mm2, mm0 // make a copy - - pmullw mm1, mm5 // b * 2/5 - movq mm6, three_fifths - - - punpcklbw mm0, mm7 // unpack low to word - pmullw mm3, mm5 // b * 2/5 - - movq mm4, mm0 // make copy of c - punpckhbw mm2, mm7 // unpack high to word - - pmullw mm4, mm6 // c * 3/5 - movq mm5, mm2 - - pmullw mm5, mm6 // c * 3/5 - paddw mm1, mm4 // b * 2/5 + c * 3/5 - - paddw mm3, mm5 // b * 2/5 + c * 3/5 - paddw mm1, round_values // + 128 - - paddw mm3, round_values // + 128 - 
psrlw mm1, 8 - - psrlw mm3, 8 - packuswb mm1, mm3 // des[2] - - movq QWORD ptr [rsi+rcx*2], mm1 // write des[2] - movq mm1, [rdi] // mm1=Src[3]; - - movq QWORD ptr [rdi+rcx], mm1 // write des[4]; - - // mm0, mm2 --- Src[2] - // mm1 --- Src[3] - // mm6 --- 3/5 - // mm7 for unpacking - - pmullw mm0, mm6 // c * 3/5 - movq mm5, two_fifths // mm5 = 2/5 - - movq mm3, mm1 // make a copy - pmullw mm2, mm6 // c * 3/5 - - punpcklbw mm1, mm7 // unpack low - movq mm4, mm1 // make a copy - - punpckhbw mm3, mm7 // unpack high - pmullw mm4, mm5 // d * 2/5 - - movq mm6, mm3 // make a copy - pmullw mm6, mm5 // d * 2/5 - - paddw mm0, mm4 // c * 3/5 + d * 2/5 - paddw mm2, mm6 // c * 3/5 + d * 2/5 - - paddw mm0, round_values // + 128 - paddw mm2, round_values // + 128 - - psrlw mm0, 8 - psrlw mm2, 8 - - packuswb mm0, mm2 // des[3] - movq QWORD ptr [rdi], mm0 // write des[3] - - // mm1, mm3 --- Src[3] - // mm7 -- cleared for unpacking - add rdi, 8 - add rsi, 8 - - sub rdx, 8 - jg last_vs_4_5_loop - } -} - -/**************************************************************************** -* -* ROUTINE : vertical_band_3_5_scale_mmx -* -* INPUTS : unsigned char *dest : -* unsigned int dest_pitch : -* unsigned int dest_width : -* -* OUTPUTS : None. -* -* RETURNS : void -* -* FUNCTION : 3 to 5 up-scaling of a 3-pixel high band of pixels. -* -* SPECIAL NOTES : The routine uses the first line of the band below -* the current band. The function also has an "C" only -* version. 
-* -****************************************************************************/ -static -void vertical_band_3_5_scale_mmx -( - unsigned char *dest, - unsigned int dest_pitch, - unsigned int dest_width -) -{ - __asm - { - mov rsi, dest // Get the source and destination pointer - mov ecx, dest_pitch // Get the pitch size - - lea rdi, [rsi+rcx*2] // two lines below - add rdi, rcx // three lines below - - pxor mm7, mm7 // clear out mm7 - mov edx, dest_width // Loop counter - - vs_3_5_loop: - - movq mm0, QWORD ptr [rsi] // src[0]; - movq mm1, QWORD ptr [rsi+rcx] // src[1]; - - movq mm2, mm0 // Make a copy - punpcklbw mm0, mm7 // unpack low to word - - movq mm5, two_fifths // mm5 = 2/5 - punpckhbw mm2, mm7 // unpack high to word - - pmullw mm0, mm5 // a * 2/5 - - movq mm3, mm1 // make a copy - punpcklbw mm1, mm7 // unpack low to word - - pmullw mm2, mm5 // a * 2/5 - movq mm6, three_fifths // mm6 = 3/5 - - movq mm4, mm1 // copy of low b - pmullw mm4, mm6 // b * 3/5 - - punpckhbw mm3, mm7 // unpack high to word - movq mm5, mm3 // copy of high b - - pmullw mm5, mm6 // b * 3/5 - paddw mm0, mm4 // a * 2/5 + b * 3/5 - - paddw mm2, mm5 // a * 2/5 + b * 3/5 - paddw mm0, round_values // + 128 - - paddw mm2, round_values // + 128 - psrlw mm0, 8 - - psrlw mm2, 8 - packuswb mm0, mm2 // des [1] - - movq QWORD ptr [rsi+rcx], mm0 // write des[1] - movq mm0, [rsi+rcx*2] // mm0 = src[2] - - // mm1, mm3 --- Src[1] - // mm0 --- Src[2] - // mm7 for unpacking - - movq mm4, mm1 // b low - pmullw mm1, four_fifths // b * 4/5 low - - movq mm5, mm3 // b high - pmullw mm3, four_fifths // b * 4/5 high - - movq mm2, mm0 // c - pmullw mm4, one_fifth // b * 1/5 - - punpcklbw mm0, mm7 // c low - pmullw mm5, one_fifth // b * 1/5 - - movq mm6, mm0 // make copy of c low - punpckhbw mm2, mm7 // c high - - pmullw mm6, one_fifth // c * 1/5 low - movq mm7, mm2 // make copy of c high - - pmullw mm7, one_fifth // c * 1/5 high - paddw mm1, mm6 // b * 4/5 + c * 1/5 low - - paddw mm3, mm7 // b * 4/5 + c * 1/5 
high - movq mm6, mm0 // make copy of c low - - pmullw mm6, four_fifths // c * 4/5 low - movq mm7, mm2 // make copy of c high - - pmullw mm7, four_fifths // c * 4/5 high - - paddw mm4, mm6 // b * 1/5 + c * 4/5 low - paddw mm5, mm7 // b * 1/5 + c * 4/5 high - - paddw mm1, round_values // + 128 - paddw mm3, round_values // + 128 - - psrlw mm1, 8 - psrlw mm3, 8 - - packuswb mm1, mm3 // des[2] - movq QWORD ptr [rsi+rcx*2], mm1 // write des[2] - - paddw mm4, round_values // + 128 - paddw mm5, round_values // + 128 - - psrlw mm4, 8 - psrlw mm5, 8 - - packuswb mm4, mm5 // des[3] - movq QWORD ptr [rdi], mm4 // write des[3] - - // mm0, mm2 --- Src[3] - - pxor mm7, mm7 // clear mm7 for unpacking - movq mm1, [rdi+rcx*2] // mm1 = Src[0] of the next group - - movq mm5, three_fifths // mm5 = 3/5 - pmullw mm0, mm5 // d * 3/5 - - movq mm6, two_fifths // mm6 = 2/5 - movq mm3, mm1 // make a copy - - pmullw mm2, mm5 // d * 3/5 - punpcklbw mm1, mm7 // unpack low - - pmullw mm1, mm6 // an * 2/5 - punpckhbw mm3, mm7 // unpack high - - paddw mm0, mm1 // d * 3/5 + an * 2/5 - pmullw mm3, mm6 // an * 2/5 - - paddw mm2, mm3 // d * 3/5 + an * 2/5 - paddw mm0, round_values // + 128 - - paddw mm2, round_values // + 128 - psrlw mm0, 8 - - psrlw mm2, 8 - packuswb mm0, mm2 // des[4] - - movq QWORD ptr [rdi+rcx], mm0 // write des[4] - - add rdi, 8 - add rsi, 8 - - sub rdx, 8 - jg vs_3_5_loop - } -} - -/**************************************************************************** -* -* ROUTINE : last_vertical_band_3_5_scale_mmx -* -* INPUTS : unsigned char *dest : -* unsigned int dest_pitch : -* unsigned int dest_width : -* -* OUTPUTS : None. -* -* RETURNS : void -* -* FUNCTION : 3 to 5 up-scaling of a 3-pixel high band of pixels. -* -* SPECIAL NOTES : The routine uses the first line of the band below -* the current band. The function also has an "C" only -* version. 
-* -****************************************************************************/ -static -void last_vertical_band_3_5_scale_mmx -( - unsigned char *dest, - unsigned int dest_pitch, - unsigned int dest_width -) -{ - __asm - { - mov rsi, dest // Get the source and destination pointer - mov ecx, dest_pitch // Get the pitch size - - lea rdi, [rsi+rcx*2] // tow lines below - add rdi, rcx // three lines below - - pxor mm7, mm7 // clear out mm7 - mov edx, dest_width // Loop counter - - - last_vs_3_5_loop: - - movq mm0, QWORD ptr [rsi] // src[0]; - movq mm1, QWORD ptr [rsi+rcx] // src[1]; - - movq mm2, mm0 // Make a copy - punpcklbw mm0, mm7 // unpack low to word - - movq mm5, two_fifths // mm5 = 2/5 - punpckhbw mm2, mm7 // unpack high to word - - pmullw mm0, mm5 // a * 2/5 - - movq mm3, mm1 // make a copy - punpcklbw mm1, mm7 // unpack low to word - - pmullw mm2, mm5 // a * 2/5 - movq mm6, three_fifths // mm6 = 3/5 - - movq mm4, mm1 // copy of low b - pmullw mm4, mm6 // b * 3/5 - - punpckhbw mm3, mm7 // unpack high to word - movq mm5, mm3 // copy of high b - - pmullw mm5, mm6 // b * 3/5 - paddw mm0, mm4 // a * 2/5 + b * 3/5 - - paddw mm2, mm5 // a * 2/5 + b * 3/5 - paddw mm0, round_values // + 128 - - paddw mm2, round_values // + 128 - psrlw mm0, 8 - - psrlw mm2, 8 - packuswb mm0, mm2 // des [1] - - movq QWORD ptr [rsi+rcx], mm0 // write des[1] - movq mm0, [rsi+rcx*2] // mm0 = src[2] - - - - // mm1, mm3 --- Src[1] - // mm0 --- Src[2] - // mm7 for unpacking - - movq mm4, mm1 // b low - pmullw mm1, four_fifths // b * 4/5 low - - movq QWORD ptr [rdi+rcx], mm0 // write des[4] - - movq mm5, mm3 // b high - pmullw mm3, four_fifths // b * 4/5 high - - movq mm2, mm0 // c - pmullw mm4, one_fifth // b * 1/5 - - punpcklbw mm0, mm7 // c low - pmullw mm5, one_fifth // b * 1/5 - - movq mm6, mm0 // make copy of c low - punpckhbw mm2, mm7 // c high - - pmullw mm6, one_fifth // c * 1/5 low - movq mm7, mm2 // make copy of c high - - pmullw mm7, one_fifth // c * 1/5 high - paddw mm1, mm6 
// b * 4/5 + c * 1/5 low - - paddw mm3, mm7 // b * 4/5 + c * 1/5 high - movq mm6, mm0 // make copy of c low - - pmullw mm6, four_fifths // c * 4/5 low - movq mm7, mm2 // make copy of c high - - pmullw mm7, four_fifths // c * 4/5 high - - paddw mm4, mm6 // b * 1/5 + c * 4/5 low - paddw mm5, mm7 // b * 1/5 + c * 4/5 high - - paddw mm1, round_values // + 128 - paddw mm3, round_values // + 128 - - psrlw mm1, 8 - psrlw mm3, 8 - - packuswb mm1, mm3 // des[2] - movq QWORD ptr [rsi+rcx*2], mm1 // write des[2] - - paddw mm4, round_values // + 128 - paddw mm5, round_values // + 128 - - psrlw mm4, 8 - psrlw mm5, 8 - - packuswb mm4, mm5 // des[3] - movq QWORD ptr [rdi], mm4 // write des[3] - - // mm0, mm2 --- Src[3] - - add rdi, 8 - add rsi, 8 - - sub rdx, 8 - jg last_vs_3_5_loop - } -} - -/**************************************************************************** -* -* ROUTINE : vertical_band_1_2_scale_mmx -* -* INPUTS : unsigned char *dest : -* unsigned int dest_pitch : -* unsigned int dest_width : -* -* OUTPUTS : None. -* -* RETURNS : void -* -* FUNCTION : 1 to 2 up-scaling of a band of pixels. -* -* SPECIAL NOTES : The routine uses the first line of the band below -* the current band. The function also has an "C" only -* version. 
-* -****************************************************************************/ -static -void vertical_band_1_2_scale_mmx -( - unsigned char *dest, - unsigned int dest_pitch, - unsigned int dest_width -) -{ - __asm - { - - mov rsi, dest // Get the source and destination pointer - mov ecx, dest_pitch // Get the pitch size - - pxor mm7, mm7 // clear out mm7 - mov edx, dest_width // Loop counter - - vs_1_2_loop: - - movq mm0, [rsi] // get Src[0] - movq mm1, [rsi + rcx * 2] // get Src[1] - - movq mm2, mm0 // make copy before unpack - movq mm3, mm1 // make copy before unpack - - punpcklbw mm0, mm7 // low Src[0] - movq mm6, four_ones // mm6= 1, 1, 1, 1 - - punpcklbw mm1, mm7 // low Src[1] - paddw mm0, mm1 // low (a + b) - - punpckhbw mm2, mm7 // high Src[0] - paddw mm0, mm6 // low (a + b + 1) - - punpckhbw mm3, mm7 - paddw mm2, mm3 // high (a + b ) - - psraw mm0, 1 // low (a + b +1 )/2 - paddw mm2, mm6 // high (a + b + 1) - - psraw mm2, 1 // high (a + b + 1)/2 - packuswb mm0, mm2 // pack results - - movq [rsi+rcx], mm0 // write out eight bytes - add rsi, 8 - - sub rdx, 8 - jg vs_1_2_loop - } - -} - -/**************************************************************************** -* -* ROUTINE : last_vertical_band_1_2_scale_mmx -* -* INPUTS : unsigned char *dest : -* unsigned int dest_pitch : -* unsigned int dest_width : -* -* OUTPUTS : None. -* -* RETURNS : void -* -* FUNCTION : 1 to 2 up-scaling of band of pixels. -* -* SPECIAL NOTES : The routine uses the first line of the band below -* the current band. The function also has an "C" only -* version. 
-* -****************************************************************************/ -static -void last_vertical_band_1_2_scale_mmx -( - unsigned char *dest, - unsigned int dest_pitch, - unsigned int dest_width -) -{ - __asm - { - mov rsi, dest // Get the source and destination pointer - mov ecx, dest_pitch // Get the pitch size - - mov edx, dest_width // Loop counter - - last_vs_1_2_loop: - - movq mm0, [rsi] // get Src[0] - movq [rsi+rcx], mm0 // write out eight bytes - - add rsi, 8 - sub rdx, 8 - - jg last_vs_1_2_loop - } -} - -/**************************************************************************** -* -* ROUTINE : horizontal_line_1_2_scale -* -* INPUTS : const unsigned char *source : -* unsigned int source_width : -* unsigned char *dest : -* unsigned int dest_width : -* -* OUTPUTS : None. -* -* RETURNS : void -* -* FUNCTION : 1 to 2 up-scaling of a horizontal line of pixels. -* -* SPECIAL NOTES : None. -* -****************************************************************************/ -static -void horizontal_line_1_2_scale_mmx -( - const unsigned char *source, - unsigned int source_width, - unsigned char *dest, - unsigned int dest_width -) -{ - (void) dest_width; - - __asm - { - mov rsi, source - mov rdi, dest - - pxor mm7, mm7 - movq mm6, four_ones - - mov ecx, source_width - - hs_1_2_loop: - - movq mm0, [rsi] - movq mm1, [rsi+1] - - movq mm2, mm0 - movq mm3, mm1 - - movq mm4, mm0 - punpcklbw mm0, mm7 - - punpcklbw mm1, mm7 - paddw mm0, mm1 - - paddw mm0, mm6 - punpckhbw mm2, mm7 - - punpckhbw mm3, mm7 - paddw mm2, mm3 - - paddw mm2, mm6 - psraw mm0, 1 - - psraw mm2, 1 - packuswb mm0, mm2 - - movq mm2, mm4 - punpcklbw mm2, mm0 - - movq [rdi], mm2 - punpckhbw mm4, mm0 - - movq [rdi+8], mm4 - add rsi, 8 - - add rdi, 16 - sub rcx, 8 - - cmp rcx, 8 - jg hs_1_2_loop - -// last eight pixel - - movq mm0, [rsi] - movq mm1, mm0 - - movq mm2, mm0 - movq mm3, mm1 - - psrlq mm1, 8 - psrlq mm3, 56 - - psllq mm3, 56 - por mm1, mm3 - - movq mm3, mm1 - movq mm4, mm0 - - 
punpcklbw mm0, mm7 - punpcklbw mm1, mm7 - - paddw mm0, mm1 - paddw mm0, mm6 - - punpckhbw mm2, mm7 - punpckhbw mm3, mm7 - - paddw mm2, mm3 - paddw mm2, mm6 - - psraw mm0, 1 - psraw mm2, 1 - - packuswb mm0, mm2 - movq mm2, mm4 - - punpcklbw mm2, mm0 - movq [rdi], mm2 - - punpckhbw mm4, mm0 - movq [rdi+8], mm4 - } -} - - - - - -__declspec(align(16)) const static unsigned short const54_2[] = { 0, 64, 128, 192 }; -__declspec(align(16)) const static unsigned short const54_1[] = {256, 192, 128, 64 }; - - -/**************************************************************************** -* -* ROUTINE : horizontal_line_5_4_scale_mmx -* -* INPUTS : const unsigned char *source : Pointer to source data. -* unsigned int source_width : Stride of source. -* unsigned char *dest : Pointer to destination data. -* unsigned int dest_width : Stride of destination (NOT USED). -* -* OUTPUTS : None. -* -* RETURNS : void -* -* FUNCTION : Copies horizontal line of pixels from source to -* destination scaling up by 4 to 5. -* -* SPECIAL NOTES : None. 
-* -****************************************************************************/ -static -void horizontal_line_5_4_scale_mmx -( - const unsigned char *source, - unsigned int source_width, - unsigned char *dest, - unsigned int dest_width -) -{ - /* - unsigned i; - unsigned int a, b, c, d, e; - unsigned char *des = dest; - const unsigned char *src = source; - - (void) dest_width; - - for ( i=0; i>8); - des[2] = ((c*128 + d*128 + 128)>>8); - des[3] = ((d* 64 + e*192 + 128)>>8); - - src += 5; - des += 4; - } - */ - __asm - { - - mov rsi, source ; - mov rdi, dest ; - - mov ecx, source_width ; - movq mm5, const54_1 ; - - pxor mm7, mm7 ; - movq mm6, const54_2 ; - - movq mm4, round_values ; - lea rdx, [rsi+rcx] ; - horizontal_line_5_4_loop: - - movq mm0, QWORD PTR [rsi] ; - 00 01 02 03 04 05 06 07 - movq mm1, mm0 ; - 00 01 02 03 04 05 06 07 - - psrlq mm0, 8 ; - 01 02 03 04 05 06 07 xx - punpcklbw mm1, mm7 ; - xx 00 xx 01 xx 02 xx 03 - - punpcklbw mm0, mm7 ; - xx 01 xx 02 xx 03 xx 04 - pmullw mm1, mm5 - - pmullw mm0, mm6 - add rsi, 5 - - add rdi, 4 - paddw mm1, mm0 - - paddw mm1, mm4 - psrlw mm1, 8 - - cmp rsi, rdx - packuswb mm1, mm7 - - movd DWORD PTR [rdi-4], mm1 - - jl horizontal_line_5_4_loop - - } - -} -__declspec(align(16)) const static unsigned short one_fourths[] = { 64, 64, 64, 64 }; -__declspec(align(16)) const static unsigned short two_fourths[] = { 128, 128, 128, 128 }; -__declspec(align(16)) const static unsigned short three_fourths[] = { 192, 192, 192, 192 }; - -static -void vertical_band_5_4_scale_mmx -( - unsigned char *source, - unsigned int src_pitch, - unsigned char *dest, - unsigned int dest_pitch, - unsigned int dest_width -) -{ - - __asm - { - - mov rsi, source // Get the source and destination pointer - mov ecx, src_pitch // Get the pitch size - - mov rdi, dest // tow lines below - pxor mm7, mm7 // clear out mm7 - - mov edx, dest_pitch // Loop counter - mov ebx, dest_width - - vs_5_4_loop: - - movd mm0, DWORD ptr [rsi] // src[0]; - movd mm1, DWORD 
ptr [rsi+rcx] // src[1]; - - movd mm2, DWORD ptr [rsi+rcx*2] - lea rax, [rsi+rcx*2] // - - punpcklbw mm1, mm7 - punpcklbw mm2, mm7 - - movq mm3, mm2 - pmullw mm1, three_fourths - - pmullw mm2, one_fourths - movd mm4, [rax+rcx] - - pmullw mm3, two_fourths - punpcklbw mm4, mm7 - - movq mm5, mm4 - pmullw mm4, two_fourths - - paddw mm1, mm2 - movd mm6, [rax+rcx*2] - - pmullw mm5, one_fourths - paddw mm1, round_values; - - paddw mm3, mm4 - psrlw mm1, 8 - - punpcklbw mm6, mm7 - paddw mm3, round_values - - pmullw mm6, three_fourths - psrlw mm3, 8 - - packuswb mm1, mm7 - packuswb mm3, mm7 - - movd DWORD PTR [rdi], mm0 - movd DWORD PTR [rdi+rdx], mm1 - - - paddw mm5, mm6 - movd DWORD PTR [rdi+rdx*2], mm3 - - lea rax, [rdi+rdx*2] - paddw mm5, round_values - - psrlw mm5, 8 - add rdi, 4 - - packuswb mm5, mm7 - movd DWORD PTR [rax+rdx], mm5 - - add rsi, 4 - sub rbx, 4 - - jg vs_5_4_loop - } -} - - -__declspec(align(16)) const static unsigned short const53_1[] = { 0, 85, 171, 0 }; -__declspec(align(16)) const static unsigned short const53_2[] = {256, 171, 85, 0 }; - - -static -void horizontal_line_5_3_scale_mmx -( - const unsigned char *source, - unsigned int source_width, - unsigned char *dest, - unsigned int dest_width -) -{ - __asm - { - - mov rsi, source ; - mov rdi, dest ; - - mov ecx, source_width ; - movq mm5, const53_1 ; - - pxor mm7, mm7 ; - movq mm6, const53_2 ; - - movq mm4, round_values ; - lea rdx, [rsi+rcx-5] ; - horizontal_line_5_3_loop: - - movq mm0, QWORD PTR [rsi] ; - 00 01 02 03 04 05 06 07 - movq mm1, mm0 ; - 00 01 02 03 04 05 06 07 - - psllw mm0, 8 ; - xx 00 xx 02 xx 04 xx 06 - psrlw mm1, 8 ; - 01 xx 03 xx 05 xx 07 xx - - psrlw mm0, 8 ; - 00 xx 02 xx 04 xx 06 xx - psllq mm1, 16 ; - xx xx 01 xx 03 xx 05 xx - - pmullw mm0, mm6 - - pmullw mm1, mm5 - add rsi, 5 - - add rdi, 3 - paddw mm1, mm0 - - paddw mm1, mm4 - psrlw mm1, 8 - - cmp rsi, rdx - packuswb mm1, mm7 - - movd DWORD PTR [rdi-3], mm1 - jl horizontal_line_5_3_loop - -//exit condition - movq mm0, QWORD 
PTR [rsi] ; - 00 01 02 03 04 05 06 07 - movq mm1, mm0 ; - 00 01 02 03 04 05 06 07 - - psllw mm0, 8 ; - xx 00 xx 02 xx 04 xx 06 - psrlw mm1, 8 ; - 01 xx 03 xx 05 xx 07 xx - - psrlw mm0, 8 ; - 00 xx 02 xx 04 xx 06 xx - psllq mm1, 16 ; - xx xx 01 xx 03 xx 05 xx - - pmullw mm0, mm6 - - pmullw mm1, mm5 - paddw mm1, mm0 - - paddw mm1, mm4 - psrlw mm1, 8 - - packuswb mm1, mm7 - movd rax, mm1 - - mov rdx, rax - shr rdx, 16 - - mov WORD PTR[rdi], ax - mov BYTE PTR[rdi+2], dl - - } - -} - -__declspec(align(16)) const static unsigned short one_thirds[] = { 85, 85, 85, 85 }; -__declspec(align(16)) const static unsigned short two_thirds[] = { 171, 171, 171, 171 }; - -static -void vertical_band_5_3_scale_mmx -( - unsigned char *source, - unsigned int src_pitch, - unsigned char *dest, - unsigned int dest_pitch, - unsigned int dest_width -) -{ - - __asm - { - - mov rsi, source // Get the source and destination pointer - mov ecx, src_pitch // Get the pitch size - - mov rdi, dest // tow lines below - pxor mm7, mm7 // clear out mm7 - - mov edx, dest_pitch // Loop counter - movq mm5, one_thirds - - movq mm6, two_thirds - mov ebx, dest_width; - - vs_5_3_loop: - - movd mm0, DWORD ptr [rsi] // src[0]; - movd mm1, DWORD ptr [rsi+rcx] // src[1]; - - movd mm2, DWORD ptr [rsi+rcx*2] - lea rax, [rsi+rcx*2] // - - punpcklbw mm1, mm7 - punpcklbw mm2, mm7 - - pmullw mm1, mm5 - pmullw mm2, mm6 - - movd mm3, DWORD ptr [rax+rcx] - movd mm4, DWORD ptr [rax+rcx*2] - - punpcklbw mm3, mm7 - punpcklbw mm4, mm7 - - pmullw mm3, mm6 - pmullw mm4, mm5 - - - movd DWORD PTR [rdi], mm0 - paddw mm1, mm2 - - paddw mm1, round_values - psrlw mm1, 8 - - packuswb mm1, mm7 - paddw mm3, mm4 - - paddw mm3, round_values - movd DWORD PTR [rdi+rdx], mm1 - - psrlw mm3, 8 - packuswb mm3, mm7 - - movd DWORD PTR [rdi+rdx*2], mm3 - - - add rdi, 4 - add rsi, 4 - - sub rbx, 4 - jg vs_5_3_loop - } -} - - - - -/**************************************************************************** -* -* ROUTINE : horizontal_line_2_1_scale -* 
-* INPUTS : const unsigned char *source : -* unsigned int source_width : -* unsigned char *dest : -* unsigned int dest_width : -* -* OUTPUTS : None. -* -* RETURNS : void -* -* FUNCTION : 1 to 2 up-scaling of a horizontal line of pixels. -* -* SPECIAL NOTES : None. -* -****************************************************************************/ -static -void horizontal_line_2_1_scale_mmx -( - const unsigned char *source, - unsigned int source_width, - unsigned char *dest, - unsigned int dest_width -) -{ - (void) dest_width; - - __asm - { - mov rsi, source - mov rdi, dest - - pxor mm7, mm7 - mov ecx, dest_width - - xor rdx, rdx - hs_2_1_loop: - - movq mm0, [rsi+rdx*2] - psllw mm0, 8 - - psrlw mm0, 8 - packuswb mm0, mm7 - - movd DWORD Ptr [rdi+rdx], mm0; - add rdx, 4 - - cmp rdx, rcx - jl hs_2_1_loop - - } -} - - - -static -void vertical_band_2_1_scale_mmx -( - unsigned char *source, - unsigned int src_pitch, - unsigned char *dest, - unsigned int dest_pitch, - unsigned int dest_width) -{ - vpx_memcpy(dest, source, dest_width); -} - - -__declspec(align(16)) const static unsigned short three_sixteenths[] = { 48, 48, 48, 48 }; -__declspec(align(16)) const static unsigned short ten_sixteenths[] = { 160, 160, 160, 160 }; - -static -void vertical_band_2_1_scale_i_mmx -( - unsigned char *source, - unsigned int src_pitch, - unsigned char *dest, - unsigned int dest_pitch, - unsigned int dest_width -) -{ - __asm - { - mov rsi, source - mov rdi, dest - - mov eax, src_pitch - mov edx, dest_width - - pxor mm7, mm7 - sub rsi, rax //back one line - - - lea rcx, [rsi+rdx]; - movq mm6, round_values; - - movq mm5, three_sixteenths; - movq mm4, ten_sixteenths; - - vs_2_1_i_loop: - movd mm0, [rsi] // - movd mm1, [rsi+rax] // - - movd mm2, [rsi+rax*2] // - punpcklbw mm0, mm7 - - pmullw mm0, mm5 - punpcklbw mm1, mm7 - - pmullw mm1, mm4 - punpcklbw mm2, mm7 - - pmullw mm2, mm5 - paddw mm0, round_values - - paddw mm1, mm2 - paddw mm0, mm1 - - psrlw mm0, 8 - packuswb mm0, mm7 - - movd DWORD 
PTR [rdi], mm0 - add rsi, 4 - - add rdi, 4; - cmp rsi, rcx - jl vs_2_1_i_loop - - } -} - - - -void -register_mmxscalers(void) -{ - vp8_horizontal_line_1_2_scale = horizontal_line_1_2_scale_mmx; - vp8_horizontal_line_3_5_scale = horizontal_line_3_5_scale_mmx; - vp8_horizontal_line_4_5_scale = horizontal_line_4_5_scale_mmx; - vp8_vertical_band_1_2_scale = vertical_band_1_2_scale_mmx; - vp8_last_vertical_band_1_2_scale = last_vertical_band_1_2_scale_mmx; - vp8_vertical_band_3_5_scale = vertical_band_3_5_scale_mmx; - vp8_last_vertical_band_3_5_scale = last_vertical_band_3_5_scale_mmx; - vp8_vertical_band_4_5_scale = vertical_band_4_5_scale_mmx; - vp8_last_vertical_band_4_5_scale = last_vertical_band_4_5_scale_mmx; - - vp8_vertical_band_5_4_scale = vertical_band_5_4_scale_mmx; - vp8_vertical_band_5_3_scale = vertical_band_5_3_scale_mmx; - vp8_vertical_band_2_1_scale = vertical_band_2_1_scale_mmx; - vp8_vertical_band_2_1_scale_i = vertical_band_2_1_scale_i_mmx; - vp8_horizontal_line_2_1_scale = horizontal_line_2_1_scale_mmx; - vp8_horizontal_line_5_3_scale = horizontal_line_5_3_scale_mmx; - vp8_horizontal_line_5_4_scale = horizontal_line_5_4_scale_mmx; -} diff -Nru libvpx-0.9.5/vpx_scale/x86_64/scalesystemdependant.c libvpx-0.9.6/vpx_scale/x86_64/scalesystemdependant.c --- libvpx-0.9.5/vpx_scale/x86_64/scalesystemdependant.c 2010-10-28 13:14:14.000000000 +0000 +++ libvpx-0.9.6/vpx_scale/x86_64/scalesystemdependant.c 1970-01-01 00:00:00.000000000 +0000 @@ -1,61 +0,0 @@ -/* - * Copyright (c) 2010 The WebM project authors. All Rights Reserved. - * - * Use of this source code is governed by a BSD-style license - * that can be found in the LICENSE file in the root of the source - * tree. An additional intellectual property rights grant can be found - * in the file PATENTS. All contributing project authors may - * be found in the AUTHORS file in the root of the source tree. 
- */ - - -/**************************************************************************** -* -* Module Title : system_dependant.c -* -* Description : Miscellaneous system dependant functions -* -****************************************************************************/ - -/**************************************************************************** -* Header Files -****************************************************************************/ -#include "vpx_scale/vpxscale.h" -#include "cpuidlib.h" - -/**************************************************************************** -* Imports -*****************************************************************************/ -extern void register_generic_scalers(void); -extern void register_mmxscalers(void); - -/**************************************************************************** - * - * ROUTINE : post_proc_machine_specific_config - * - * INPUTS : UINT32 Version : Codec version number. - * - * OUTPUTS : None. - * - * RETURNS : void - * - * FUNCTION : Checks for machine specifc features such as MMX support - * sets appropriate flags and function pointers. - * - * SPECIAL NOTES : None. 
- * - ****************************************************************************/ -void -vp8_scale_machine_specific_config(void) -{ - int wmt_enabled = 1; - - if (wmt_enabled) - { - register_mmxscalers(); - } - else - { - register_generic_scalers(); - } -} diff -Nru libvpx-0.9.5/vpx_scale/yv12config.h libvpx-0.9.6/vpx_scale/yv12config.h --- libvpx-0.9.5/vpx_scale/yv12config.h 2010-10-28 13:14:14.000000000 +0000 +++ libvpx-0.9.6/vpx_scale/yv12config.h 2011-03-04 20:40:41.000000000 +0000 @@ -57,6 +57,8 @@ int border; int frame_size; YUV_TYPE clrtype; + + int corrupted; } YV12_BUFFER_CONFIG; int vp8_yv12_alloc_frame_buffer(YV12_BUFFER_CONFIG *ybf, int width, int height, int border); diff -Nru libvpx-0.9.5/wince_wmain_adapter.cpp libvpx-0.9.6/wince_wmain_adapter.cpp --- libvpx-0.9.5/wince_wmain_adapter.cpp 2010-10-28 13:14:14.000000000 +0000 +++ libvpx-0.9.6/wince_wmain_adapter.cpp 1970-01-01 00:00:00.000000000 +0000 @@ -1,50 +0,0 @@ -/* - * Copyright (c) 2010 The WebM project authors. All Rights Reserved. - * - * Use of this source code is governed by a BSD-style license - * that can be found in the LICENSE file in the root of the source - * tree. An additional intellectual property rights grant can be found - * in the file PATENTS. All contributing project authors may - * be found in the AUTHORS file in the root of the source tree. - */ - - -/* This program is created to take command arguments and pass - * them to main() in example.c or example_xma.c, because the - * correspending part in example.c or example_xma.c does not - * work on Pocket PC platform. - * To modify the command arguments, go to "Property" page and - * fill in "Command arguments." 
For example: - * --codec vp6 --flipuv --progress _bnd.vp6 - */ -#include -#include -#include - -#define MAX_NUM_ARG 64 -#define MAX_SIZ_ARG 512 - -extern "C" -{ - int main(int argc, char **argv); -} - -int wmain(int argc, wchar_t **argv) { - char *cargv[MAX_NUM_ARG]; - char chargv[MAX_SIZ_ARG]; - int ret; - - /* transform command line arguments from (wchar_t *) to (char *) */ - for(int i=0; i